Skip to content

Commit

Permalink
Merge pull request #67 from semi-h/operators
Browse files Browse the repository at this point in the history
Add support for scalar product operator
  • Loading branch information
semi-h committed Mar 5, 2024
2 parents 266cbc0 + ce9289f commit 85246f3
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 50 deletions.
36 changes: 25 additions & 11 deletions src/backend.f90
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ module m_base_backend
procedure(sum_intox), deferred :: sum_yintox
procedure(sum_intox), deferred :: sum_zintox
procedure(vecadd), deferred :: vecadd
procedure(get_fields), deferred :: get_fields
procedure(set_fields), deferred :: set_fields
procedure(scalar_product), deferred :: scalar_product
procedure(get_field), deferred :: get_field
procedure(set_field), deferred :: set_field
procedure(alloc_tdsops), deferred :: alloc_tdsops
end type base_backend_t

Expand Down Expand Up @@ -126,7 +127,20 @@ end subroutine vecadd
end interface

abstract interface
subroutine get_fields(self, u_out, v_out, w_out, u, v, w)
real(dp) function scalar_product(self, x, y) result(s)
!! Calculates the scalar product of two input fields
import :: base_backend_t
import :: dp
import :: field_t
implicit none

class(base_backend_t) :: self
class(field_t), intent(in) :: x, y
end function scalar_product
end interface

abstract interface
subroutine get_field(self, arr, f)
!! copy the specialist data structure from device or host back
!! to a regular 3D data structure.
import :: base_backend_t
Expand All @@ -135,23 +149,23 @@ subroutine get_fields(self, u_out, v_out, w_out, u, v, w)
implicit none

class(base_backend_t) :: self
real(dp), dimension(:, :, :), intent(out) :: u_out, v_out, w_out
class(field_t), intent(in) :: u, v, w
end subroutine get_fields
real(dp), dimension(:, :, :), intent(out) :: arr
class(field_t), intent(in) :: f
end subroutine get_field

subroutine set_fields(self, u, v, w, u_in, v_in, w_in)
subroutine set_field(self, f, arr)
!! copy the initial condition stored in a regular 3D data
!! structure into the specialist data structure arrays on the
!! structure into the specialist data structure array on the
!! device or host.
import :: base_backend_t
import :: dp
import :: field_t
implicit none

class(base_backend_t) :: self
class(field_t), intent(inout) :: u, v, w
real(dp), dimension(:, :, :), intent(in) :: u_in, v_in, w_in
end subroutine set_fields
class(field_t), intent(inout) :: f
real(dp), dimension(:, :, :), intent(in) :: arr
end subroutine set_field
end interface

abstract interface
Expand Down
61 changes: 44 additions & 17 deletions src/cuda/backend.f90
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module m_cuda_backend
use iso_fortran_env, only: stderr => error_unit
use cudafor
use mpi

use m_allocator, only: allocator_t, field_t
use m_base_backend, only: base_backend_t
Expand All @@ -16,7 +17,7 @@ module m_cuda_backend
use m_cuda_kernels_dist, only: transeq_3fused_dist, transeq_3fused_subs
use m_cuda_kernels_reorder, only: &
reorder_x2y, reorder_x2z, reorder_y2x, reorder_y2z, reorder_z2x, &
reorder_z2y, sum_yintox, sum_zintox, axpby, buffer_copy
reorder_z2y, sum_yintox, sum_zintox, scalar_product, axpby, buffer_copy

implicit none

Expand All @@ -41,8 +42,9 @@ module m_cuda_backend
procedure :: sum_yintox => sum_yintox_cuda
procedure :: sum_zintox => sum_zintox_cuda
procedure :: vecadd => vecadd_cuda
procedure :: set_fields => set_fields_cuda
procedure :: get_fields => get_fields_cuda
procedure :: scalar_product => scalar_product_cuda
procedure :: set_field => set_field_cuda
procedure :: get_field => get_field_cuda
procedure :: transeq_cuda_dist
procedure :: transeq_cuda_thom
procedure :: tds_solve_dist
Expand Down Expand Up @@ -529,6 +531,35 @@ subroutine vecadd_cuda(self, a, x, b, y)

end subroutine vecadd_cuda

real(dp) function scalar_product_cuda(self, x, y) result(s)
implicit none

class(cuda_backend_t) :: self
class(field_t), intent(in) :: x, y

real(dp), device, pointer, dimension(:, :, :) :: x_d, y_d
real(dp), device, allocatable :: sum_d
type(dim3) :: blocks, threads
integer :: n, ierr

select type(x); type is (cuda_field_t); x_d => x%data_d; end select
select type(y); type is (cuda_field_t); y_d => y%data_d; end select

allocate (sum_d)
sum_d = 0._dp

n = size(x_d, dim = 2)
blocks = dim3(size(x_d, dim = 3), 1, 1)
threads = dim3(SZ, 1, 1)
call scalar_product<<<blocks, threads>>>(sum_d, x_d, y_d, n)

s = sum_d

call MPI_Allreduce(MPI_IN_PLACE, s, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
MPI_COMM_WORLD, ierr)

end function scalar_product_cuda

subroutine copy_into_buffers(u_send_s_dev, u_send_e_dev, u_dev, n)
implicit none

Expand All @@ -547,31 +578,27 @@ subroutine copy_into_buffers(u_send_s_dev, u_send_e_dev, u_dev, n)

end subroutine copy_into_buffers

subroutine set_fields_cuda(self, u, v, w, u_in, v_in, w_in)
subroutine set_field_cuda(self, f, arr)
implicit none

class(cuda_backend_t) :: self
class(field_t), intent(inout) :: u, v, w
real(dp), dimension(:, :, :), intent(in) :: u_in, v_in, w_in
class(field_t), intent(inout) :: f
real(dp), dimension(:, :, :), intent(in) :: arr

select type(u); type is (cuda_field_t); u%data_d = u_in; end select
select type(v); type is (cuda_field_t); v%data_d = v_in; end select
select type(w); type is (cuda_field_t); w%data_d = w_in; end select
select type(f); type is (cuda_field_t); f%data_d = arr; end select

end subroutine set_fields_cuda
end subroutine set_field_cuda

subroutine get_fields_cuda(self, u_out, v_out, w_out, u, v, w)
subroutine get_field_cuda(self, arr, f)
implicit none

class(cuda_backend_t) :: self
real(dp), dimension(:, :, :), intent(out) :: u_out, v_out, w_out
class(field_t), intent(in) :: u, v, w
real(dp), dimension(:, :, :), intent(out) :: arr
class(field_t), intent(in) :: f

select type(u); type is (cuda_field_t); u_out = u%data_d; end select
select type(v); type is (cuda_field_t); v_out = v%data_d; end select
select type(w); type is (cuda_field_t); w_out = w%data_d; end select
select type(f); type is (cuda_field_t); arr = f%data_d; end select

end subroutine get_fields_cuda
end subroutine get_field_cuda

end module m_cuda_backend

21 changes: 21 additions & 0 deletions src/cuda/kernels/reorder.f90
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,27 @@ attributes(global) subroutine axpby(n, alpha, x, beta, y)

end subroutine axpby

attributes(global) subroutine scalar_product(s, x, y, n)
implicit none

real(dp), device, intent(inout) :: s
real(dp), device, intent(in), dimension(:, :, :) :: x, y
integer, value, intent(in) :: n

real(dp) :: s_pncl !! pencil sum
integer :: i, j, b, ierr

i = threadIdx%x
b = blockIdx%x

s_pncl = 0._dp
do j = 1, n
s_pncl = s_pncl + x(i, j, b)*y(i, j, b)
end do
ierr = atomicadd(s, s_pncl)

end subroutine scalar_product

attributes(global) subroutine buffer_copy(u_send_s, u_send_e, u, n, n_halo)
implicit none

Expand Down
39 changes: 23 additions & 16 deletions src/omp/backend.f90
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ module m_omp_backend
procedure :: sum_yintox => sum_yintox_omp
procedure :: sum_zintox => sum_zintox_omp
procedure :: vecadd => vecadd_omp
procedure :: set_fields => set_fields_omp
procedure :: get_fields => get_fields_omp
procedure :: scalar_product => scalar_product_omp
procedure :: set_field => set_field_omp
procedure :: get_field => get_field_omp
procedure :: transeq_omp_dist
end type omp_backend_t

Expand Down Expand Up @@ -304,6 +305,16 @@ subroutine vecadd_omp(self, a, x, b, y)

end subroutine vecadd_omp

real(dp) function scalar_product_omp(self, x, y) result(s)
implicit none

class(omp_backend_t) :: self
class(field_t), intent(in) :: x, y

s = 0._dp

end function scalar_product_omp

subroutine copy_into_buffers(u_send_s, u_send_e, u, n, n_blocks)
implicit none

Expand All @@ -329,31 +340,27 @@ subroutine copy_into_buffers(u_send_s, u_send_e, u, n, n_blocks)

end subroutine copy_into_buffers

subroutine set_fields_omp(self, u, v, w, u_in, v_in, w_in)
subroutine set_field_omp(self, f, arr)
implicit none

class(omp_backend_t) :: self
class(field_t), intent(inout) :: u, v, w
real(dp), dimension(:, :, :), intent(in) :: u_in, v_in, w_in
class(field_t), intent(inout) :: f
real(dp), dimension(:, :, :), intent(in) :: arr

u%data = u_in
v%data = v_in
w%data = w_in
f%data = arr

end subroutine set_fields_omp
end subroutine set_field_omp

subroutine get_fields_omp(self, u_out, v_out, w_out, u, v, w)
subroutine get_field_omp(self, arr, f)
implicit none

class(omp_backend_t) :: self
real(dp), dimension(:, :, :), intent(out) :: u_out, v_out, w_out
class(field_t), intent(in) :: u, v, w
real(dp), dimension(:, :, :), intent(out) :: arr
class(field_t), intent(in) :: f

u_out = u%data
v_out = v%data
w_out = w%data
arr = f%data

end subroutine get_fields_omp
end subroutine get_field_omp

end module m_omp_backend

12 changes: 6 additions & 6 deletions src/solver.f90
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ function init(backend, time_integrator, xdirps, ydirps, zdirps, globs) &
v_init = 0
w_init = 0

call solver%backend%set_fields( &
solver%u, solver%v, solver%w, u_init, v_init, w_init &
)
call solver%backend%set_field(solver%u, u_init)
call solver%backend%set_field(solver%v, v_init)
call solver%backend%set_field(solver%w, w_init)

deallocate(u_init, v_init, w_init)
print*, 'initial conditions are set'
Expand Down Expand Up @@ -548,9 +548,9 @@ subroutine run(self, n_iter, u_out, v_out, w_out)

print*, 'run end'

call self%backend%get_fields( &
u_out, v_out, w_out, self%u, self%v, self%w &
)
call self%backend%get_field(u_out, self%u)
call self%backend%get_field(v_out, self%v)
call self%backend%get_field(w_out, self%w)

end subroutine run

Expand Down

0 comments on commit 85246f3

Please sign in to comment.