program CG
! Conjugate Gradient
! Solve the 1D heat equation at steady state
! Solve A T = 0, with A the matrix corresponding to the Laplacian
!
! Heat equation : dT/dt = alpha d^2 T/dx^2
! When converged, dT/dt = 0 and the equation becomes : 0 = alpha d^2 T/dx^2 <--> 0 = d^2 T/dx^2
! Numerically, this means : (T(i+1)-2.0*T(i)+T(i-1)) * 1/(Dx*Dx) = 0 <--> A T = 0, with A a tridiagonal, symmetric matrix (-A is positive definite, so CG applies)
! The matrix is like :
! -2/(dx*dx) 1/(dx*dx) 0 0 0 0 ....
! 1/(dx*dx) -2/(dx*dx) 1/(dx*dx) 0 0 0 ....
! 0 1/(dx*dx) -2/(dx*dx) 1/(dx*dx) 0 0 ....
! 0 0 1/(dx*dx) -2/(dx*dx) 1/(dx*dx) 0 ....
! 0 0 0 1/(dx*dx) -2/(dx*dx) 1/(dx*dx) 0
! etc
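!
! Conjugate Gradient recurrences as implemented in the iteration loop below (right-hand side b = 0):
! r_0 = -A T_0 (initial residual)
! p_k = r_k + beta_k p_(k-1) (search direction, beta_0 = 0)
! alpha_k = (r_k . r_k) / (p_k . A p_k) (step length, stored in gam, with q = A p)
! T_(k+1) = T_k + alpha_k p_k
! r_(k+1) = r_k - alpha_k q
! beta_(k+1) = (r_(k+1) . r_(k+1)) / (r_k . r_k) (rho / rho0)
! All dot products are local partial sums combined with MPI_ALLREDUCE.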
use mpi
implicit none
integer, parameter :: ndim = 1
real(8), dimension(1:ndim) :: Lx,Dx,InvDxDx
integer, dimension(1:ndim) :: Nx,Global_Nx
real(8) :: beta, rho, rho0, gam, sum_rho, sum_gam, max_Residue
integer :: n,i
real(8), allocatable, dimension(:) :: T,Residue,p,q
integer :: rank, nb_mpi_processes, ierror, tag, statu(MPI_STATUS_SIZE),niter
real(8), dimension(:), allocatable :: Field,Field_buff
character(len=4) :: charbuff
integer, dimension(1) :: nb_process_axe ! number of processes per axis of the Cartesian topology
logical, dimension(1) :: cart_boundaries ! Type of boundaries : .true. -> periodic, .false. -> non periodic
integer :: MPI_COMM_CART ! our new communicator
integer, dimension(1) :: cart_position, pos ! position of process on the cartesian topology ( and pos a buffer )
integer, dimension(-1:1) :: cart_neigh ! neighbour ranks in the Cartesian topology
tag = 7777
call MPI_INIT( ierror )
call MPI_COMM_SIZE( MPI_COMM_WORLD , nb_mpi_processes , ierror )
call MPI_COMM_RANK( MPI_COMM_WORLD , rank , ierror )
if(nb_mpi_processes /= 4) stop 'This program is designed to run with exactly 4 processes'
nb_process_axe(1) = 4
cart_boundaries(1) = .false.
call MPI_CART_CREATE( MPI_COMM_WORLD , ndim , nb_process_axe(1:ndim) , &
& cart_boundaries(1:ndim) , .true. , MPI_COMM_CART , ierror )
call MPI_CART_COORDS( MPI_COMM_CART , rank , ndim , cart_position(1:ndim) , ierror )
! Since 2009 (MPI 2.2), MPI_CART_RANK no longer returns MPI_PROC_NULL for coordinates outside a non-periodic dimension (such a call is erroneous).
! You have to handle the domain boundaries yourself, which removes one of the greatest aspects of this type of communicator...
do i=-1,1
if((cart_boundaries(1) .eqv. .false.) .AND. &
& ((cart_position(1) == 0 .AND. i == -1) .OR. (cart_position(1) == nb_process_axe(1)-1 .AND. i == 1))) then
cart_neigh(i) = MPI_PROC_NULL
else
pos(1) = cart_position(1) + i
call MPI_CART_RANK( MPI_COMM_CART , pos(1:ndim) , cart_neigh(i) , ierror )
end if
end do
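! A possible alternative (not used here) is MPI_CART_SHIFT, which fills both neighbour ranks in one call
! and does return MPI_PROC_NULL for the off-end neighbours of a non-periodic dimension:
! call MPI_CART_SHIFT( MPI_COMM_CART , 0 , 1 , cart_neigh(-1) , cart_neigh(1) , ierror )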
! Global Variables
Global_Nx(1) = 100
Lx(:) = 0.1
Dx(:) = Lx(:)/((Global_Nx(:)-1)*1.0)
InvDxDx(:) = 1.0/(Dx(:)*Dx(:))
! Local Variables
if(modulo(Global_Nx(1),nb_mpi_processes) /= 0) stop "Error: the number of grid points is not divisible by the number of processes. STOP."
Nx(1) = Global_Nx(1)/nb_mpi_processes
allocate(T(0:Nx(1)+1),Residue(0:Nx(1)+1),p(0:Nx(1)+1),q(0:Nx(1)+1))
! Initial Solution
T(1:Nx(1)) = 0.0
if(rank == 0) T(0) = 10.0
if(rank == nb_mpi_processes-1) T(Nx(1)+1) = -1.0
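! T(0) and T(Nx(1)+1) are ghost cells : on the two end ranks they hold the Dirichlet boundary values set above
! (exchanges with MPI_PROC_NULL are no-ops, so these values are preserved) ; on the inner ranks they are
! overwritten by the halo exchange below.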
! Initialisation
! Recv left, send right
call MPI_SENDRECV ( T(Nx(1)) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(1) , tag , &
& T(0) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(-1) , tag , MPI_COMM_CART , statu , ierror )
! Recv right, send left
call MPI_SENDRECV ( T(1) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(-1) , tag , &
& T(Nx(1)+1) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(1) , tag , MPI_COMM_CART , statu , ierror )
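! Initial residual r = b - A T ; the right-hand side b is zero here, so r_i = -(T(i+1)-2*T(i)+T(i-1))/(Dx*Dx)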
do i=1,Nx(1)
Residue(i) = - (T(i+1)-2.0*T(i)+T(i-1)) * InvDxDx(1)
end do
p(:) = 0.0
beta = 0.0
rho = 0.0
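! rho = r . r : local partial sum, then MPI_ALLREDUCE gives the global dot product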
do i=1,Nx(1)
rho = rho + Residue(i)*Residue(i)
end do
call MPI_ALLREDUCE ( rho , sum_rho , 1 , MPI_DOUBLE_PRECISION , MPI_SUM , MPI_COMM_WORLD , ierror)
! Copy the reduction result back into rho. Prefer not to use the buffer variable (sum_rho, sum_gam, etc.) directly in the calculations, to avoid mistakes : it costs nothing and is safer.
rho = sum_rho
! Iteration
n = 0
do
n = n + 1
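! New search direction : p = r + beta*p (beta = 0 on the first iteration, so p = r)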
do i=1,Nx(1)
p(i) = Residue(i) + beta * p(i)
end do
! Matrix Operation : q = A p
call MPI_SENDRECV ( p(Nx(1)) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(1) , tag , &
& p(0) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(-1) , tag , MPI_COMM_CART , statu , ierror )
! Recv right, send left
call MPI_SENDRECV ( p(1) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(-1) , tag , &
& p(Nx(1)+1) , 1 , MPI_DOUBLE_PRECISION , cart_neigh(1) , tag , MPI_COMM_CART , statu , ierror )
do i=1,Nx(1)
q(i) = (p(i+1)-2.0*p(i)+p(i-1)) * InvDxDx(1)
end do
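! Step length alpha (stored in gam) : alpha = rho / (p . q), where the dot product is a global reduction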
gam = 0.0
do i=1,Nx(1)
gam = gam + p(i)*q(i)
end do
call MPI_ALLREDUCE ( gam , sum_gam , 1 , MPI_DOUBLE_PRECISION , MPI_SUM , MPI_COMM_WORLD , ierror)
! Same remark as above : copy the reduction buffer back into the working variable before using it.
gam = sum_gam
gam = rho / gam
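! Update the solution : T = T + gam * p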
do i=1,Nx(1)
T(i) = T(i) + gam * p(i)
end do
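! Update the residual : r = r - gam * q (equivalent to r = b - A T, without a second matrix product)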
do i=1,Nx(1)
Residue(i) = Residue(i) - gam * q(i)
end do
! Convergence test : stop when the maximum residual over all processes is below 1.e-7
call MPI_ALLREDUCE ( maxval(abs(Residue(1:Nx(1)))) , max_Residue , 1 , MPI_DOUBLE_PRECISION , MPI_MAX , MPI_COMM_WORLD , ierror)
if(max_Residue < 1.0d-7) exit ! Converged solution
if(rank==0) write(*,*) "CG step ",n,"Residue",max_Residue
rho0 = rho ! Save old rho value
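! Recompute rho = r . r with the updated residual ; beta = rho / rho0 is the standard CG update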
rho = 0.0
do i=1,Nx(1)
rho = rho + Residue(i)*Residue(i)
end do
call MPI_ALLREDUCE ( rho , sum_rho , 1 , MPI_DOUBLE_PRECISION , MPI_SUM , MPI_COMM_WORLD , ierror)
! Same remark as above : copy the reduction buffer back into the working variable before using it.
rho = sum_rho
beta = rho / rho0
end do
if(rank == 0) write(*,*) "Solution converged in ", n ," iterations, with a max residual of ", max_Residue
! Write the local part of the solution, one file per rank
write(charbuff,'(I4.4)') rank
open(10,file='OUT-CG.'//charbuff//'.dat')
do i=1,Nx(1)
write(10,*) i+(rank*Nx(1)),T(i)
end do
close(10)
deallocate(T,Residue,p,q)
call MPI_FINALIZE( ierror )
end program CG