I am trying to optimize a fortran program and to this end I'm trying to use openMP directives of the intel fortran compiler.
I'm a newbie in openMP, so I've taken the most basic approach:
- Code: Select all
program first_task
implicit none
integer :: ip,i,j
integer, parameter :: npd=40,nv=3,nc=4,ngkmax=64
complex(8),parameter :: one=(1.,0.),zero=(0.,0.)
complex(8),allocatable :: mat(:,:),sums(:),vaux(:),wfpw(:)
complex(8) :: aux,wfvr,aux2
allocate(mat(ngkmax,ngkmax),sums(npd),vaux(ngkmax),wfpw(ngkmax))
call omp_set_nested(.true.)
forall(i=1:ngkmax) wfpw(i)=cmplx(i)
sums=zero
!$OMP PARALLEL PRIVATE(aux,ip,i,j,wfvr) REDUCTION(+:aux2)
!$OMP DO
do ip=1,npd
aux=cmplx(1./ip)
call matrix(aux,ngkmax,mat)
aux2=zero
do i=1,nv
do j=1,nc
call zgemv('n',ngkmax,ngkmax,one,mat,ngkmax,wfpw,1,zero,vaux,1)
wfvr=dot_product(wfpw,vaux)
aux2=aux2+wfvr
end do
end do
sums(ip)=aux2
end do
!$OMP END PARALLEL
print*, sum(sums)
deallocate(mat,sums)
end program first_task
where (of course, I don't use just diagonal matrices):
- Code: Select all
subroutine matrix(fac,ndim,mat)
implicit none
integer,intent(in) ::ndim
complex(8),intent(in) :: fac
complex(8),intent(out) :: mat(:,:)
integer :: i
mat=(0.,0.)
forall(i=1:ndim) mat(i,i)=fac
end subroutine matrix
However at the end, after compiling and run, I obtain very different results from those given the code without OMP directives; moreover they change from run to run.
Using intel analysis tools, I could see the problem has to do with the fact some variables (mat and vaux, I think) are read and written by several threads, but honestly I do not how to fixed it.
Thanks in advance for any help,
Arcesio Castañeda Medina
Max-Planck Institute for Microstructure Physics
Weinberg 2
06120 Halle
Germany
