Openmp with Fortran 90 Problem, inefficient

General OpenMP discussion

Openmp with Fortran 90 Problem, inefficient

Postby javierburgos » Thu Jun 06, 2013 1:11 am

[]regards,
I have a problem with openmp parallelize this loop with Fortran90
I compile and run this loop in series without openmp, the calculation time is 7.20E-2 seconds, but when I use openmp time is 0.1480 seconds, someone can tell me I'm doing wrong?
below give the example of the loop with openmp
time for calculation in serie = 7.20E-2 s
time for calculation in parallel=0.1480 s


Example Openmp

timeTot1 = 0.0d0
CALL CPU_TIME(time3)
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,y,x)
do x = 1, xDim
do y = 1, yDim
do i= 1,8
if ((x+v(i,0)>0).and.(x+v(i,0)<xDim+1) .and. (y+v(i,1)>0) .and. (y+v(i,1)<yDim+1)) then
if ((w(y,x) /= wall).and.(w(y+v(i,1),x+v(i,0)) == wall)) then
if ((v(i,0) /= 0).and.(v(i,1) /= 0)) then
m = (v(i,1))/(v(i,0))
b = y-m*x
!crear los fluid solid links para cada linea
x_sol = (bab-b)/(m-mab) !AB
y_sol = m*x_sol + b
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0).and.((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
x_sol = (bbc-b)/(m-mbc) !BC
y_sol = m*x_sol + b
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0).and.((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
x_sol = (bcd-b)/(m-mcd) !CD
y_sol = m*x_sol + b
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0).and.((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
x_sol = (bda-b)/(m-mda) !DA
y_sol = m*x_sol + b
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0).and.((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
else if (v(i,0) == 0) then
x_sol = x
y_sol = (mab*x+bab) !AB
if (((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
y_sol = (mbc*x+bbc) !BC
if (((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
y_sol = (mcd*x+bcd) !CD
if (((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
y_sol = (mda*x+bda) !DA
if (((y_sol-y)*v(i,1) < 1).and.((y_sol-y)*v(i,1)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
else if (v(i,1) == 0) then
x_sol = (y-bab)/mab !AB
y_sol = y
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
x_sol = (y-bbc)/mbc !BC
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
x_sol = (y-bcd)/mcd !CD
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
x_sol = (y-bda)/mda !DA
if (((x_sol-x)*v(i,0)<1).and.((x_sol-x)*v(i,0)>=0)) then
if ((y_sol-mab*x_sol-bab <= 0).and.(y_sol-mbc*x_sol-bbc <= 0).and.(y_sol-mcd*x_sol-bcd >= 0).and.(y_sol-mda*x_sol-bda >= 0)) then
cont_L = cont_L +1
x_w(cont_L,0) = x_sol
x_w(cont_L,1) = y_sol
write(31,*) x_w(cont_L,0),x_w(cont_L,1), i, x_w(cont_L,0)-x,x_w(cont_L,1)-y,cont_L
end if
end if
end if
! write(*,*) x_w(cont_L,0),x_w(cont_L,1),v(i,1)*(sqrt((x-obstX)**2.0d0 + (y-obstY)**2.0d0) - obstR),cont_L
end if
end if
end do
end do
end do
!$OMP END PARALLEL DO
CALL CPU_TIME(time4)
timeTot1 = timeTot1 + (time4-time3)
write(*,*) 'total time:', timeTot1, ' seconds


close(31)
javierburgos
 
Posts: 2
Joined: Wed Jun 05, 2013 7:52 am

Re: Openmp with Fortran 90 Problem, inefficient

Postby MarkB » Thu Jun 06, 2013 4:48 am

Hi there,

Please see my comment in your other thread about timers: viewtopic.php?f=3&t=1588#p6343

This is not a straightforward loop to parallelise, because of the use of cont_L to index into x_w. Instead of
Code: Select all
cont_L = cont_L + 1


you need to do something like

Code: Select all
!$omp atomic capture
g_cont_L = g_cont_L + 1
cont_L= g_cont_L
!$omp end atomic


where g_cont_L is shared and cont_L is private.
Even then you may not see very good speed up: an alternative would be to use a temporary array for each thread, and for each thread to copy back values into the correct place in x_w.

In addition, m,b, x_sol and y_sol should be private.

There is a lot of I/O in this loop, which will severely limit any scalability on most systems.

Hope that helps,
Mark.
MarkB
 
Posts: 422
Joined: Thu Jan 08, 2009 10:12 am


Return to Using OpenMP

Who is online

Users browsing this forum: Google [Bot] and 6 guests

cron