Hi guys,

I'm having a code that should compute LU decomposition. I observed that the calculation of U cannot be parallelized but for L we can have some paralelization there..The problem is the the code paralelized takes longer than the code executed without paralelization!!! Where am i wrong? Thank you

SUBROUTINE decLU(A,n,nmax)

integer n,i,j,k,nmax,CHUNK,TID

parameter(chunk=10)

real A(nmax,nmax)

real sumP(n),sum

do j=1,n

do i=1,j

sum=A(i,j)

do k=1,i-1

sum=sum-A(i,k)*A(k,j)

enddo

A(i,j)=sum

enddo

!$OMP PARALLEL SHARED(sumP,A) PRIVATE(i)

!$OMP DO SCHEDULE(DYNAMIC,CHUNK)

do i=j+1,n

sumP(i)=A(i,j)

do k=1,j-1

sumP(i)=sumP(i)-A(i,k)*A(k,j)

enddo

A(i,j)=sumP(i)/A(j,j)

enddo

!$OMP END DO NOWAIT

!$OMP END PARALLEL

enddo

return

END