- Code: Select all
#pragma omp parallel shared(n)
{
for(round=0; round<n; ++round)
{
/* column step, I want to run this 4 G32 functions in parallel, but don't know,
that is proper approach to this problem */
#pragma omp critical
G32( 0, 4, 8,12, 0);
#pragma omp critical
G32( 1, 5, 9,13, 1);
#pragma omp critical
G32( 2, 6,10,14, 2);
#pragma omp critical
G32( 3, 7,11,15, 3);
/* diagonal step, and same here */
#pragma omp critical
G32( 0, 5,10,15, 4);
#pragma omp critical
G32( 1, 6,11,12, 5);
#pragma omp critical
G32( 2, 7, 8,13, 6);
#pragma omp critical
G32( 3, 4, 9,14, 7);
}
}
And this is the G32 function (implemented as a macro):
- Code: Select all
/*
 * G32(a,b,c,d,i): mix four words of the state array v in place.
 *
 *   a, b, c, d  indices into v; v[a], v[b], v[c] and v[d] are each
 *               rewritten twice, nothing else in v is touched.
 *   i           selects the pair sigma[round][2*i], sigma[round][2*i+1],
 *               whose values index the message words m and constants c32.
 *
 * The macro is not self-contained: it expects v, m, c32, sigma and round
 * to be visible at the expansion site and relies on the ADD32, XOR32 and
 * ROT32 helpers defined elsewhere. m, c32, sigma and round are only read.
 *
 * The parameter i is parenthesized wherever it is multiplied so that an
 * expression argument (for example k+1) selects the intended sigma entries.
 *
 * NOTE(review): this looks like the G function of a BLAKE-style 32-bit
 * hash; confirm against the reference implementation before changing it.
 */
#define G32(a,b,c,d,i)\
do { \
v[a] = ADD32(v[a],v[b])+XOR32(m[sigma[round][2*(i)]], c32[sigma[round][2*(i)+1]]);\
v[d] = ROT32(XOR32(v[d],v[a]),16);\
v[c] = ADD32(v[c],v[d]);\
v[b] = ROT32(XOR32(v[b],v[c]),12);\
v[a] = ADD32(v[a],v[b])+XOR32(m[sigma[round][2*(i)+1]], c32[sigma[round][2*(i)]]);\
v[d] = ROT32(XOR32(v[d],v[a]), 8);\
v[c] = ADD32(v[c],v[d]);\
v[b] = ROT32(XOR32(v[b],v[c]), 7);\
} while (0)
So the question is how to parallelize this loop properly?
