//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");