Permalink
Browse files

Introduce new matrix multiplication method

This method produces correct results, and its work can be split across
workers in the same way as the scalar and array ops.

Performance still needs to be assessed.
  • Loading branch information...
1 parent d6c8e3d commit f27d6dadc5357380f177ad62a3e2d4d036bf3d3c Steve Kerrison committed Nov 24, 2011
Showing with 59 additions and 3 deletions.
  1. +19 −3 module_matrix/src/matrix.xc
  2. +37 −0 module_matrix/src/matrix_worker.c
  3. +3 −0 module_matrix/src/matrix_worker.h
View
22 module_matrix/src/matrix.xc
@@ -247,6 +247,7 @@ int matrix_mul(int A[], short dimA[2], int B[], short dimB[2],
{
int retval[8] = {0,0,0,0,0,0,0,0}, i;
int ptA, ptB, ptC, ptDimA, ptDimB, ptRetval;
+ int dstSize = dimA[0]* dimB[1], blockSize, lastBlock;
POINTER(ptA,A);
POINTER(ptB,B);
POINTER(ptC,C);
@@ -276,10 +277,25 @@ int matrix_mul(int A[], short dimA[2], int B[], short dimB[2],
//FIXME - Use a thread-safe strategy for in-place results
return -1; //In-place result not supported at the moment
}
- par (int t = 0; t < MATRIX_NTHREADS; t++)
+
+ /* Small matrix, use a single thread... */
+ if (dstSize < MATRIX_NTHREADS)
{
- matrix_mul_worker(ptA,ptDimA,ptB,ptDimB,ptC,ptRetval,
- MATRIX_NTHREADS, t);
+ matrix_mul_worker_new(ptA,ptDimA,ptB,ptDimB,ptC,0,dimA[0] * dimB[1],ptRetval);
+ return retval[0];
+ }
+ {blockSize,lastBlock} = matrix_calc_block(dstSize,MATRIX_NTHREADS);
+ par
+ {
+ par (int t = 0; t < MATRIX_NTHREADS-1; t++)
+ {
+ matrix_mul_worker_new(ptA,ptDimA,ptB,ptDimB,ptC,
+ blockSize * t, blockSize,
+ ptRetval + t * sizeof(int));
+ }
+ matrix_mul_worker_new(ptA,ptDimA,ptB,ptDimB,ptC,
+ blockSize * (MATRIX_NTHREADS-1), lastBlock,
+ ptRetval + (MATRIX_NTHREADS-1) * sizeof(int));
}
for (i = 1; i < 8; i++)
{
View
37 module_matrix/src/matrix_worker.c
@@ -21,6 +21,43 @@
#define myrand() rand()
#endif
/*
 * Compute a contiguous span of the row-major product C = A * B.
 *
 * A       - left operand, row-major, dimA[1] elements per row
 * dimA    - dimensions of A; only dimA[1] (the inner dimension) is read
 * B       - right operand, row-major, dimB[1] elements per row
 * dimB    - dimensions of B; only dimB[1] (the column count of C) is read
 * C       - destination, row-major with dimB[1] columns; elements
 *           [startC, startC + lenC) are overwritten, all others untouched.
 *           C must not overlap A or B: each element is accumulated in a
 *           local and stored once.
 * startC  - linear index of the first destination element to compute
 * lenC    - number of destination elements to compute
 * ops     - receives the operation count, lenC * (dimA[1] + 1); receives 0
 *           when B has no columns, in which case nothing is computed
 */
static void matrix_mul_span(const int *A, const short *dimA,
    const int *B, const short *dimB,
    int *C, int startC, int lenC, int *ops)
{
    const int inner = dimA[1];
    const int cols = dimB[1];
    int row, col;

    /* A column count of zero would make the row/column split below divide
       by zero; there is no destination element to compute in that case. */
    if (cols <= 0)
    {
        *ops = 0;
        return;
    }

    /* Translate the linear start index into a (row, col) position once,
       then step it incrementally instead of dividing per element. */
    row = startC / cols;
    col = startC % cols;

    for (int dst = startC; dst < startC + lenC; dst += 1)
    {
        int acc = 0;

        /* Dot product of row `row` of A with column `col` of B. */
        for (int e = 0; e < inner; e += 1)
        {
            acc += A[row * inner + e] * B[cols * e + col];
        }
        C[dst] = acc;

        col += 1;
        if (col == cols)
        {
            col = 0;
            row += 1;
        }
    }

    *ops = lenC * (inner + 1);
}

/*
 * Worker entry point: multiply a span of the result matrix.
 *
 * Every pt* argument is an address carried in an int and is cast back to a
 * pointer here (presumably so the XC caller can hand the same arrays to
 * several parallel workers - confirm against the POINTER macro).
 * startC and lenC are shorts, so a span is limited to the range of short.
 *
 * Writes C[startC .. startC + lenC - 1] and stores the operation count for
 * this span through ptOps.
 */
void matrix_mul_worker_new(int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
    short startC, short lenC, int ptOps)
{
    matrix_mul_span((const int *)ptA, (const short *)ptDimA,
        (const int *)ptB, (const short *)ptDimB,
        (int *)ptC, startC, lenC, (int *)ptOps);
}
+
void matrix_mul_worker(int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
int ptOps, char nThreads, char offset)
{
View
3 module_matrix/src/matrix_worker.h
@@ -16,6 +16,9 @@
void matrix_mul_worker(int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
int ptOps, char nThreads, char offset);
+void matrix_mul_worker_new(int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
+ short startC, short lenC, int ptOps);
+
/* Piecewise (array) ops */
void matrix_arr_worker_add(int ptA, int ptB, int ptC,

0 comments on commit f27d6da

Please sign in to comment.