Skip to content

Commit

Permalink
Apply similar optimisations from scalar to array ops (needs testing!).
Browse files Browse the repository at this point in the history
  • Loading branch information
Steve Kerrison committed Nov 18, 2011
1 parent 0606c78 commit 5c3f4e4
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 61 deletions.
66 changes: 44 additions & 22 deletions module_matrix/src/matrix.xc
Expand Up @@ -13,6 +13,27 @@
#include "matrix_worker.h"
#include "xs1.h"

/*
 * Split `size` work items across `nthreads` workers.
 *
 * Returns {blockSize, lastBlock}:
 *   blockSize - number of items given to each of the first (nthreads - 1)
 *               workers
 *   lastBlock - number of items given to the final worker, which starts at
 *               offset blockSize * (nthreads - 1)
 *
 * The two values always satisfy
 *   blockSize * (nthreads - 1) + lastBlock == size
 * so every item is assigned to exactly one worker.
 *
 * Degenerate inputs: a non-positive size yields {0,0}; fewer than two
 * threads yields {size,size} (the single worker takes everything).
 */
{int,int} matrix_calc_block(int size, int nthreads)
{
    int blockSize, lastBlock;

    if (size <= 0)
    {
        return {0,0};
    }
    if (nthreads <= 1)
    {
        return {size,size};
    }

    blockSize = size / nthreads;
    /* The remainder must be taken against the thread count. Using the block
     * size here would divide by zero when size < nthreads, and would report
     * "no remainder" whenever blockSize happens to divide size (e.g. size 10
     * over 4 threads), silently dropping the trailing items. */
    if (size % nthreads != 0)
    {
        /* Grow the common block only if the first (nthreads - 1) workers
         * still leave at least one item for the last worker. */
        if ((blockSize + 1) * (nthreads - 1) < size)
        {
            blockSize += 1;
        }
    }
    /* Whatever the leading workers do not cover goes to the last one; when
     * size divides evenly this equals blockSize. */
    lastBlock = size - (blockSize * (nthreads - 1));
    return {blockSize,lastBlock};
}

int matrix_redim(short dims[4],short rows, short columns)
{
if (dims[2] < rows || dims[3] < columns)
Expand All @@ -29,7 +50,7 @@ int matrix_sca_op(enum matrix_ops op, int A[], short dimA[2], int S,
{
int retval[8] = {0,0,0,0,0,0,0,0}, i;
int ptA, ptC, ptDimA, ptRetval;
int srcSize = dimA[0] * dimA[1], blockSize, blockRem, lastBlock;
int srcSize = dimA[0] * dimA[1], blockSize, lastBlock;
POINTER(ptA,A);
POINTER(ptC,C);
POINTER(ptDimA,dimA);
Expand Down Expand Up @@ -57,34 +78,20 @@ int matrix_sca_op(enum matrix_ops op, int A[], short dimA[2], int S,
/* Early-out for small data */
if (srcSize < MATRIX_NTHREADS * MATRIX_NTHREADS)
{
matrix_sca_worker(op,ptA,ptDimA,S,ptC,ptRetval,
matrix_sca_worker(op,ptA,S,ptC,ptRetval,
0, srcSize);
return retval[0];
}
/* More optimal distribution of workload */
blockSize = srcSize / MATRIX_NTHREADS;
blockRem = srcSize % blockSize;
if (blockRem != 0)
{
if ((blockSize + 1)*(MATRIX_NTHREADS-1) < srcSize)
{
blockSize += 1;

}
lastBlock = srcSize - (blockSize * (MATRIX_NTHREADS-1));
}
else
{
lastBlock = blockSize;
}
{blockSize,lastBlock} = matrix_calc_block(srcSize,MATRIX_NTHREADS);
par
{
par (int t = 0; t < MATRIX_NTHREADS-1; t++)
{
matrix_sca_worker(op,ptA,ptDimA,S,ptC,ptRetval+(t * sizeof(int)),
matrix_sca_worker(op,ptA,S,ptC,ptRetval+(t * sizeof(int)),
blockSize * t, blockSize);
}
matrix_sca_worker(op,ptA,ptDimA,S,ptC,ptRetval+((MATRIX_NTHREADS-1) * sizeof(int)),
matrix_sca_worker(op,ptA,S,ptC,ptRetval+((MATRIX_NTHREADS-1) * sizeof(int)),
blockSize * (MATRIX_NTHREADS-1), lastBlock);
}
for (i = 1; i < 8; i++)
Expand All @@ -99,6 +106,7 @@ int matrix_arr_op(enum matrix_ops op, int A[], short dimA[2], int B[], short dim
{
int retval[8] = {0,0,0,0,0,0,0,0}, i;
int ptA, ptB, ptC, ptDimA, ptDimB, ptRetval;
int srcSize = dimA[0] * dimA[1], blockSize, lastBlock;
POINTER(ptA,A);
POINTER(ptB,B);
POINTER(ptC,C);
Expand Down Expand Up @@ -129,10 +137,24 @@ int matrix_arr_op(enum matrix_ops op, int A[], short dimA[2], int B[], short dim
{
ptC = ptA;
}
par (int t = 0; t < MATRIX_NTHREADS; t++)
/* Early-out for small data */
if (srcSize < MATRIX_NTHREADS * MATRIX_NTHREADS)
{
matrix_arr_worker(op,ptA,ptDimA,ptB,ptDimB,ptC,ptRetval,
MATRIX_NTHREADS,t);
matrix_arr_worker(op,ptA,ptB,ptC,ptRetval,
0, srcSize);
return retval[0];
}
/* More optimal distribution of workload */
{blockSize,lastBlock} = matrix_calc_block(srcSize,MATRIX_NTHREADS);
par
{
par (int t = 0; t < MATRIX_NTHREADS-1; t++)
{
matrix_arr_worker(op,ptA,ptB,ptC,ptRetval+(t * sizeof(int)),
blockSize * t, blockSize);
}
matrix_arr_worker(op,ptA,ptB,ptC,ptRetval+((MATRIX_NTHREADS-1) * sizeof(int)),
blockSize * (MATRIX_NTHREADS-1), lastBlock);
}
for (i = 1; i < 8; i++)
{
Expand Down
64 changes: 28 additions & 36 deletions module_matrix/src/matrix_worker.c
Expand Up @@ -46,51 +46,43 @@ void matrix_mul_worker(int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
return;
}

void matrix_arr_worker(enum matrix_ops op, int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
int ptOps, char nThreads, char offset)
void matrix_arr_worker(enum matrix_ops op, int ptA, int ptB, int ptC,
int ptOps, short offset, short len)
{
int *A = (int *)ptA, *B = (int *)ptB, *C = (int *)ptC,
*ops = (int *)ptOps;
short *dimA = (short *)ptDimA, *dimB = (short *)ptDimB;
int r,c;
ops += offset;
*ops = 0;
int rlim = dimA[0], clim = dimB[1];
for (r = 0; r < rlim; r++)
*ops = (int *)ptOps, base;
for (base = offset; base < offset + len; base += 1)
{
for (c = offset; c < clim; c += nThreads)
int res, a = A[base], b = B[base];
switch (op)
{
int res, a = A[r * rlim + c], b = B[r * rlim + c];
switch (op)
{
case ADD:
res = a + b;
break;
case SUB:
res = a - b;
break;
case MUL:
res = a * b;
break;
case DIV:
case SDIV:
res = a / b;
break;
case UDIV:
res = (unsigned)a / (unsigned)b;
break;
case RAND: //Fall through to default
default:
break;
}
C[r * rlim + c] = res;
*ops += 1;
case ADD:
res = a + b;
break;
case SUB:
res = a - b;
break;
case MUL:
res = a * b;
break;
case DIV:
case SDIV:
res = a / b;
break;
case UDIV:
res = (unsigned)a / (unsigned)b;
break;
case RAND: //Fall through to default
default:
break;
}
C[base] = res;
}
*ops = len;
return;
}

void matrix_sca_worker(enum matrix_ops op, int ptA, int ptDimA, int S, int ptC,
void matrix_sca_worker(enum matrix_ops op, int ptA, int S, int ptC,
int ptOps, short offset, short len)
{
int *A = (int *)ptA, *C = (int *)ptC,
Expand Down
6 changes: 3 additions & 3 deletions module_matrix/src/matrix_worker.h
Expand Up @@ -16,10 +16,10 @@
void matrix_mul_worker(int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
int ptOps, char nThreads, char offset);

void matrix_arr_worker(enum matrix_ops op, int ptA, int ptDimA, int ptB, int ptDimB, int ptC,
int ptOps, char nThreads, char offset);
void matrix_arr_worker(enum matrix_ops op, int ptA, int ptB, int ptC,
int ptOps, short offset, short len);

void matrix_sca_worker(enum matrix_ops op, int ptA, int ptDimA, int S, int ptC,
void matrix_sca_worker(enum matrix_ops op, int ptA, int S, int ptC,
int ptOps, short offset, short len);

#endif /* MATRIX_WORKER_H_ */

0 comments on commit 5c3f4e4

Please sign in to comment.