From ac10ed52cfa64782d49d0cb78b9fdd451850d3c5 Mon Sep 17 00:00:00 2001 From: Tom van Dijk Date: Thu, 16 Feb 2023 22:31:25 +0100 Subject: [PATCH] Redesign to supply LaceWorker* in every task --- benchmarks/cholesky/cholesky-lace.c | 76 +- benchmarks/cilksort/cilksort-lace.c | 34 +- benchmarks/dfs/dfs-lace.c | 6 +- benchmarks/fib/fib-lace.c | 8 +- benchmarks/fib/fib-lace.cpp | 8 +- benchmarks/heat/heat-lace.c | 28 +- benchmarks/integrate/integrate-lace.c | 8 +- benchmarks/knapsack/knapsack-lace.c | 8 +- benchmarks/matmul/matmul-lace.c | 18 +- benchmarks/nqueens/nqueens-lace.c | 6 +- benchmarks/pi/pi-lace.c | 8 +- benchmarks/strassen/strassen-lace.c | 30 +- benchmarks/uts/uts-lace.c | 6 +- benchmarks/uts/uts2-lace.c | 6 +- src/lace.c | 147 ++- src/lace.h | 804 ++++++------ src/lace.sh | 105 +- src/lace14.c | 147 ++- src/lace14.h | 1684 ++++++++++++------------- 19 files changed, 1546 insertions(+), 1591 deletions(-) diff --git a/benchmarks/cholesky/cholesky-lace.c b/benchmarks/cholesky/cholesky-lace.c index 636fb1a..b8a3ef7 100644 --- a/benchmarks/cholesky/cholesky-lace.c +++ b/benchmarks/cholesky/cholesky-lace.c @@ -351,7 +351,7 @@ static Matrix set_matrix(int depth, Matrix a, int r, int c, Real value) */ TASK_5(Matrix, mul_and_subT, int, depth, int, lower, Matrix, a, Matrix, b, Matrix, r) -Matrix mul_and_subT(int depth, int lower, Matrix a, Matrix b, Matrix r) +Matrix mul_and_subT(LaceWorker* worker, int depth, int lower, Matrix a, Matrix b, Matrix r) { if (depth == BLOCK_DEPTH) { LeafNode *A = (LeafNode *) a; @@ -389,46 +389,46 @@ Matrix mul_and_subT(int depth, int lower, Matrix a, Matrix b, Matrix r) // first spawn if (a->child[_00] && b->child[TR_00]) - mul_and_subT_SPAWN(depth, lower, a->child[_00], b->child[TR_00], r00); + mul_and_subT_SPAWN(worker, depth, lower, a->child[_00], b->child[TR_00], r00); if (!lower && a->child[_00] && b->child[TR_01]) - mul_and_subT_SPAWN(depth, 0, a->child[_00], b->child[TR_01], r01); + mul_and_subT_SPAWN(worker, depth, 0, 
a->child[_00], b->child[TR_01], r01); if (a->child[_10] && b->child[TR_00]) - mul_and_subT_SPAWN(depth, 0, a->child[_10], b->child[TR_00], r10); + mul_and_subT_SPAWN(worker, depth, 0, a->child[_10], b->child[TR_00], r10); if (a->child[_10] && b->child[TR_01]) - mul_and_subT_SPAWN(depth, lower, a->child[_10], b->child[TR_01], r11); + mul_and_subT_SPAWN(worker, depth, lower, a->child[_10], b->child[TR_01], r11); // then sync if (a->child[_10] && b->child[TR_01]) - r11 = mul_and_subT_SYNC(); + r11 = mul_and_subT_SYNC(worker); if (a->child[_10] && b->child[TR_00]) - r10 = mul_and_subT_SYNC(); + r10 = mul_and_subT_SYNC(worker); if (!lower && a->child[_00] && b->child[TR_01]) - r01 = mul_and_subT_SYNC(); + r01 = mul_and_subT_SYNC(worker); if (a->child[_00] && b->child[TR_00]) - r00 = mul_and_subT_SYNC(); + r00 = mul_and_subT_SYNC(worker); // first spawn if (a->child[_01] && b->child[TR_10]) - mul_and_subT_SPAWN(depth, lower, a->child[_01], b->child[TR_10], r00); + mul_and_subT_SPAWN(worker, depth, lower, a->child[_01], b->child[TR_10], r00); if (!lower && a->child[_01] && b->child[TR_11]) - mul_and_subT_SPAWN(depth, 0, a->child[_01], b->child[TR_11], r01); + mul_and_subT_SPAWN(worker, depth, 0, a->child[_01], b->child[TR_11], r01); if (a->child[_11] && b->child[TR_10]) - mul_and_subT_SPAWN(depth, 0, a->child[_11], b->child[TR_10], r10); + mul_and_subT_SPAWN(worker, depth, 0, a->child[_11], b->child[TR_10], r10); if (a->child[_11] && b->child[TR_11]) - mul_and_subT_SPAWN(depth, lower, a->child[_11], b->child[TR_11], r11); + mul_and_subT_SPAWN(worker, depth, lower, a->child[_11], b->child[TR_11], r11); // then sync if (a->child[_11] && b->child[TR_11]) - r11 = mul_and_subT_SYNC(); + r11 = mul_and_subT_SYNC(worker); if (a->child[_11] && b->child[TR_10]) - r10 = mul_and_subT_SYNC(); + r10 = mul_and_subT_SYNC(worker); if (!lower && a->child[_01] && b->child[TR_11]) - r01 = mul_and_subT_SYNC(); + r01 = mul_and_subT_SYNC(worker); if (a->child[_01] && b->child[TR_10]) - r00 = 
mul_and_subT_SYNC(); + r00 = mul_and_subT_SYNC(worker); if (r == NULL) { if (r00 || r01 || r10 || r11) @@ -449,7 +449,7 @@ Matrix mul_and_subT(int depth, int lower, Matrix a, Matrix b, Matrix r) */ TASK_3(Matrix, backsub, int, depth, Matrix, a, Matrix, l) -Matrix backsub(int depth, Matrix a, Matrix l) +Matrix backsub(LaceWorker* worker, int depth, Matrix a, Matrix l) { if (depth == BLOCK_DEPTH) { LeafNode *A = (LeafNode *) a; @@ -470,20 +470,20 @@ Matrix backsub(int depth, Matrix a, Matrix l) l10 = l->child[_10]; l11 = l->child[_11]; - if (a00) backsub_SPAWN(depth, a00, l00); - if (a10) backsub_SPAWN(depth, a10, l00); - if (a10) a10 = backsub_SYNC(); - if (a00) a00 = backsub_SYNC(); + if (a00) backsub_SPAWN(worker, depth, a00, l00); + if (a10) backsub_SPAWN(worker, depth, a10, l00); + if (a10) a10 = backsub_SYNC(worker); + if (a00) a00 = backsub_SYNC(worker); - if (a00 && l10) mul_and_subT_SPAWN(depth, 0, a00, l10, a01); - if (a10 && l10) mul_and_subT_SPAWN(depth, 0, a10, l10, a11); - if (a10 && l10) a11 = mul_and_subT_SYNC(); - if (a00 && l10) a01 = mul_and_subT_SYNC(); + if (a00 && l10) mul_and_subT_SPAWN(worker, depth, 0, a00, l10, a01); + if (a10 && l10) mul_and_subT_SPAWN(worker, depth, 0, a10, l10, a11); + if (a10 && l10) a11 = mul_and_subT_SYNC(worker); + if (a00 && l10) a01 = mul_and_subT_SYNC(worker); - if (a01) backsub_SPAWN(depth, a01, l11); - if (a11) backsub_SPAWN(depth, a11, l11); - if (a11) a11 = backsub_SYNC(); - if (a01) a01 = backsub_SYNC(); + if (a01) backsub_SPAWN(worker, depth, a01, l11); + if (a11) backsub_SPAWN(worker, depth, a11, l11); + if (a11) a11 = backsub_SYNC(worker); + if (a01) a01 = backsub_SYNC(worker); a->child[_00] = a00; a->child[_01] = a01; @@ -499,7 +499,7 @@ Matrix backsub(int depth, Matrix a, Matrix l) */ TASK_2(Matrix, cholesky, int, depth, Matrix, a) -Matrix cholesky(int depth, Matrix a) +Matrix cholesky(LaceWorker* worker, int depth, Matrix a) { if (depth == BLOCK_DEPTH) { LeafNode *A = (LeafNode *) a; @@ -514,14 +514,14 
@@ Matrix cholesky(int depth, Matrix a) a11 = a->child[_11]; if (!a10) { - cholesky_SPAWN(depth, a00); - a11 = cholesky(depth, a11); - a00 = cholesky_SYNC(); + cholesky_SPAWN(worker, depth, a00); + a11 = cholesky(worker, depth, a11); + a00 = cholesky_SYNC(worker); } else { - a00 = cholesky(depth, a00); - a10 = backsub(depth, a10, a00); - a11 = mul_and_subT(depth, 1, a10, a10, a11); - a11 = cholesky(depth, a11); + a00 = cholesky(worker, depth, a00); + a10 = backsub(worker, depth, a10, a00); + a11 = mul_and_subT(worker, depth, 1, a10, a10, a11); + a11 = cholesky(worker, depth, a11); } a->child[_00] = a00; a->child[_10] = a10; diff --git a/benchmarks/cilksort/cilksort-lace.c b/benchmarks/cilksort/cilksort-lace.c index f20ca56..6be145f 100644 --- a/benchmarks/cilksort/cilksort-lace.c +++ b/benchmarks/cilksort/cilksort-lace.c @@ -300,7 +300,7 @@ ELM *binsplit(ELM val, ELM *low, ELM *high) VOID_TASK_5(cilkmerge, ELM*, low1, ELM*, high1, ELM*, low2, ELM*, high2, ELM*, lowdest) -void cilkmerge(ELM* low1, ELM* high1, ELM* low2, ELM* high2, ELM* lowdest) +void cilkmerge(LaceWorker* worker, ELM* low1, ELM* high1, ELM* low2, ELM* high2, ELM* lowdest) { /* * Cilkmerge: Merges range [low1, high1] with range [low2, high2] @@ -352,15 +352,15 @@ void cilkmerge(ELM* low1, ELM* high1, ELM* low2, ELM* high2, ELM* lowdest) * the appropriate location */ *(lowdest + lowsize + 1) = *split1; - cilkmerge_SPAWN(low1, split1 - 1, low2, split2, lowdest); - cilkmerge(split1 + 1, high1, split2 + 1, high2, lowdest + lowsize + 2); - cilkmerge_SYNC(); + cilkmerge_SPAWN(worker, low1, split1 - 1, low2, split2, lowdest); + cilkmerge(worker, split1 + 1, high1, split2 + 1, high2, lowdest + lowsize + 2); + cilkmerge_SYNC(worker); return; } VOID_TASK_3(cilksort, ELM*, low, ELM*, tmp, long, size) -void cilksort(ELM* low, ELM* tmp, long size) +void cilksort(LaceWorker* worker, ELM* low, ELM* tmp, long size) { /* * divide the input in four parts of the same size (A, B, C, D) @@ -386,20 +386,20 @@ void 
cilksort(ELM* low, ELM* tmp, long size) D = C + quarter; tmpD = tmpC + quarter; - cilksort_SPAWN(A, tmpA, quarter); - cilksort_SPAWN(B, tmpB, quarter); - cilksort_SPAWN(C, tmpC, quarter); - cilksort_SPAWN(D, tmpD, size - 3 * quarter); - cilksort_SYNC(); - cilksort_SYNC(); - cilksort_SYNC(); - cilksort_SYNC(); + cilksort_SPAWN(worker, A, tmpA, quarter); + cilksort_SPAWN(worker, B, tmpB, quarter); + cilksort_SPAWN(worker, C, tmpC, quarter); + cilksort_SPAWN(worker, D, tmpD, size - 3 * quarter); + cilksort_SYNC(worker); + cilksort_SYNC(worker); + cilksort_SYNC(worker); + cilksort_SYNC(worker); - cilkmerge_SPAWN(A, A + quarter - 1, B, B + quarter - 1, tmpA); - cilkmerge(C, C + quarter - 1, D, low + size - 1, tmpC); - cilkmerge_SYNC(); + cilkmerge_SPAWN(worker, A, A + quarter - 1, B, B + quarter - 1, tmpA); + cilkmerge(worker, C, C + quarter - 1, D, low + size - 1, tmpC); + cilkmerge_SYNC(worker); - cilkmerge(tmpA, tmpC - 1, tmpC, tmpA + size - 1, A); + cilkmerge(worker, tmpA, tmpC - 1, tmpC, tmpA + size - 1, A); } void scramble_array(ELM *arr, unsigned long size) diff --git a/benchmarks/dfs/dfs-lace.c b/benchmarks/dfs/dfs-lace.c index 3c33324..b6e9f76 100644 --- a/benchmarks/dfs/dfs-lace.c +++ b/benchmarks/dfs/dfs-lace.c @@ -23,12 +23,12 @@ int __attribute__((noinline)) loop() TASK_1(int, tree, int, d) -int tree(int d) +int tree(LaceWorker* worker, int d) { if( d>0 ) { int i; - for (i=0;i 1) { int im = (il + iu) / 2; - heat_SPAWN(m, il, im); - heat(m, im, iu); - heat_SYNC(); + heat_SPAWN(worker, m, il, im); + heat(worker, m, im, iu); + heat_SYNC(worker); return; } @@ -81,14 +81,14 @@ void heat(double ** m, int il, int iu) } } -void diffuse(double ** out, double ** in, int il, int iu, double t) +void diffuse(LaceWorker* worker, double ** out, double ** in, int il, int iu, double t) { if (iu - il > 1) { int im = (il + iu) / 2; - diffuse_SPAWN(out, in, il, im, t); - diffuse(out, in, im, iu, t); - diffuse_SYNC(); + diffuse_SPAWN(worker, out, in, il, im, t); + 
diffuse(worker, out, in, im, iu, t); + diffuse_SYNC(worker); return; } @@ -145,23 +145,23 @@ void init(int n) } } -void prep() +void prep(LaceWorker* worker) { - heat(even, 0, nx); + heat(worker, even, 0, nx); } -void test() +void test(LaceWorker* worker) { double t = tu; int i; for (i = 1; i <= nt; i += 2) { - diffuse(odd, even, 0, nx, t += dt); - diffuse(even, odd, 0, nx, t += dt); + diffuse(worker, odd, even, 0, nx, t += dt); + diffuse(worker, even, odd, 0, nx, t += dt); } if (nt % 2) { - diffuse(odd, even, 0, nx, t += dt); + diffuse(worker, odd, even, 0, nx, t += dt); } } diff --git a/benchmarks/integrate/integrate-lace.c b/benchmarks/integrate/integrate-lace.c index d50138a..d9b6d44 100644 --- a/benchmarks/integrate/integrate-lace.c +++ b/benchmarks/integrate/integrate-lace.c @@ -16,7 +16,7 @@ static double f(double x) TASK_5(double, integrate, double, x1, double, y1, double, x2, double, y2, double, area) double -integrate(double x1, double y1, double x2, double y2, double area) +integrate(LaceWorker* worker, double x1, double y1, double x2, double y2, double area) { double half = (x2 - x1) / 2; double x0 = x1 + half; @@ -30,9 +30,9 @@ integrate(double x1, double y1, double x2, double y2, double area) return area_x1x2; } - integrate_SPAWN(x1, y1, x0, y0, area_x1x0); - area_x0x2 = integrate(x0, y0, x2, y2, area_x0x2); - area_x1x0 = integrate_SYNC(); + integrate_SPAWN(worker, x1, y1, x0, y0, area_x1x0); + area_x0x2 = integrate(worker, x0, y0, x2, y2, area_x0x2); + area_x1x0 = integrate_SYNC(worker); return area_x1x0 + area_x0x2; } diff --git a/benchmarks/knapsack/knapsack-lace.c b/benchmarks/knapsack/knapsack-lace.c index 5550365..66473fa 100644 --- a/benchmarks/knapsack/knapsack-lace.c +++ b/benchmarks/knapsack/knapsack-lace.c @@ -68,7 +68,7 @@ int read_input(const char *filename, struct item *items, int *capacity, int *n) */ TASK_4(int, knapsack, struct item *, e, int, c, int, n, int, v) -int knapsack(struct item *e, int c, int n, int v) +int 
knapsack(LaceWorker* worker, struct item *e, int c, int n, int v) { int with, without, best; double ub; @@ -89,12 +89,12 @@ int knapsack(struct item *e, int c, int n, int v) /* * compute the best solution without the current item in the knapsack */ - knapsack_SPAWN(e + 1, c, n - 1, v); + knapsack_SPAWN(worker, e + 1, c, n - 1, v); /* compute the best solution with the current item in the knapsack */ - with = knapsack(e + 1, c - e->weight, n - 1, v + e->value); + with = knapsack(worker, e + 1, c - e->weight, n - 1, v + e->value); - without = knapsack_SYNC(); + without = knapsack_SYNC(worker); best = with > without ? with : without; diff --git a/benchmarks/matmul/matmul-lace.c b/benchmarks/matmul/matmul-lace.c index 0921d72..d4e8a17 100644 --- a/benchmarks/matmul/matmul-lace.c +++ b/benchmarks/matmul/matmul-lace.c @@ -73,7 +73,7 @@ void iter_matmul(REAL *A, REAL *B, REAL *C, int n) * C \in M(m, p) */ VOID_TASK_8(rec_matmul, REAL*, A, REAL*, B, REAL*, C, int, m, int, n, int, p, int, ld, int, add) -void rec_matmul(REAL* A, REAL* B, REAL* C, int m, int n, int p, int ld, int add) +void rec_matmul(LaceWorker* worker, REAL* A, REAL* B, REAL* C, int m, int n, int p, int ld, int add) { if ((m + n + p) <= 64) { int i, j, k; @@ -97,18 +97,18 @@ void rec_matmul(REAL* A, REAL* B, REAL* C, int m, int n, int p, int ld, int add) } } else if (m >= n && n >= p) { int m1 = m >> 1; - rec_matmul_SPAWN(A, B, C, m1, n, p, ld, add); - rec_matmul(A + m1 * ld, B, C + m1 * ld, m - m1, n, p, ld, add); - rec_matmul_SYNC(); + rec_matmul_SPAWN(worker, A, B, C, m1, n, p, ld, add); + rec_matmul(worker, A + m1 * ld, B, C + m1 * ld, m - m1, n, p, ld, add); + rec_matmul_SYNC(worker); } else if (n >= m && n >= p) { int n1 = n >> 1; - rec_matmul(A, B, C, m, n1, p, ld, add); - rec_matmul(A + n1, B + n1 * ld, C, m, n - n1, p, ld, 1); + rec_matmul(worker, A, B, C, m, n1, p, ld, add); + rec_matmul(worker, A + n1, B + n1 * ld, C, m, n - n1, p, ld, 1); } else { int p1 = p >> 1; - rec_matmul_SPAWN(A, B, C, m, 
n, p1, ld, add); - rec_matmul(A, B + p1, C + p1, m, n, p - p1, ld, add); - rec_matmul_SYNC(); + rec_matmul_SPAWN(worker, A, B, C, m, n, p1, ld, add); + rec_matmul(worker, A, B + p1, C + p1, m, n, p - p1, ld, add); + rec_matmul_SYNC(worker); } } diff --git a/benchmarks/nqueens/nqueens-lace.c b/benchmarks/nqueens/nqueens-lace.c index 600c6a6..39af397 100644 --- a/benchmarks/nqueens/nqueens-lace.c +++ b/benchmarks/nqueens/nqueens-lace.c @@ -10,7 +10,7 @@ */ TASK_4(int, nqueens, const int*, a, int, n, int, d, int, i) -int nqueens(const int* a, int n, int d, int i) +int nqueens(LaceWorker* worker, const int* a, int n, int d, int i) { // copy queens from a to new array aa and check if ok int aa[d + 1]; @@ -32,13 +32,13 @@ int nqueens(const int* a, int n, int d, int i) // if not reached, place the next queen recursively for (int k = 0; k> 1; /* MatixSize / 2 */ @@ -546,36 +546,36 @@ void OptimizedStrassenMultiply(REAL * C, REAL * A, REAL * B, unsigned MatrixSize } /* end column loop */ /* M2 = A11 x B11 */ - OptimizedStrassenMultiply_SPAWN(M2, A11, B11, QuadrantSize, QuadrantSize, RowWidthA, RowWidthB); + OptimizedStrassenMultiply_SPAWN(worker, M2, A11, B11, QuadrantSize, QuadrantSize, RowWidthA, RowWidthB); /* M5 = S1 * S5 */ - OptimizedStrassenMultiply_SPAWN(M5, S1, S5, QuadrantSize, QuadrantSize, QuadrantSize, QuadrantSize); + OptimizedStrassenMultiply_SPAWN(worker, M5, S1, S5, QuadrantSize, QuadrantSize, QuadrantSize, QuadrantSize); /* Step 1 of T1 = S2 x S6 + M2 */ - OptimizedStrassenMultiply_SPAWN(T1sMULT, S2, S6, QuadrantSize, QuadrantSize, QuadrantSize, QuadrantSize); + OptimizedStrassenMultiply_SPAWN(worker, T1sMULT, S2, S6, QuadrantSize, QuadrantSize, QuadrantSize, QuadrantSize); /* Step 1 of T2 = T1 + S3 x S7 */ - OptimizedStrassenMultiply_SPAWN(C22, S3, S7, QuadrantSize, RowWidthC /*FIXME*/, QuadrantSize, QuadrantSize); + OptimizedStrassenMultiply_SPAWN(worker, C22, S3, S7, QuadrantSize, RowWidthC /*FIXME*/, QuadrantSize, QuadrantSize); /* Step 1 of C11 = M2 + 
A12 * B21 */ - OptimizedStrassenMultiply_SPAWN(C11, A12, B21, QuadrantSize, RowWidthC, RowWidthA, RowWidthB); + OptimizedStrassenMultiply_SPAWN(worker, C11, A12, B21, QuadrantSize, RowWidthC, RowWidthA, RowWidthB); /* Step 1 of C12 = S4 x B22 + T1 + M5 */ - OptimizedStrassenMultiply_SPAWN(C12, S4, B22, QuadrantSize, RowWidthC, QuadrantSize, RowWidthB); + OptimizedStrassenMultiply_SPAWN(worker, C12, S4, B22, QuadrantSize, RowWidthC, QuadrantSize, RowWidthB); /* Step 1 of C21 = T2 - A22 * S8 */ - OptimizedStrassenMultiply_SPAWN(C21, A22, S8, QuadrantSize, RowWidthC, RowWidthA, QuadrantSize); + OptimizedStrassenMultiply_SPAWN(worker, C21, A22, S8, QuadrantSize, RowWidthC, RowWidthA, QuadrantSize); /********************************************** ** Synchronization Point **********************************************/ - OptimizedStrassenMultiply_SYNC(); - OptimizedStrassenMultiply_SYNC(); - OptimizedStrassenMultiply_SYNC(); - OptimizedStrassenMultiply_SYNC(); - OptimizedStrassenMultiply_SYNC(); - OptimizedStrassenMultiply_SYNC(); - OptimizedStrassenMultiply_SYNC(); + OptimizedStrassenMultiply_SYNC(worker); + OptimizedStrassenMultiply_SYNC(worker); + OptimizedStrassenMultiply_SYNC(worker); + OptimizedStrassenMultiply_SYNC(worker); + OptimizedStrassenMultiply_SYNC(worker); + OptimizedStrassenMultiply_SYNC(worker); + OptimizedStrassenMultiply_SYNC(worker); /*************************************************************************** diff --git a/benchmarks/uts/uts-lace.c b/benchmarks/uts/uts-lace.c index 739a4bc..c342884 100644 --- a/benchmarks/uts/uts-lace.c +++ b/benchmarks/uts/uts-lace.c @@ -68,7 +68,7 @@ typedef struct { } Result; TASK_2(Result, parTreeSearch, int, depth, Node *, parent) -Result parTreeSearch(int depth, Node *parent) { +Result parTreeSearch(LaceWorker* worker, int depth, Node *parent) { int numChildren, childType; counter_t parentHeight = parent->height; @@ -91,10 +91,10 @@ Result parTreeSearch(int depth, Node *parent) { for (j = 0; j < 
computeGranularity; j++) { rng_spawn(parent->state.state, child->state.state, i); } - parTreeSearch_SPAWN(depth+1, child); + parTreeSearch_SPAWN(worker, depth+1, child); } for (i = 0; i < numChildren; i++) { - Result c = parTreeSearch_SYNC(); + Result c = parTreeSearch_SYNC(worker); if (c.maxdepth>r.maxdepth) r.maxdepth = c.maxdepth; r.size += c.size; r.leaves += c.leaves; diff --git a/benchmarks/uts/uts2-lace.c b/benchmarks/uts/uts2-lace.c index 0325c64..ee54456 100644 --- a/benchmarks/uts/uts2-lace.c +++ b/benchmarks/uts/uts2-lace.c @@ -69,7 +69,7 @@ typedef struct { } Result; TASK_2(Result, parTreeSearch, int, depth, Node *, parent) -Result parTreeSearch(int depth, Node * parent) { +Result parTreeSearch(LaceWorker* worker, int depth, Node * parent) { int numChildren, childType; counter_t parentHeight = parent->height; @@ -92,7 +92,7 @@ Result parTreeSearch(int depth, Node * parent) { for (j = 0; j < computeGranularity; j++) { rng_spawn(parent->state.state, child->state.state, i); } - parTreeSearch_SPAWN(depth+1, child); + parTreeSearch_SPAWN(worker, depth+1, child); } /* Wait a bit */ @@ -100,7 +100,7 @@ Result parTreeSearch(int depth, Node * parent) { nanosleep(&tim, NULL); for (i = 0; i < numChildren; i++) { - Result c = parTreeSearch_SYNC(); + Result c = parTreeSearch_SYNC(worker); if (c.maxdepth>r.maxdepth) r.maxdepth = c.maxdepth; r.size += c.size; r.leaves += c.leaves; diff --git a/src/lace.c b/src/lace.c index a4304c7..5301c3b 100644 --- a/src/lace.c +++ b/src/lace.c @@ -93,8 +93,8 @@ static unsigned int n_workers = 0; typedef struct { Worker worker_public; char pad1[PAD(sizeof(Worker), LINE_SIZE)]; - WorkerP worker_private; - char pad2[PAD(sizeof(WorkerP), LINE_SIZE)]; + LaceWorker worker_private; + char pad2[PAD(sizeof(LaceWorker), LINE_SIZE)]; Task deque[]; } worker_data; @@ -111,7 +111,7 @@ static size_t workers_memory_size = 0; /** * (Secret) holds pointer to private Worker data, just for stats collection at end */ -static WorkerP **workers_p; 
+static LaceWorker **workers_p; /** * Flag to signal all workers to quit. @@ -123,7 +123,7 @@ static atomic_uint workers_running = 0; * Thread-specific mechanism to access current worker data */ #ifdef __linux__ -__thread WorkerP *lace_thread_worker; +__thread LaceWorker *lace_thread_worker; #else pthread_key_t lace_thread_worker_key; #endif @@ -132,10 +132,10 @@ pthread_key_t lace_thread_worker_key; #define LACE_LEAP_RANDOM 1 #endif -Worker* lace_steal(WorkerP *self, Worker *victim); -int lace_shrink_shared(WorkerP *w); -void lace_leapfrog(WorkerP *__lace_worker); -void lace_drop_slow(WorkerP *w, Task *head); +Worker* lace_steal(LaceWorker *self, Worker *victim); +int lace_shrink_shared(LaceWorker *w); +void lace_leapfrog(LaceWorker *__lace_worker); +void lace_drop_slow(LaceWorker *w, Task *head); /** * Global newframe variable used for the implementation of NEWFRAME and TOGETHER @@ -242,7 +242,7 @@ lace_check_memory(void) { #if LACE_USE_HWLOC // get our current worker - WorkerP *w = lace_get_worker(); + LaceWorker *w = lace_get_worker(); void* mem = workers_memory[w->worker]; // get pinned PUs @@ -374,7 +374,7 @@ lace_init_worker(unsigned int worker) // Set pointers Worker *wt = workers[worker] = &workers_memory[worker]->worker_public; - WorkerP *w = workers_p[worker] = &workers_memory[worker]->worker_private; + LaceWorker *w = workers_p[worker] = &workers_memory[worker]->worker_private; w->dq = workers_memory[worker]->deque; w->head = w->dq; #ifdef __linux__ @@ -497,9 +497,9 @@ void lace_run_task(Task *task) { // check if we are really not in a Lace thread - WorkerP* self = lace_get_worker(); + LaceWorker* self = lace_get_worker(); if (self != 0) { - task->f(task); + task->f(self, task); } else { // if needed, wake up the workers lace_resume(); @@ -521,7 +521,7 @@ lace_run_task(Task *task) } static inline void -lace_steal_external(WorkerP *self) +lace_steal_external(LaceWorker *self) { ExtTask *stolen_task = atomic_exchange(&external_task, NULL); if (stolen_task 
!= 0) { @@ -530,7 +530,7 @@ lace_steal_external(WorkerP *self) atomic_store_explicit(&stolen_task->task->thief, self->_public, memory_order_relaxed); lace_time_event(self, 1); // atomic_thread_fence(memory_order_relaxed); - stolen_task->task->f(stolen_task->task); + stolen_task->task->f(self, stolen_task->task); // atomic_thread_fence(memory_order_relaxed); lace_time_event(self, 2); // atomic_thread_fence(memory_order_relaxed); @@ -544,11 +544,9 @@ lace_steal_external(WorkerP *self) /** * (Try to) steal and execute a task from a random worker. */ -//VOID_TASK_0(lace_steal_random); -void lace_steal_random(void) +void lace_steal_random(LaceWorker *__lace_worker) { - WorkerP *__lace_worker = lace_get_worker(); - lace_check_yield(); + lace_check_yield(__lace_worker); if (__builtin_expect(atomic_load_explicit(&external_task, memory_order_acquire) != 0, 0)) { lace_steal_external(__lace_worker); @@ -571,17 +569,17 @@ void lace_steal_random(void) */ VOID_TASK_1(lace_steal_loop, atomic_int*, quit); -void lace_steal_loop(atomic_int* quit) +void lace_steal_loop(LaceWorker* lace_worker, atomic_int* quit) { // Determine who I am - const int worker_id = lace_get_worker()->worker; + const int worker_id = lace_worker->worker; // Prepare self, victim Worker ** const self = &workers[worker_id]; Worker ** victim = self; #if LACE_PIE_TIMES - __lace_worker->time = gethrtime(); + lace_worker->time = gethrtime(); #endif uint32_t seed = worker_id; @@ -602,16 +600,16 @@ void lace_steal_loop(atomic_int* quit) victim = workers + (rng(&seed, n-1) + worker_id + 1) % n; } - PR_COUNTSTEALS(__lace_worker, CTR_steal_tries); - Worker *res = lace_steal(lace_get_worker(), *victim); + PR_COUNTSTEALS(lace_worker, CTR_steal_tries); + Worker *res = lace_steal(lace_worker, *victim); if (res == LACE_STOLEN) { - PR_COUNTSTEALS(__lace_worker, CTR_steals); + PR_COUNTSTEALS(lace_worker, CTR_steals); } else if (res == LACE_BUSY) { - PR_COUNTSTEALS(__lace_worker, CTR_steal_busy); + PR_COUNTSTEALS(lace_worker, 
CTR_steal_busy); } } - lace_check_yield(); + lace_check_yield(lace_worker); if (__builtin_expect(atomic_load_explicit(&external_task, memory_order_acquire) != 0, 0)) { lace_steal_external(lace_get_worker()); @@ -648,7 +646,7 @@ lace_worker_thread(void* arg) workers_running += 1; // Run the steal loop - lace_steal_loop(&lace_quits); + lace_steal_loop(lace_get_worker(), &lace_quits); // Time worker exit event lace_time_event(__lace_worker, 9); @@ -1014,9 +1012,8 @@ void lace_stop() * 5) Restore the old frame */ void -lace_exec_in_new_frame(Task *root) +lace_exec_in_new_frame(LaceWorker* __lace_worker, Task *root) { - WorkerP *__lace_worker = lace_get_worker(); Task *__lace_dq_head = __lace_worker->head; TailSplitNA old; @@ -1046,7 +1043,7 @@ lace_exec_in_new_frame(Task *root) lace_barrier(); // execute task - root->f(root); + root->f(__lace_worker, root); // wait until all workers are back (else they may steal from previous frame) lace_barrier(); @@ -1066,7 +1063,7 @@ lace_exec_in_new_frame(Task *root) * Each Lace worker executes lace_yield to execute the task in a new frame. 
*/ void -lace_yield(void) +lace_yield(LaceWorker *worker) { // make a local copy of the task Task _t; @@ -1075,7 +1072,7 @@ lace_yield(void) // wait until all workers have made a local copy lace_barrier(); - lace_exec_in_new_frame(&_t); + lace_exec_in_new_frame(worker, &_t); } /** @@ -1085,22 +1082,22 @@ lace_yield(void) VOID_TASK_2(lace_together_root, Task*, t, atomic_int*, finished); void -lace_together_root(Task* t, atomic_int* finished) +lace_together_root(LaceWorker* lace_worker, Task* t, atomic_int* finished) { // run the root task - t->f(t); + t->f(lace_worker, t); // signal out completion *finished -= 1; // while threads aren't done, steal randomly - while (*finished != 0) lace_steal_random(); + while (*finished != 0) lace_steal_random(lace_worker); } VOID_TASK_1(lace_wrap_together, Task*, task); void -lace_wrap_together(Task* task) +lace_wrap_together(LaceWorker* worker, Task* task) { /* synchronization integer (decrease by 1 when done...) */ atomic_int done = n_workers; @@ -1117,7 +1114,7 @@ lace_wrap_together(Task* task) while (1) { Task *expected = 0; if (atomic_compare_exchange_weak(&lace_newframe.t, &expected, &_t2)) break; - lace_yield(); + lace_yield(worker); } // wait until other workers have made a local copy @@ -1126,21 +1123,21 @@ lace_wrap_together(Task* task) // reset the newframe struct atomic_store_explicit(&lace_newframe.t, NULL, memory_order_relaxed); - lace_exec_in_new_frame(&_t2); + lace_exec_in_new_frame(worker, &_t2); } VOID_TASK_2(lace_newframe_root, Task*, t, atomic_int*, done); void -lace_newframe_root(Task* t, atomic_int *done) +lace_newframe_root(LaceWorker *lace_worker, Task* t, atomic_int *done) { - t->f(t); + t->f(lace_worker, t); *done = 1; } VOID_TASK_1(lace_wrap_newframe, Task*, task); void -lace_wrap_newframe(Task *task) +lace_wrap_newframe(LaceWorker* worker, Task* task) { /* synchronization integer (set to 1 when done...) 
*/ atomic_int done = 0; @@ -1156,7 +1153,7 @@ lace_wrap_newframe(Task *task) while (1) { Task *expected = 0; if (atomic_compare_exchange_weak(&lace_newframe.t, &expected, &_s)) break; - lace_yield(); + lace_yield(worker); } // wait until other workers have made a local copy @@ -1173,15 +1170,15 @@ lace_wrap_newframe(Task *task) t2->d.args.arg_1 = task; t2->d.args.arg_2 = &done; - lace_exec_in_new_frame(&_t2); + lace_exec_in_new_frame(worker, &_t2); } void lace_run_together(Task *t) { - WorkerP* self = lace_get_worker(); + LaceWorker* self = lace_get_worker(); if (self != 0) { - lace_wrap_together(t); + lace_wrap_together(self, t); } else { lace_wrap_together_RUN(t); } @@ -1190,9 +1187,9 @@ lace_run_together(Task *t) void lace_run_newframe(Task *t) { - WorkerP* self = lace_get_worker(); + LaceWorker* self = lace_get_worker(); if (self != 0) { - lace_wrap_newframe(t); + lace_wrap_newframe(self, t); } else { lace_wrap_newframe_RUN(t); } @@ -1209,7 +1206,7 @@ lace_abort_stack_overflow(void) } Worker* -lace_steal(WorkerP *self, Worker *victim) +lace_steal(LaceWorker *self, Worker *victim) { if (victim != NULL && !victim->allstolen) { TailSplitNA ts; @@ -1223,7 +1220,7 @@ lace_steal(WorkerP *self, Worker *victim) Task *t = &victim->dq[ts.ts.tail]; atomic_store_explicit(&t->thief, self->_public, memory_order_relaxed); lace_time_event(self, 1); - t->f(t); + t->f(self, t); lace_time_event(self, 2); atomic_store_explicit(&t->thief, THIEF_COMPLETED, memory_order_release); lace_time_event(self, 8); @@ -1245,7 +1242,7 @@ lace_steal(WorkerP *self, Worker *victim) } int -lace_shrink_shared(WorkerP *w) +lace_shrink_shared(LaceWorker *w) { Worker *wt = w->_public; TailSplitNA ts; /* Use non-atomic version to emit better code */ @@ -1275,29 +1272,32 @@ lace_shrink_shared(WorkerP *w) } void -lace_leapfrog(WorkerP *__lace_worker) +lace_leapfrog(LaceWorker *lace_worker) { - lace_time_event(__lace_worker, 3); - Task *t = __lace_worker->head; + lace_time_event(lace_worker, 3); + Task *t 
= lace_worker->head; Worker *thief = t->thief; if (thief != THIEF_COMPLETED) { while ((size_t)thief <= 1) thief = t->thief; /* PRE-LEAP: increase head again */ - __lace_worker->head += 1; + lace_worker->head += 1; /* Now leapfrog */ int attempts = 32; while (thief != THIEF_COMPLETED) { - PR_COUNTSTEALS(__lace_worker, CTR_leap_tries); - Worker *res = lace_steal(__lace_worker, thief); + PR_COUNTSTEALS(lace_worker, CTR_leap_tries); + Worker *res = lace_steal(lace_worker, thief); if (res == LACE_NOWORK) { - lace_check_yield(); - if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { lace_steal_random(); attempts = 32; } + lace_check_yield(lace_worker); + if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { + lace_steal_random(lace_worker); + attempts = 32; + } } else if (res == LACE_STOLEN) { - PR_COUNTSTEALS(__lace_worker, CTR_leaps); + PR_COUNTSTEALS(lace_worker, CTR_leaps); } else if (res == LACE_BUSY) { - PR_COUNTSTEALS(__lace_worker, CTR_leap_busy); + PR_COUNTSTEALS(lace_worker, CTR_leap_busy); } atomic_thread_fence(memory_order_acquire); thief = t->thief; @@ -1305,24 +1305,24 @@ lace_leapfrog(WorkerP *__lace_worker) /* POST-LEAP: really pop the finished task */ atomic_thread_fence(memory_order_acquire); - if (__lace_worker->allstolen == 0) { + if (lace_worker->allstolen == 0) { /* Assume: tail = split = head (pre-pop) */ /* Now we do a real pop ergo either decrease tail,split,head or declare allstolen */ - Worker *wt = __lace_worker->_public; + Worker *wt = lace_worker->_public; wt->allstolen = 1; - __lace_worker->allstolen = 1; + lace_worker->allstolen = 1; } - __lace_worker->head -= 1; + lace_worker->head -= 1; } /*compiler_barrier();*/ atomic_thread_fence(memory_order_acquire); atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); - lace_time_event(__lace_worker, 4); + lace_time_event(lace_worker, 4); } int -lace_sync(WorkerP *w, Task *head) +lace_sync(LaceWorker *w, Task *head) { if ((w->allstolen) || (w->split > head && lace_shrink_shared(w))) { 
lace_leapfrog(w); @@ -1344,22 +1344,21 @@ lace_sync(WorkerP *w, Task *head) } void -lace_drop_slow(WorkerP *w, Task *head) +lace_drop_slow(LaceWorker *w, Task *head) { if ((w->allstolen) || (w->split > head && lace_shrink_shared(w))) lace_leapfrog(w); } void -lace_drop(void) +lace_drop(LaceWorker *_lace_worker) { - WorkerP *w = lace_get_worker(); - Task* lace_head = w->head - 1; - w->head = lace_head; - if (__builtin_expect(0 == w->_public->movesplit, 1)) { - if (__builtin_expect(w->split <= lace_head, 1)) { + Task* lace_head = _lace_worker->head - 1; + _lace_worker->head = lace_head; + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { + if (__builtin_expect(_lace_worker->split <= lace_head, 1)) { return; } } - lace_drop_slow(w, lace_head); + lace_drop_slow(_lace_worker, lace_head); } diff --git a/src/lace.h b/src/lace.h index df440ec..aec10ac 100644 --- a/src/lace.h +++ b/src/lace.h @@ -44,10 +44,10 @@ extern "C" { /** * Type definitions used in the functions below. - * - WorkerP contains the (private) Worker data + * - LaceWorker contains the (private) Worker data * - Task contains a single Task */ -typedef struct _WorkerP WorkerP; +typedef struct _LaceWorker LaceWorker; typedef struct _Task Task; /* Typical cacheline size of system architectures */ @@ -69,7 +69,7 @@ typedef struct _Task Task; #endif #define TASK_COMMON_FIELDS(type) \ - void (*f)(struct type *); \ + void (*f)(LaceWorker *, struct type *); \ _Atomic(struct _Worker*) thief; struct __lace_common_fields_only { TASK_COMMON_FIELDS(_Task) }; @@ -109,7 +109,7 @@ typedef struct _Worker { uint8_t movesplit; } Worker; -typedef struct _WorkerP { +typedef struct _LaceWorker { Task *head; // my head Task *split; // same as dq+ts.ts.split Task *end; // dq+dq_size @@ -127,10 +127,10 @@ typedef struct _WorkerP { #endif int16_t pu; // my pu (for HWLOC) -} WorkerP; +} LaceWorker; #ifdef __linux__ -extern __thread WorkerP *lace_thread_worker; +extern __thread LaceWorker *lace_thread_worker; #else 
extern pthread_key_t lace_thread_worker_key; #endif @@ -186,7 +186,7 @@ void lace_stop(void); * Steal a random task. * Only use this from inside a Lace task. */ -void lace_steal_random(void); +void lace_steal_random(LaceWorker*); /** * Enter the Lace barrier. (all active workers must enter it before we can continue) @@ -204,13 +204,13 @@ unsigned int lace_worker_count(void); * Only run this from inside a Lace task. * (Used by LACE_VARS) */ -static inline WorkerP* +static inline LaceWorker* lace_get_worker(void) { #ifdef __linux__ return lace_thread_worker; #else - return (WorkerP*)pthread_getspecific(lace_thread_worker_key); + return (LaceWorker*)pthread_getspecific(lace_thread_worker_key); #endif } @@ -222,7 +222,7 @@ static inline int lace_is_worker(void) { return lace_get_worker() != NULL ? 1 : /** * Retrieve the current head of the deque of the worker. */ - static inline Task *lace_get_head(void) { return lace_get_worker()->head; } +static inline Task *lace_get_head(void) { return lace_get_worker()->head; } /** * Helper function to call from outside Lace threads. @@ -248,7 +248,7 @@ void lace_run_together(Task *task); /** * Instead of SYNCing on the next task, drop the task (unless stolen already) */ -void lace_drop(void); +void lace_drop(LaceWorker *lace_worker); /** * Get the current worker id. @@ -278,7 +278,7 @@ static inline int lace_is_completed_task(Task* t) { return ((size_t)(Worker*)t-> /** * Check if current tasks must be interrupted, and if so, interrupt. */ -static inline void lace_check_yield(void); +static inline void lace_check_yield(LaceWorker*); /** * Make all tasks of the current worker shared. 
@@ -379,7 +379,7 @@ typedef enum { #define LACE_NOWORK ((Worker*)2) #if LACE_PIE_TIMES -static void lace_time_event( WorkerP *w, int event ) +static void lace_time_event( LaceWorker *w, int event ) { uint64_t now = gethrtime(), prev = w->time; @@ -489,12 +489,12 @@ extern lace_newframe_t lace_newframe; /** * Interrupt the current worker and run a task in a new frame */ -void lace_yield(void); +void lace_yield(LaceWorker*); /** * Check if current tasks must be interrupted, and if so, interrupt. */ -static inline void lace_check_yield(void) { if (__builtin_expect(atomic_load_explicit(&lace_newframe.t, memory_order_relaxed) != NULL, 0)) lace_yield(); } +static inline void lace_check_yield(LaceWorker *w) { if (__builtin_expect(atomic_load_explicit(&lace_newframe.t, memory_order_relaxed) != NULL, 0)) lace_yield(w); } /** * Make all tasks of the current worker shared. @@ -502,7 +502,7 @@ static inline void lace_check_yield(void) { if (__builtin_expect(atomic_load_exp static inline void __attribute__((unused)) lace_make_all_shared(void) { - WorkerP* w = lace_get_worker(); + LaceWorker* w = lace_get_worker(); if (w->split != w->head) { w->split = w->head; w->_public->ts.ts.split = w->head - w->dq; @@ -512,7 +512,7 @@ lace_make_all_shared(void) /** * Helper function for _SYNC implementations */ -int lace_sync(WorkerP *w, Task *head); +int lace_sync(LaceWorker *w, Task *head); // Task macros for tasks of arity 0 @@ -527,21 +527,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(); \ +RTYPE NAME(LaceWorker*); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(); \ + t->d.res = NAME(lace_worker); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN() \ +void NAME##_SPAWN(LaceWorker* _lace_worker) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -553,26 +552,26 @@ void NAME##_SPAWN() \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -601,8 +600,9 @@ void NAME##_TOGETHER() static inline __attribute__((unused)) \ RTYPE NAME##_RUN() \ { \ - if (lace_is_worker()) { \ - return NAME(); \ + 
LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -614,27 +614,26 @@ RTYPE NAME##_RUN() } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } \ } \ \ @@ -650,21 +649,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(); \ +void NAME(LaceWorker*); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(); \ + NAME(lace_worker); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN() \ +void NAME##_SPAWN(LaceWorker* _lace_worker) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -676,26 +674,26 @@ void NAME##_SPAWN() \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -724,8 +722,9 @@ void NAME##_TOGETHER() static inline __attribute__((unused)) \ void NAME##_RUN() \ { \ - if (lace_is_worker()) { \ - return NAME(); \ + LaceWorker *worker = 
lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -737,27 +736,26 @@ void NAME##_RUN() } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } \ } \ \ @@ -776,21 +774,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1); \ +RTYPE NAME(LaceWorker*, ATYPE_1); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1); \ + t->d.res = NAME(lace_worker, t->d.args.arg_1); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -802,26 +799,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1) t->d.args.arg_1 = arg_1; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -850,8 +847,9 @@ void 
NAME##_TOGETHER(ATYPE_1 arg_1) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -863,27 +861,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ @@ -899,21 +896,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1); \ +void NAME(LaceWorker*, ATYPE_1); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1); \ + NAME(lace_worker, t->d.args.arg_1); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -925,26 +921,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1) t->d.args.arg_1 = arg_1; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -973,8 +969,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1) static 
inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -986,27 +983,26 @@ void NAME##_RUN(ATYPE_1 arg_1) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ @@ -1025,21 +1021,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2); \ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1051,26 +1046,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + 
_lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1099,8 +1094,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1112,27 +1108,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ @@ -1148,21 +1143,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2); \ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1174,26 +1168,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = 
lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1222,8 +1216,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2) static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1235,27 +1230,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ @@ -1274,21 +1268,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1300,26 +1293,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - 
PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1348,8 +1341,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1361,27 +1355,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ } \ \ @@ -1397,21 +1390,20 @@ typedef struct _TD_##NAME { /* If this line 
generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1423,26 +1415,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = 
(split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1471,8 +1463,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1484,27 +1477,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return 
NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ } \ \ @@ -1523,21 +1515,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1549,26 +1540,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 
0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1597,8 +1588,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1610,27 +1602,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, 
t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ @@ -1646,21 +1637,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4); \ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1672,26 +1662,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ 
atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1720,8 +1710,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1733,27 +1724,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* 
Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ @@ -1772,21 +1762,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1798,26 +1787,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + 
split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1846,8 +1835,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1859,27 +1849,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ - if (lace_sync(w, head)) { \ + 
if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ @@ -1895,21 +1884,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1921,26 +1909,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = 
w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1969,8 +1957,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1982,27 +1971,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume 
contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ @@ -2021,21 +2009,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2047,26 +2034,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) 
{ \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2095,8 +2082,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2108,27 +2096,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + 
return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ @@ -2144,21 +2131,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ 
-2170,26 +2156,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2218,8 +2204,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2231,27 +2218,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } 
\ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ diff --git a/src/lace.sh b/src/lace.sh index 78fef55..06b64ca 100755 --- a/src/lace.sh +++ b/src/lace.sh @@ -51,10 +51,10 @@ extern "C" { /** * Type definitions used in the functions below. 
- * - WorkerP contains the (private) Worker data + * - LaceWorker contains the (private) Worker data * - Task contains a single Task */ -typedef struct _WorkerP WorkerP; +typedef struct _LaceWorker LaceWorker; typedef struct _Task Task; /* Typical cacheline size of system architectures */ @@ -76,7 +76,7 @@ typedef struct _Task Task; #endif #define TASK_COMMON_FIELDS(type) \ - void (*f)(struct type *); \ + void (*f)(LaceWorker *, struct type *); \ _Atomic(struct _Worker*) thief; struct __lace_common_fields_only { TASK_COMMON_FIELDS(_Task) }; @@ -116,7 +116,7 @@ typedef struct _Worker { uint8_t movesplit; } Worker; -typedef struct _WorkerP { +typedef struct _LaceWorker { Task *head; // my head Task *split; // same as dq+ts.ts.split Task *end; // dq+dq_size @@ -134,10 +134,10 @@ typedef struct _WorkerP { #endif int16_t pu; // my pu (for HWLOC) -} WorkerP; +} LaceWorker; #ifdef __linux__ -extern __thread WorkerP *lace_thread_worker; +extern __thread LaceWorker *lace_thread_worker; #else extern pthread_key_t lace_thread_worker_key; #endif @@ -193,7 +193,7 @@ void lace_stop(void); * Steal a random task. * Only use this from inside a Lace task. */ -void lace_steal_random(void); +void lace_steal_random(LaceWorker*); /** * Enter the Lace barrier. (all active workers must enter it before we can continue) @@ -211,13 +211,13 @@ unsigned int lace_worker_count(void); * Only run this from inside a Lace task. * (Used by LACE_VARS) */ -static inline WorkerP* +static inline LaceWorker* lace_get_worker(void) { #ifdef __linux__ return lace_thread_worker; #else - return (WorkerP*)pthread_getspecific(lace_thread_worker_key); + return (LaceWorker*)pthread_getspecific(lace_thread_worker_key); #endif } @@ -229,7 +229,7 @@ static inline int lace_is_worker(void) { return lace_get_worker() != NULL ? 1 : /** * Retrieve the current head of the deque of the worker. 
*/ - static inline Task *lace_get_head(void) { return lace_get_worker()->head; } +static inline Task *lace_get_head(void) { return lace_get_worker()->head; } /** * Helper function to call from outside Lace threads. @@ -255,7 +255,7 @@ void lace_run_together(Task *task); /** * Instead of SYNCing on the next task, drop the task (unless stolen already) */ -void lace_drop(void); +void lace_drop(LaceWorker *lace_worker); /** * Get the current worker id. @@ -285,7 +285,7 @@ static inline int lace_is_completed_task(Task* t) { return ((size_t)(Worker*)t-> /** * Check if current tasks must be interrupted, and if so, interrupt. */ -static inline void lace_check_yield(void); +static inline void lace_check_yield(LaceWorker*); /** * Make all tasks of the current worker shared. @@ -386,7 +386,7 @@ typedef enum { #define LACE_NOWORK ((Worker*)2) #if LACE_PIE_TIMES -static void lace_time_event( WorkerP *w, int event ) +static void lace_time_event( LaceWorker *w, int event ) { uint64_t now = gethrtime(), prev = w->time; @@ -496,12 +496,12 @@ extern lace_newframe_t lace_newframe; /** * Interrupt the current worker and run a task in a new frame */ -void lace_yield(void); +void lace_yield(LaceWorker*); /** * Check if current tasks must be interrupted, and if so, interrupt. */ -static inline void lace_check_yield(void) { if (__builtin_expect(atomic_load_explicit(&lace_newframe.t, memory_order_relaxed) != NULL, 0)) lace_yield(); } +static inline void lace_check_yield(LaceWorker *w) { if (__builtin_expect(atomic_load_explicit(&lace_newframe.t, memory_order_relaxed) != NULL, 0)) lace_yield(w); } /** * Make all tasks of the current worker shared. 
@@ -509,7 +509,7 @@ static inline void lace_check_yield(void) { if (__builtin_expect(atomic_load_exp static inline void __attribute__((unused)) lace_make_all_shared(void) { - WorkerP* w = lace_get_worker(); + LaceWorker* w = lace_get_worker(); if (w->split != w->head) { w->split = w->head; w->_public->ts.ts.split = w->head - w->dq; @@ -519,7 +519,7 @@ lace_make_all_shared(void) /** * Helper function for _SYNC implementations */ -int lace_sync(WorkerP *w, Task *head); +int lace_sync(LaceWorker *w, Task *head); ' # # Create macros for each arity @@ -533,15 +533,17 @@ if ((r)); then TASK_INIT="$TASK_INIT t->d.args.arg_$r = arg_$r;" if (( r == 1)); then MACRO_ARGS="ATYPE_$r, ARG_$r" - DECL_ARGS="ATYPE_1" - TASK_GET_FROM_t="t->d.args.arg_1" - FUN_ARGS="ATYPE_1 arg_1" - CALL_ARGS="arg_1" + DECL_ARGS=", ATYPE_1" + TASK_GET_FROM_t=", t->d.args.arg_1" + FUN_ARGS=", ATYPE_1 arg_1" + RUN_ARGS="ATYPE_1 arg_1" + CALL_ARGS=", arg_1" else MACRO_ARGS="$MACRO_ARGS, ATYPE_$r, ARG_$r" DECL_ARGS="$DECL_ARGS, ATYPE_$r" TASK_GET_FROM_t="$TASK_GET_FROM_t, t->d.args.arg_$r" FUN_ARGS="$FUN_ARGS, ATYPE_$r arg_$r" + RUN_ARGS="$RUN_ARGS, ATYPE_$r arg_$r" CALL_ARGS="$CALL_ARGS, arg_$r" fi ARGS_STRUCT="struct { $TASK_FIELDS } args;" @@ -588,21 +590,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1]; -$RTYPE NAME($DECL_ARGS); +$RTYPE NAME(LaceWorker*$DECL_ARGS); -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused))) { - $SAVE_RVAL NAME($TASK_GET_FROM_t); + $SAVE_RVAL NAME(lace_worker$TASK_GET_FROM_t); } static inline __attribute__((unused)) -void NAME##_SPAWN($FUN_ARGS) +void NAME##_SPAWN(LaceWorker* _lace_worker$FUN_ARGS) { PR_COUNTTASK(w); - WorkerP *w = lace_get_worker(); - Task *lace_head = w->head; - if (lace_head == w->end) lace_abort_stack_overflow(); + Task *lace_head = _lace_worker->head; + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); TD_##NAME *t; TailSplitNA ts; @@ -614,30 +615,30 @@ void NAME##_SPAWN($FUN_ARGS) $TASK_INIT atomic_thread_fence(memory_order_acquire); - Worker *wt = w->_public; - if (__builtin_expect(w->allstolen, 0)) { + Worker *wt = _lace_worker->_public; + if (__builtin_expect(_lace_worker->allstolen, 0)) { if (wt->movesplit) wt->movesplit = 0; - head = lace_head - w->dq; + head = lace_head - _lace_worker->dq; ts = (TailSplitNA){{head,head+1}}; wt->ts.v = ts.v; wt->allstolen = 0; - w->split = lace_head+1; - w->allstolen = 0; + _lace_worker->split = lace_head+1; + _lace_worker->allstolen = 0; } else if (__builtin_expect(wt->movesplit, 0)) { - head = lace_head - w->dq; - split = w->split - w->dq; + head = lace_head - _lace_worker->dq; + split = _lace_worker->split - _lace_worker->dq; newsplit = (split + head + 2)/2; wt->ts.ts.split = newsplit; - w->split = w->dq + newsplit; + _lace_worker->split = _lace_worker->dq + newsplit; wt->movesplit = 0; - PR_COUNTSPLITS(w, CTR_split_grow); + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); } - w->head = lace_head+1; + _lace_worker->head = lace_head+1; } static inline __attribute__((unused)) -$RTYPE NAME##_NEWFRAME($FUN_ARGS) +$RTYPE NAME##_NEWFRAME($RUN_ARGS) { Task _t; TD_##NAME *t = (TD_##NAME *)&_t; @@ -649,7 +650,7 @@ $RTYPE NAME##_NEWFRAME($FUN_ARGS) } static inline 
__attribute__((unused)) -void NAME##_TOGETHER($FUN_ARGS) +void NAME##_TOGETHER($RUN_ARGS) { Task _t; TD_##NAME *t = (TD_##NAME *)&_t; @@ -660,10 +661,11 @@ void NAME##_TOGETHER($FUN_ARGS) } static inline __attribute__((unused)) -$RTYPE NAME##_RUN($FUN_ARGS) +$RTYPE NAME##_RUN($RUN_ARGS) { - if (lace_is_worker()) { - return NAME($CALL_ARGS); + LaceWorker *worker = lace_get_worker(); + if (worker != NULL) { + return NAME(worker$CALL_ARGS); } Task _t; TD_##NAME *t = (TD_##NAME *)&_t; @@ -675,27 +677,26 @@ $RTYPE NAME##_RUN($FUN_ARGS) } static inline __attribute__((unused)) -$RTYPE NAME##_SYNC() +$RTYPE NAME##_SYNC(LaceWorker* _lace_worker) { - WorkerP* w = lace_get_worker(); - Task* head = w->head - 1; - w->head = head; + Task* head = _lace_worker->head - 1; + _lace_worker->head = head; /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ TD_##NAME *t = (TD_##NAME *)head; - if (__builtin_expect(0 == w->_public->movesplit, 1)) { - if (__builtin_expect(w->split <= head, 1)) { + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { + if (__builtin_expect(_lace_worker->split <= head, 1)) { atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); - return NAME($TASK_GET_FROM_t); + return NAME(_lace_worker$TASK_GET_FROM_t); } } - if (lace_sync(w, head)) { + if (lace_sync(_lace_worker, head)) { return $RETURN_RES; } else { atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); - return NAME($TASK_GET_FROM_t); + return NAME(_lace_worker$TASK_GET_FROM_t); } } diff --git a/src/lace14.c b/src/lace14.c index 080172f..08ec9b1 100644 --- a/src/lace14.c +++ b/src/lace14.c @@ -93,8 +93,8 @@ static unsigned int n_workers = 0; typedef struct { Worker worker_public; char pad1[PAD(sizeof(Worker), LINE_SIZE)]; - WorkerP worker_private; - char pad2[PAD(sizeof(WorkerP), LINE_SIZE)]; + LaceWorker worker_private; + char pad2[PAD(sizeof(LaceWorker), LINE_SIZE)]; Task deque[]; } worker_data; @@ -111,7 +111,7 @@ static size_t 
workers_memory_size = 0; /** * (Secret) holds pointer to private Worker data, just for stats collection at end */ -static WorkerP **workers_p; +static LaceWorker **workers_p; /** * Flag to signal all workers to quit. @@ -123,7 +123,7 @@ static atomic_uint workers_running = 0; * Thread-specific mechanism to access current worker data */ #ifdef __linux__ -__thread WorkerP *lace_thread_worker; +__thread LaceWorker *lace_thread_worker; #else pthread_key_t lace_thread_worker_key; #endif @@ -132,10 +132,10 @@ pthread_key_t lace_thread_worker_key; #define LACE_LEAP_RANDOM 1 #endif -Worker* lace_steal(WorkerP *self, Worker *victim); -int lace_shrink_shared(WorkerP *w); -void lace_leapfrog(WorkerP *__lace_worker); -void lace_drop_slow(WorkerP *w, Task *head); +Worker* lace_steal(LaceWorker *self, Worker *victim); +int lace_shrink_shared(LaceWorker *w); +void lace_leapfrog(LaceWorker *__lace_worker); +void lace_drop_slow(LaceWorker *w, Task *head); /** * Global newframe variable used for the implementation of NEWFRAME and TOGETHER @@ -242,7 +242,7 @@ lace_check_memory(void) { #if LACE_USE_HWLOC // get our current worker - WorkerP *w = lace_get_worker(); + LaceWorker *w = lace_get_worker(); void* mem = workers_memory[w->worker]; // get pinned PUs @@ -374,7 +374,7 @@ lace_init_worker(unsigned int worker) // Set pointers Worker *wt = workers[worker] = &workers_memory[worker]->worker_public; - WorkerP *w = workers_p[worker] = &workers_memory[worker]->worker_private; + LaceWorker *w = workers_p[worker] = &workers_memory[worker]->worker_private; w->dq = workers_memory[worker]->deque; w->head = w->dq; #ifdef __linux__ @@ -497,9 +497,9 @@ void lace_run_task(Task *task) { // check if we are really not in a Lace thread - WorkerP* self = lace_get_worker(); + LaceWorker* self = lace_get_worker(); if (self != 0) { - task->f(task); + task->f(self, task); } else { // if needed, wake up the workers lace_resume(); @@ -521,7 +521,7 @@ lace_run_task(Task *task) } static inline void 
-lace_steal_external(WorkerP *self) +lace_steal_external(LaceWorker *self) { ExtTask *stolen_task = atomic_exchange(&external_task, NULL); if (stolen_task != 0) { @@ -530,7 +530,7 @@ lace_steal_external(WorkerP *self) atomic_store_explicit(&stolen_task->task->thief, self->_public, memory_order_relaxed); lace_time_event(self, 1); // atomic_thread_fence(memory_order_relaxed); - stolen_task->task->f(stolen_task->task); + stolen_task->task->f(self, stolen_task->task); // atomic_thread_fence(memory_order_relaxed); lace_time_event(self, 2); // atomic_thread_fence(memory_order_relaxed); @@ -544,11 +544,9 @@ lace_steal_external(WorkerP *self) /** * (Try to) steal and execute a task from a random worker. */ -//VOID_TASK_0(lace_steal_random); -void lace_steal_random(void) +void lace_steal_random(LaceWorker *__lace_worker) { - WorkerP *__lace_worker = lace_get_worker(); - lace_check_yield(); + lace_check_yield(__lace_worker); if (__builtin_expect(atomic_load_explicit(&external_task, memory_order_acquire) != 0, 0)) { lace_steal_external(__lace_worker); @@ -571,17 +569,17 @@ void lace_steal_random(void) */ VOID_TASK_1(lace_steal_loop, atomic_int*, quit); -void lace_steal_loop(atomic_int* quit) +void lace_steal_loop(LaceWorker* lace_worker, atomic_int* quit) { // Determine who I am - const int worker_id = lace_get_worker()->worker; + const int worker_id = lace_worker->worker; // Prepare self, victim Worker ** const self = &workers[worker_id]; Worker ** victim = self; #if LACE_PIE_TIMES - __lace_worker->time = gethrtime(); + lace_worker->time = gethrtime(); #endif uint32_t seed = worker_id; @@ -602,16 +600,16 @@ void lace_steal_loop(atomic_int* quit) victim = workers + (rng(&seed, n-1) + worker_id + 1) % n; } - PR_COUNTSTEALS(__lace_worker, CTR_steal_tries); - Worker *res = lace_steal(lace_get_worker(), *victim); + PR_COUNTSTEALS(lace_worker, CTR_steal_tries); + Worker *res = lace_steal(lace_worker, *victim); if (res == LACE_STOLEN) { - PR_COUNTSTEALS(__lace_worker, CTR_steals); 
+ PR_COUNTSTEALS(lace_worker, CTR_steals); } else if (res == LACE_BUSY) { - PR_COUNTSTEALS(__lace_worker, CTR_steal_busy); + PR_COUNTSTEALS(lace_worker, CTR_steal_busy); } } - lace_check_yield(); + lace_check_yield(lace_worker); if (__builtin_expect(atomic_load_explicit(&external_task, memory_order_acquire) != 0, 0)) { lace_steal_external(lace_get_worker()); @@ -648,7 +646,7 @@ lace_worker_thread(void* arg) workers_running += 1; // Run the steal loop - lace_steal_loop(&lace_quits); + lace_steal_loop(lace_get_worker(), &lace_quits); // Time worker exit event lace_time_event(__lace_worker, 9); @@ -1014,9 +1012,8 @@ void lace_stop() * 5) Restore the old frame */ void -lace_exec_in_new_frame(Task *root) +lace_exec_in_new_frame(LaceWorker* __lace_worker, Task *root) { - WorkerP *__lace_worker = lace_get_worker(); Task *__lace_dq_head = __lace_worker->head; TailSplitNA old; @@ -1046,7 +1043,7 @@ lace_exec_in_new_frame(Task *root) lace_barrier(); // execute task - root->f(root); + root->f(__lace_worker, root); // wait until all workers are back (else they may steal from previous frame) lace_barrier(); @@ -1066,7 +1063,7 @@ lace_exec_in_new_frame(Task *root) * Each Lace worker executes lace_yield to execute the task in a new frame. 
*/ void -lace_yield(void) +lace_yield(LaceWorker *worker) { // make a local copy of the task Task _t; @@ -1075,7 +1072,7 @@ lace_yield(void) // wait until all workers have made a local copy lace_barrier(); - lace_exec_in_new_frame(&_t); + lace_exec_in_new_frame(worker, &_t); } /** @@ -1085,22 +1082,22 @@ lace_yield(void) VOID_TASK_2(lace_together_root, Task*, t, atomic_int*, finished); void -lace_together_root(Task* t, atomic_int* finished) +lace_together_root(LaceWorker* lace_worker, Task* t, atomic_int* finished) { // run the root task - t->f(t); + t->f(lace_worker, t); // signal out completion *finished -= 1; // while threads aren't done, steal randomly - while (*finished != 0) lace_steal_random(); + while (*finished != 0) lace_steal_random(lace_worker); } VOID_TASK_1(lace_wrap_together, Task*, task); void -lace_wrap_together(Task* task) +lace_wrap_together(LaceWorker* worker, Task* task) { /* synchronization integer (decrease by 1 when done...) */ atomic_int done = n_workers; @@ -1117,7 +1114,7 @@ lace_wrap_together(Task* task) while (1) { Task *expected = 0; if (atomic_compare_exchange_weak(&lace_newframe.t, &expected, &_t2)) break; - lace_yield(); + lace_yield(worker); } // wait until other workers have made a local copy @@ -1126,21 +1123,21 @@ lace_wrap_together(Task* task) // reset the newframe struct atomic_store_explicit(&lace_newframe.t, NULL, memory_order_relaxed); - lace_exec_in_new_frame(&_t2); + lace_exec_in_new_frame(worker, &_t2); } VOID_TASK_2(lace_newframe_root, Task*, t, atomic_int*, done); void -lace_newframe_root(Task* t, atomic_int *done) +lace_newframe_root(LaceWorker *lace_worker, Task* t, atomic_int *done) { - t->f(t); + t->f(lace_worker, t); *done = 1; } VOID_TASK_1(lace_wrap_newframe, Task*, task); void -lace_wrap_newframe(Task *task) +lace_wrap_newframe(LaceWorker* worker, Task* task) { /* synchronization integer (set to 1 when done...) 
*/ atomic_int done = 0; @@ -1156,7 +1153,7 @@ lace_wrap_newframe(Task *task) while (1) { Task *expected = 0; if (atomic_compare_exchange_weak(&lace_newframe.t, &expected, &_s)) break; - lace_yield(); + lace_yield(worker); } // wait until other workers have made a local copy @@ -1173,15 +1170,15 @@ lace_wrap_newframe(Task *task) t2->d.args.arg_1 = task; t2->d.args.arg_2 = &done; - lace_exec_in_new_frame(&_t2); + lace_exec_in_new_frame(worker, &_t2); } void lace_run_together(Task *t) { - WorkerP* self = lace_get_worker(); + LaceWorker* self = lace_get_worker(); if (self != 0) { - lace_wrap_together(t); + lace_wrap_together(self, t); } else { lace_wrap_together_RUN(t); } @@ -1190,9 +1187,9 @@ lace_run_together(Task *t) void lace_run_newframe(Task *t) { - WorkerP* self = lace_get_worker(); + LaceWorker* self = lace_get_worker(); if (self != 0) { - lace_wrap_newframe(t); + lace_wrap_newframe(self, t); } else { lace_wrap_newframe_RUN(t); } @@ -1209,7 +1206,7 @@ lace_abort_stack_overflow(void) } Worker* -lace_steal(WorkerP *self, Worker *victim) +lace_steal(LaceWorker *self, Worker *victim) { if (victim != NULL && !victim->allstolen) { TailSplitNA ts; @@ -1223,7 +1220,7 @@ lace_steal(WorkerP *self, Worker *victim) Task *t = &victim->dq[ts.ts.tail]; atomic_store_explicit(&t->thief, self->_public, memory_order_relaxed); lace_time_event(self, 1); - t->f(t); + t->f(self, t); lace_time_event(self, 2); atomic_store_explicit(&t->thief, THIEF_COMPLETED, memory_order_release); lace_time_event(self, 8); @@ -1245,7 +1242,7 @@ lace_steal(WorkerP *self, Worker *victim) } int -lace_shrink_shared(WorkerP *w) +lace_shrink_shared(LaceWorker *w) { Worker *wt = w->_public; TailSplitNA ts; /* Use non-atomic version to emit better code */ @@ -1275,29 +1272,32 @@ lace_shrink_shared(WorkerP *w) } void -lace_leapfrog(WorkerP *__lace_worker) +lace_leapfrog(LaceWorker *lace_worker) { - lace_time_event(__lace_worker, 3); - Task *t = __lace_worker->head; + lace_time_event(lace_worker, 3); + Task *t 
= lace_worker->head; Worker *thief = t->thief; if (thief != THIEF_COMPLETED) { while ((size_t)thief <= 1) thief = t->thief; /* PRE-LEAP: increase head again */ - __lace_worker->head += 1; + lace_worker->head += 1; /* Now leapfrog */ int attempts = 32; while (thief != THIEF_COMPLETED) { - PR_COUNTSTEALS(__lace_worker, CTR_leap_tries); - Worker *res = lace_steal(__lace_worker, thief); + PR_COUNTSTEALS(lace_worker, CTR_leap_tries); + Worker *res = lace_steal(lace_worker, thief); if (res == LACE_NOWORK) { - lace_check_yield(); - if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { lace_steal_random(); attempts = 32; } + lace_check_yield(lace_worker); + if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { + lace_steal_random(lace_worker); + attempts = 32; + } } else if (res == LACE_STOLEN) { - PR_COUNTSTEALS(__lace_worker, CTR_leaps); + PR_COUNTSTEALS(lace_worker, CTR_leaps); } else if (res == LACE_BUSY) { - PR_COUNTSTEALS(__lace_worker, CTR_leap_busy); + PR_COUNTSTEALS(lace_worker, CTR_leap_busy); } atomic_thread_fence(memory_order_acquire); thief = t->thief; @@ -1305,24 +1305,24 @@ lace_leapfrog(WorkerP *__lace_worker) /* POST-LEAP: really pop the finished task */ atomic_thread_fence(memory_order_acquire); - if (__lace_worker->allstolen == 0) { + if (lace_worker->allstolen == 0) { /* Assume: tail = split = head (pre-pop) */ /* Now we do a real pop ergo either decrease tail,split,head or declare allstolen */ - Worker *wt = __lace_worker->_public; + Worker *wt = lace_worker->_public; wt->allstolen = 1; - __lace_worker->allstolen = 1; + lace_worker->allstolen = 1; } - __lace_worker->head -= 1; + lace_worker->head -= 1; } /*compiler_barrier();*/ atomic_thread_fence(memory_order_acquire); atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); - lace_time_event(__lace_worker, 4); + lace_time_event(lace_worker, 4); } int -lace_sync(WorkerP *w, Task *head) +lace_sync(LaceWorker *w, Task *head) { if ((w->allstolen) || (w->split > head && lace_shrink_shared(w))) { 
lace_leapfrog(w); @@ -1344,22 +1344,21 @@ lace_sync(WorkerP *w, Task *head) } void -lace_drop_slow(WorkerP *w, Task *head) +lace_drop_slow(LaceWorker *w, Task *head) { if ((w->allstolen) || (w->split > head && lace_shrink_shared(w))) lace_leapfrog(w); } void -lace_drop(void) +lace_drop(LaceWorker *_lace_worker) { - WorkerP *w = lace_get_worker(); - Task* lace_head = w->head - 1; - w->head = lace_head; - if (__builtin_expect(0 == w->_public->movesplit, 1)) { - if (__builtin_expect(w->split <= lace_head, 1)) { + Task* lace_head = _lace_worker->head - 1; + _lace_worker->head = lace_head; + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { + if (__builtin_expect(_lace_worker->split <= lace_head, 1)) { return; } } - lace_drop_slow(w, lace_head); + lace_drop_slow(_lace_worker, lace_head); } diff --git a/src/lace14.h b/src/lace14.h index e3f3add..31e66fa 100644 --- a/src/lace14.h +++ b/src/lace14.h @@ -44,10 +44,10 @@ extern "C" { /** * Type definitions used in the functions below. 
- * - WorkerP contains the (private) Worker data + * - LaceWorker contains the (private) Worker data * - Task contains a single Task */ -typedef struct _WorkerP WorkerP; +typedef struct _LaceWorker LaceWorker; typedef struct _Task Task; /* Typical cacheline size of system architectures */ @@ -69,7 +69,7 @@ typedef struct _Task Task; #endif #define TASK_COMMON_FIELDS(type) \ - void (*f)(struct type *); \ + void (*f)(LaceWorker *, struct type *); \ _Atomic(struct _Worker*) thief; struct __lace_common_fields_only { TASK_COMMON_FIELDS(_Task) }; @@ -109,7 +109,7 @@ typedef struct _Worker { uint8_t movesplit; } Worker; -typedef struct _WorkerP { +typedef struct _LaceWorker { Task *head; // my head Task *split; // same as dq+ts.ts.split Task *end; // dq+dq_size @@ -127,10 +127,10 @@ typedef struct _WorkerP { #endif int16_t pu; // my pu (for HWLOC) -} WorkerP; +} LaceWorker; #ifdef __linux__ -extern __thread WorkerP *lace_thread_worker; +extern __thread LaceWorker *lace_thread_worker; #else extern pthread_key_t lace_thread_worker_key; #endif @@ -186,7 +186,7 @@ void lace_stop(void); * Steal a random task. * Only use this from inside a Lace task. */ -void lace_steal_random(void); +void lace_steal_random(LaceWorker*); /** * Enter the Lace barrier. (all active workers must enter it before we can continue) @@ -204,13 +204,13 @@ unsigned int lace_worker_count(void); * Only run this from inside a Lace task. * (Used by LACE_VARS) */ -static inline WorkerP* +static inline LaceWorker* lace_get_worker(void) { #ifdef __linux__ return lace_thread_worker; #else - return (WorkerP*)pthread_getspecific(lace_thread_worker_key); + return (LaceWorker*)pthread_getspecific(lace_thread_worker_key); #endif } @@ -222,7 +222,7 @@ static inline int lace_is_worker(void) { return lace_get_worker() != NULL ? 1 : /** * Retrieve the current head of the deque of the worker. 
*/ - static inline Task *lace_get_head(void) { return lace_get_worker()->head; } +static inline Task *lace_get_head(void) { return lace_get_worker()->head; } /** * Helper function to call from outside Lace threads. @@ -248,7 +248,7 @@ void lace_run_together(Task *task); /** * Instead of SYNCing on the next task, drop the task (unless stolen already) */ -void lace_drop(void); +void lace_drop(LaceWorker *lace_worker); /** * Get the current worker id. @@ -278,7 +278,7 @@ static inline int lace_is_completed_task(Task* t) { return ((size_t)(Worker*)t-> /** * Check if current tasks must be interrupted, and if so, interrupt. */ -static inline void lace_check_yield(void); +static inline void lace_check_yield(LaceWorker*); /** * Make all tasks of the current worker shared. @@ -379,7 +379,7 @@ typedef enum { #define LACE_NOWORK ((Worker*)2) #if LACE_PIE_TIMES -static void lace_time_event( WorkerP *w, int event ) +static void lace_time_event( LaceWorker *w, int event ) { uint64_t now = gethrtime(), prev = w->time; @@ -489,12 +489,12 @@ extern lace_newframe_t lace_newframe; /** * Interrupt the current worker and run a task in a new frame */ -void lace_yield(void); +void lace_yield(LaceWorker*); /** * Check if current tasks must be interrupted, and if so, interrupt. */ -static inline void lace_check_yield(void) { if (__builtin_expect(atomic_load_explicit(&lace_newframe.t, memory_order_relaxed) != NULL, 0)) lace_yield(); } +static inline void lace_check_yield(LaceWorker *w) { if (__builtin_expect(atomic_load_explicit(&lace_newframe.t, memory_order_relaxed) != NULL, 0)) lace_yield(w); } /** * Make all tasks of the current worker shared. 
@@ -502,7 +502,7 @@ static inline void lace_check_yield(void) { if (__builtin_expect(atomic_load_exp static inline void __attribute__((unused)) lace_make_all_shared(void) { - WorkerP* w = lace_get_worker(); + LaceWorker* w = lace_get_worker(); if (w->split != w->head) { w->split = w->head; w->_public->ts.ts.split = w->head - w->dq; @@ -512,7 +512,7 @@ lace_make_all_shared(void) /** * Helper function for _SYNC implementations */ -int lace_sync(WorkerP *w, Task *head); +int lace_sync(LaceWorker *w, Task *head); // Task macros for tasks of arity 0 @@ -527,21 +527,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -RTYPE NAME(); \ +RTYPE NAME(LaceWorker*); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(); \ + t->d.res = NAME(lace_worker); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN() \ +void NAME##_SPAWN(LaceWorker* _lace_worker) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -553,26 +552,26 @@ void NAME##_SPAWN() \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + 
_lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -601,8 +600,9 @@ void NAME##_TOGETHER() static inline __attribute__((unused)) \ RTYPE NAME##_RUN() \ { \ - if (lace_is_worker()) { \ - return NAME(); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -614,27 +614,26 @@ RTYPE NAME##_RUN() } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } 
\ } \ \ @@ -650,21 +649,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(); \ +void NAME(LaceWorker*); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(); \ + NAME(lace_worker); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN() \ +void NAME##_SPAWN(LaceWorker* _lace_worker) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -676,26 +674,26 @@ void NAME##_SPAWN() \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + 
_lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -724,8 +722,9 @@ void NAME##_TOGETHER() static inline __attribute__((unused)) \ void NAME##_RUN() \ { \ - if (lace_is_worker()) { \ - return NAME(); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -737,27 +736,26 @@ void NAME##_RUN() } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(); \ + return NAME(_lace_worker); \ } \ } \ \ @@ -776,21 +774,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1); \ +RTYPE NAME(LaceWorker*, ATYPE_1); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1); \ + t->d.res = NAME(lace_worker, t->d.args.arg_1); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -802,26 +799,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1) t->d.args.arg_1 = arg_1; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -850,8 +847,9 @@ void 
NAME##_TOGETHER(ATYPE_1 arg_1) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -863,27 +861,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ @@ -899,21 +896,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1); \ +void NAME(LaceWorker*, ATYPE_1); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1); \ + NAME(lace_worker, t->d.args.arg_1); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -925,26 +921,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1) t->d.args.arg_1 = arg_1; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -973,8 +969,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1) static 
inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -986,27 +983,26 @@ void NAME##_RUN(ATYPE_1 arg_1) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1); \ + return NAME(_lace_worker, t->d.args.arg_1); \ } \ } \ \ @@ -1025,21 +1021,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2); \ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1051,26 +1046,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + 
_lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1099,8 +1094,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1112,27 +1108,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ @@ -1148,21 +1143,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2); \ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1174,26 +1168,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = 
lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1222,8 +1216,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2) static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1235,27 +1230,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2); \ } \ } \ \ @@ -1274,21 +1268,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1300,26 +1293,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - 
PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1348,8 +1341,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1361,27 +1355,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ } \ \ @@ -1397,21 +1390,20 @@ typedef struct _TD_##NAME { /* If this line 
generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1423,26 +1415,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = 
(split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1471,8 +1463,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1484,27 +1477,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ + return 
NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ } \ } \ \ @@ -1523,21 +1515,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1549,26 +1540,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 
0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1597,8 +1588,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1610,27 +1602,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, 
t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ @@ -1646,21 +1637,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4); \ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1672,26 +1662,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ 
atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1720,8 +1710,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1733,27 +1724,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* 
Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ } \ } \ \ @@ -1772,21 +1762,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1798,26 +1787,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + 
split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1846,8 +1835,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1859,27 +1849,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ - if (lace_sync(w, head)) { \ + 
if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ @@ -1895,21 +1884,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -1921,26 +1909,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = 
w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -1969,8 +1957,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -1982,27 +1971,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume 
contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ } \ } \ \ @@ -2021,21 +2009,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2047,26 +2034,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) 
{ \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2095,8 +2082,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2108,27 +2096,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + 
return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ @@ -2144,21 +2131,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6); \ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ 
-2170,26 +2156,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2218,8 +2204,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2231,27 +2218,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } 
\ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ } \ } \ \ @@ -2270,21 +2256,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2296,26 +2281,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + 
_lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2344,8 +2329,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2357,27 +2343,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, 
memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ } \ } \ \ @@ -2393,21 +2378,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2419,26 +2403,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; 
\ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2467,8 +2451,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2480,27 +2465,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return 
NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7);\ } \ } \ \ @@ -2519,21 +2503,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8); \ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 
arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2545,26 +2528,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2593,8 +2576,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 
arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2606,27 +2590,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ } \ } \ \ 
@@ -2642,21 +2625,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8); \ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8)\ { \ - PR_COUNTTASK(w); \ + PR_COUNTTASK(_lace_worker); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2668,26 +2650,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + 
Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2716,8 +2698,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8); \ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2729,27 +2712,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented 
out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8);\ } \ } \ \ @@ -2768,21 +2750,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9);\ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9)\ { \ - PR_COUNTTASK(w); \ + PR_COUNTTASK(_lace_worker); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2794,26 +2775,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0;
\ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2842,8 +2823,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2855,27 +2837,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 
== w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ } \ } \ \ @@ -2891,21 +2872,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9);\ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9)\ { \ - PR_COUNTTASK(w); \ + PR_COUNTTASK(_lace_worker); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -2917,26 +2897,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - 
w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -2965,8 +2945,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9); \ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -2978,27 +2959,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 
1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9);\ } \ } \ \ @@ -3017,21 +2997,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10);\ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10)\ { \ - PR_COUNTTASK(w); \ + PR_COUNTTASK(_lace_worker); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3043,26 +3022,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = 
_lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3091,8 +3070,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3104,27 +3084,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = 
head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ } \ } \ \ @@ -3140,21 +3119,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10);\ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10)\ { \ - PR_COUNTTASK(w); \ + PR_COUNTTASK(_lace_worker); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3166,26 +3144,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + 
if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3214,8 +3192,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3227,27 +3206,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 
0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10);\ } \ } \ \ @@ -3266,21 +3244,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11);\ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11)\ { \ - PR_COUNTTASK(w); \ + PR_COUNTTASK(_lace_worker); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3292,26 +3269,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11;\
atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3340,8 +3317,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3353,27 +3331,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* 
_lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ } \ } \ \ @@ -3389,21 +3366,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11);\ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3415,26 +3391,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11;\ atomic_thread_fence(memory_order_acquire); \ 
\ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3463,8 +3439,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3476,27 +3453,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = 
lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11);\ } \ } \ \ @@ -3515,21 +3491,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12);\ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3541,26 +3516,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; 
t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11; t->d.args.arg_12 = arg_12;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3589,8 +3564,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3602,27 +3578,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 
arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ } \ } \ \ @@ -3638,21 +3613,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define 
LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12);\ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3664,26 +3638,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = 
arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11; t->d.args.arg_12 = arg_12;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3712,8 +3686,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12);\ } \ Task _t; 
\ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3725,27 +3700,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12);\ } \ } \ \ @@ -3764,21 +3738,20 @@ typedef struct _TD_##NAME 
{ /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13);\ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3790,26 
+3763,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11; t->d.args.arg_12 = arg_12; t->d.args.arg_13 = arg_13;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3838,8 +3811,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, 
arg_8, arg_9, arg_10, arg_11, arg_12, arg_13);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12, arg_13);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3851,27 +3825,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, 
t->d.args.arg_12, t->d.args.arg_13);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ } \ } \ \ @@ -3887,21 +3860,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13);\ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, 
ATYPE_13 arg_13)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -3913,26 +3885,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11; t->d.args.arg_12 = arg_12; t->d.args.arg_13 = arg_13;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -3961,8 +3933,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline 
__attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12, arg_13);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12, arg_13);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -3974,27 +3947,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if 
(lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13);\ } \ } \ \ @@ -4013,21 +3985,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ \ -RTYPE NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13, ATYPE_14);\ +RTYPE NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13, ATYPE_14);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - t->d.res = NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ + t->d.res = NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 
arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13, ATYPE_14 arg_14)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13, ATYPE_14 arg_14)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -4039,26 +4010,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11; t->d.args.arg_12 = arg_12; t->d.args.arg_13 = arg_13; t->d.args.arg_14 = arg_14;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ 
wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -4087,8 +4058,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13, ATYPE_14 arg_14)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12, arg_13, arg_14);\ + LaceWorker *worker = lace_get_worker(); \ + if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12, arg_13, arg_14);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -4100,27 +4072,26 @@ RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATY } \ \ static inline __attribute__((unused)) \ -RTYPE NAME##_SYNC() \ +RTYPE NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, 
t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ((TD_##NAME *)t)->d.res; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ } \ } \ \ @@ -4136,21 +4107,20 @@ typedef struct _TD_##NAME { /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ \ -void NAME(ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13, ATYPE_14);\ +void NAME(LaceWorker*, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6, ATYPE_7, ATYPE_8, ATYPE_9, ATYPE_10, ATYPE_11, ATYPE_12, ATYPE_13, ATYPE_14);\ \ -static void NAME##_WRAP(TD_##NAME *t __attribute__((unused))) \ +static void NAME##_WRAP(LaceWorker* lace_worker, TD_##NAME *t __attribute__((unused)))\ { \ - NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ + NAME(lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ } \ \ static inline __attribute__((unused)) \ -void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13, ATYPE_14 arg_14)\ +void NAME##_SPAWN(LaceWorker* _lace_worker, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13, ATYPE_14 arg_14)\ { \ PR_COUNTTASK(w); \ \ - WorkerP *w = lace_get_worker(); \ - Task *lace_head = w->head; \ - if (lace_head == w->end) lace_abort_stack_overflow(); \ + Task *lace_head = _lace_worker->head; \ + if (lace_head == _lace_worker->end) lace_abort_stack_overflow(); \ \ TD_##NAME *t; \ TailSplitNA ts; \ @@ -4162,26 +4132,26 @@ void NAME##_SPAWN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, AT t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; 
t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6; t->d.args.arg_7 = arg_7; t->d.args.arg_8 = arg_8; t->d.args.arg_9 = arg_9; t->d.args.arg_10 = arg_10; t->d.args.arg_11 = arg_11; t->d.args.arg_12 = arg_12; t->d.args.arg_13 = arg_13; t->d.args.arg_14 = arg_14;\ atomic_thread_fence(memory_order_acquire); \ \ - Worker *wt = w->_public; \ - if (__builtin_expect(w->allstolen, 0)) { \ + Worker *wt = _lace_worker->_public; \ + if (__builtin_expect(_lace_worker->allstolen, 0)) { \ if (wt->movesplit) wt->movesplit = 0; \ - head = lace_head - w->dq; \ + head = lace_head - _lace_worker->dq; \ ts = (TailSplitNA){{head,head+1}}; \ wt->ts.v = ts.v; \ wt->allstolen = 0; \ - w->split = lace_head+1; \ - w->allstolen = 0; \ + _lace_worker->split = lace_head+1; \ + _lace_worker->allstolen = 0; \ } else if (__builtin_expect(wt->movesplit, 0)) { \ - head = lace_head - w->dq; \ - split = w->split - w->dq; \ + head = lace_head - _lace_worker->dq; \ + split = _lace_worker->split - _lace_worker->dq; \ newsplit = (split + head + 2)/2; \ wt->ts.ts.split = newsplit; \ - w->split = w->dq + newsplit; \ + _lace_worker->split = _lace_worker->dq + newsplit; \ wt->movesplit = 0; \ - PR_COUNTSPLITS(w, CTR_split_grow); \ + PR_COUNTSPLITS(_lace_worker, CTR_split_grow); \ } \ \ - w->head = lace_head+1; \ + _lace_worker->head = lace_head+1; \ } \ \ static inline __attribute__((unused)) \ @@ -4210,8 +4180,9 @@ void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, static inline __attribute__((unused)) \ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6, ATYPE_7 arg_7, ATYPE_8 arg_8, ATYPE_9 arg_9, ATYPE_10 arg_10, ATYPE_11 arg_11, ATYPE_12 arg_12, ATYPE_13 arg_13, ATYPE_14 arg_14)\ { \ - if (lace_is_worker()) { \ - return NAME(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12, arg_13, arg_14);\ + LaceWorker *worker = lace_get_worker(); \ + 
if (worker != NULL) { \ + return NAME(worker, arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8, arg_9, arg_10, arg_11, arg_12, arg_13, arg_14);\ } \ Task _t; \ TD_##NAME *t = (TD_##NAME *)&_t; \ @@ -4223,27 +4194,26 @@ void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYP } \ \ static inline __attribute__((unused)) \ -void NAME##_SYNC() \ +void NAME##_SYNC(LaceWorker* _lace_worker) \ { \ - WorkerP* w = lace_get_worker(); \ - Task* head = w->head - 1; \ - w->head = head; \ + Task* head = _lace_worker->head - 1; \ + _lace_worker->head = head; \ \ /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ TD_##NAME *t = (TD_##NAME *)head; \ \ - if (__builtin_expect(0 == w->_public->movesplit, 1)) { \ - if (__builtin_expect(w->split <= head, 1)) { \ + if (__builtin_expect(0 == _lace_worker->_public->movesplit, 1)) { \ + if (__builtin_expect(_lace_worker->split <= head, 1)) { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ + return NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ } \ } \ \ - if (lace_sync(w, head)) { \ + if (lace_sync(_lace_worker, head)) { \ return ; \ } else { \ atomic_store_explicit(&t->thief, THIEF_EMPTY, memory_order_relaxed); \ - return NAME(t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ + return 
NAME(_lace_worker, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6, t->d.args.arg_7, t->d.args.arg_8, t->d.args.arg_9, t->d.args.arg_10, t->d.args.arg_11, t->d.args.arg_12, t->d.args.arg_13, t->d.args.arg_14);\ } \ } \ \