Skip to content
Permalink
Browse files

Refactoring: Isolating the matrix-vector product in gemm_accum()

  • Loading branch information...
jmvalin committed Nov 22, 2018
1 parent 054acff commit 9791b22b2c83980f6b4386c870cad58557c78007
Showing with 39 additions and 36 deletions.
  1. +39 −36 src/mlp.c
@@ -69,22 +69,29 @@ static OPUS_INLINE float sigmoid_approx(float x)
return .5f + .5f*tansig_approx(.5f*x);
}

void compute_dense(const DenseLayer *layer, float *output, const float *input)
static void gemm_accum(float *out, const opus_int8 *weights, int rows, int cols, int col_stride, const float *x)
{
int i, j;
for (i=0;i<rows;i++)
{
for (j=0;j<cols;j++)
out[i] += weights[j*col_stride + i]*x[j];
}
}

void compute_dense(const DenseLayer *layer, float *output, const float *input)
{
int i;
int N, M;
int stride;
M = layer->nb_inputs;
N = layer->nb_neurons;
stride = N;
for (i=0;i<N;i++)
{
/* Compute update gate. */
float sum = layer->bias[i];
for (j=0;j<M;j++)
sum += layer->input_weights[j*stride + i]*input[j];
output[i] = WEIGHTS_SCALE*sum;
}
output[i] = layer->bias[i];
gemm_accum(output, layer->input_weights, N, M, stride, input);
for (i=0;i<N;i++)
output[i] *= WEIGHTS_SCALE;
if (layer->sigmoid) {
for (i=0;i<N;i++)
output[i] = sigmoid_approx(output[i]);
@@ -96,45 +103,41 @@ void compute_dense(const DenseLayer *layer, float *output, const float *input)

void compute_gru(const GRULayer *gru, float *state, const float *input)
{
int i, j;
int i;
int N, M;
int stride;
float tmp[MAX_NEURONS];
float z[MAX_NEURONS];
float r[MAX_NEURONS];
float h[MAX_NEURONS];
M = gru->nb_inputs;
N = gru->nb_neurons;
stride = 3*N;
/* Compute update gate. */
for (i=0;i<N;i++)
{
/* Compute update gate. */
float sum = gru->bias[i];
for (j=0;j<M;j++)
sum += gru->input_weights[j*stride + i]*input[j];
for (j=0;j<N;j++)
sum += gru->recurrent_weights[j*stride + i]*state[j];
z[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
}
z[i] = gru->bias[i];
gemm_accum(z, gru->input_weights, N, M, stride, input);
gemm_accum(z, gru->recurrent_weights, N, N, stride, state);
for (i=0;i<N;i++)
{
/* Compute reset gate. */
float sum = gru->bias[N + i];
for (j=0;j<M;j++)
sum += gru->input_weights[N + j*stride + i]*input[j];
for (j=0;j<N;j++)
sum += gru->recurrent_weights[N + j*stride + i]*state[j];
r[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
}
z[i] = sigmoid_approx(WEIGHTS_SCALE*z[i]);

/* Compute reset gate. */
for (i=0;i<N;i++)
{
/* Compute output. */
float sum = gru->bias[2*N + i];
for (j=0;j<M;j++)
sum += gru->input_weights[2*N + j*stride + i]*input[j];
for (j=0;j<N;j++)
sum += gru->recurrent_weights[2*N + j*stride + i]*state[j]*r[j];
h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*sum);
}
r[i] = gru->bias[N + i];
gemm_accum(r, &gru->input_weights[N], N, M, stride, input);
gemm_accum(r, &gru->recurrent_weights[N], N, N, stride, state);
for (i=0;i<N;i++)
r[i] = sigmoid_approx(WEIGHTS_SCALE*r[i]);

/* Compute output. */
for (i=0;i<N;i++)
h[i] = gru->bias[2*N + i];
for (i=0;i<N;i++)
tmp[i] = state[i] * r[i];
gemm_accum(h, &gru->input_weights[2*N], N, M, stride, input);
gemm_accum(h, &gru->recurrent_weights[2*N], N, N, stride, tmp);
for (i=0;i<N;i++)
h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*h[i]);
for (i=0;i<N;i++)
state[i] = h[i];
}

0 comments on commit 9791b22

Please sign in to comment.
You can’t perform that action at this time.