diff --git a/lib_mic_array/api/mic_array_frame.h b/lib_mic_array/api/mic_array_frame.h index fc9654d6..d5340b34 100644 --- a/lib_mic_array/api/mic_array_frame.h +++ b/lib_mic_array/api/mic_array_frame.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015-2017, XMOS Ltd, All rights reserved +// Copyright (c) 2015-2019, XMOS Ltd, All rights reserved #ifndef MIC_ARRAY_FRAME_H_ #define MIC_ARRAY_FRAME_H_ @@ -6,6 +6,22 @@ #include "mic_array_conf.h" #include "dsp_fft.h" +#ifndef MIC_DUAL_ENABLED + #define MIC_DUAL_ENABLED (0) +#endif + +// MIC_DUAL_FRAME_SIZE has no meaning if MIC_DUAL_ENABLED is false. +// Only define MIC_DUAL_FRAME_SIZE to a default value if MIC_DUAL_ENABLED is true. +#ifndef MIC_DUAL_FRAME_SIZE + #if defined(MIC_DUAL_ENABLED) && (MIC_DUAL_ENABLED != 0) + #define MIC_DUAL_FRAME_SIZE (1) + #endif +#endif + +#ifndef MIC_ARRAY_FRAME_SIZE + #define MIC_ARRAY_FRAME_SIZE (1) +#endif + #ifndef MIC_ARRAY_WORD_LENGTH_SHORT #define MIC_ARRAY_WORD_LENGTH_SHORT 0 #endif diff --git a/lib_mic_array/src/decimate_to_pcm_4ch.S b/lib_mic_array/src/decimate_to_pcm_4ch.S index d15bf470..2f8a382d 100644 --- a/lib_mic_array/src/decimate_to_pcm_4ch.S +++ b/lib_mic_array/src/decimate_to_pcm_4ch.S @@ -1,359 +1,359 @@ // Copyright (c) 2015-2019, XMOS Ltd, All rights reserved - .section .dp.data,"awd",@progbits - .text - -#include "mic_array_conf.h" - -#define USE_SINGLE_CHAN_SAVING 0 - -#ifndef MIC_ARRAY_MAX_FRAME_SIZE_LOG2 - #error MIC_ARRAY_MAX_FRAME_SIZE_LOG2 is not defined in mic_array_conf.h -#endif - -#ifndef MIC_ARRAY_DC_OFFSET_LOG2 - #define MIC_ARRAY_DC_OFFSET_LOG2 8 + .section .dp.data,"awd",@progbits + .text + +#include "mic_array_conf.h" + +#define USE_SINGLE_CHAN_SAVING 0 + +#ifndef MIC_ARRAY_MAX_FRAME_SIZE_LOG2 + #error MIC_ARRAY_MAX_FRAME_SIZE_LOG2 is not defined in mic_array_conf.h +#endif + +#ifndef MIC_ARRAY_DC_OFFSET_LOG2 + #define MIC_ARRAY_DC_OFFSET_LOG2 8 #endif #ifndef MIC_ARRAY_FIXED_GAIN #define MIC_ARRAY_FIXED_GAIN 0 //x 6.02db. 
Apply a fixed gain to the outputs -#endif - -.xtacommand "analyse endpoints input_0_0_ep input_1_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_1_0_ep input_2_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_2_0_ep input_3_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_3_0_ep input_4_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_4_0_ep input_5_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_5_0_ep input_6_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_6_0_ep input_7_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_7_0_ep input_0_1_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_0_1_ep input_1_1_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_1_1_ep input_2_1_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_2_1_ep input_3_1_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_3_1_ep input_4_1_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_4_1_ep input_5_1_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_5_1_ep input_6_1_ep","", __FILE__, __LINE__ -.xtacommand 
"set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_6_1_ep input_7_1_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ -.xtacommand "analyse endpoints input_7_1_ep input_0_0_ep","", __FILE__, __LINE__ -.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ - -#define SECOND_STAGE_COEF_COUNT 16 //This must reflect the implementation in mic_array_decimate_to_pcm_4ch_fir_impl -#define THIRD_STAGE_COEFS_PER_STAGE 32 //This must reflect the implementation in third_stage_fir_impl -#define THIRD_STAGE_COEFS_PER_ROW 63 //This must reflect the implementation in third_stage_fir_impl +#endif + +.xtacommand "analyse endpoints input_0_0_ep input_1_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_1_0_ep input_2_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_2_0_ep input_3_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_3_0_ep input_4_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_4_0_ep input_5_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_5_0_ep input_6_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_6_0_ep input_7_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_7_0_ep input_0_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_0_1_ep input_1_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_1_1_ep 
input_2_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_2_1_ep input_3_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_3_1_ep input_4_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_4_1_ep input_5_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_5_1_ep input_6_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_6_1_ep input_7_1_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ +.xtacommand "analyse endpoints input_7_1_ep input_0_0_ep","", __FILE__, __LINE__ +.xtacommand "set required - 2604 ns","", __FILE__, __LINE__ + +#define SECOND_STAGE_COEF_COUNT 16 //This must reflect the implementation in mic_array_decimate_to_pcm_4ch_fir_impl +#define THIRD_STAGE_COEFS_PER_STAGE 32 //This must reflect the implementation in third_stage_fir_impl +#define THIRD_STAGE_COEFS_PER_ROW 63 //This must reflect the implementation in third_stage_fir_impl #define MAX_DECIMATION_FACTOR 12 - -//derived defines -#define SECOND_STAGE_BUFFER_SIZE (SECOND_STAGE_COEF_COUNT*2) - -////////////////////////////////////////////////////////////////////////////////////// - -//This stuff is at the start of the stack for really quick access -#define S_STORAGE 0 - -#define S_UNUSED 0 -#define S_C_INPUT 1 - -#define S_C_OUTPUT 2 -#define S_CHAN_COUNT 3 - -#define S_D_DC_OFFSET_LOG2_CH01 2 -#define S_D_DC_OFFSET_LOG2_CH23 3 - -#define S_DC_OFFSET_LOG2_CH0 4 -#define S_DC_OFFSET_LOG2_CH1 5 -#define S_DC_OFFSET_LOG2_CH2 6 -#define S_DC_OFFSET_LOG2_CH3 7 - -#define S_DC_OFFSET_SAMPLE_NO 8 - + +//derived defines +#define SECOND_STAGE_BUFFER_SIZE (SECOND_STAGE_COEF_COUNT*2) + 
+////////////////////////////////////////////////////////////////////////////////////// + +//This stuff is at the start of the stack for really quick access +#define S_STORAGE 0 + +#define S_UNUSED 0 +#define S_C_INPUT 1 + +#define S_C_OUTPUT 2 +#define S_CHAN_COUNT 3 + +#define S_D_DC_OFFSET_LOG2_CH01 2 +#define S_D_DC_OFFSET_LOG2_CH23 3 + +#define S_DC_OFFSET_LOG2_CH0 4 +#define S_DC_OFFSET_LOG2_CH1 5 +#define S_DC_OFFSET_LOG2_CH2 6 +#define S_DC_OFFSET_LOG2_CH3 7 + +#define S_DC_OFFSET_SAMPLE_NO 8 + #define S_FIR_GAIN_COMP 9 - -#define S_FAR_END_CHANNEL_0 10 -#define S_FAR_END_CHANNEL_1 11 -#define S_FAR_END_CHANNEL_2 12 -#define S_FAR_END_CHANNEL_3 13 + +#define S_FAR_END_CHANNEL_0 10 +#define S_FAR_END_CHANNEL_1 11 +#define S_FAR_END_CHANNEL_2 12 +#define S_FAR_END_CHANNEL_3 13 #define S_ASYNC_INTERFACE 14 #define S_STORAGE_SIZE 16 - -////////////////////////////////////////////////////////////////////////////////////// -//this address must be double word aligned -#define S_SECOND_STAGE_DATA (S_STORAGE + S_STORAGE_SIZE) -#define S_SECOND_STAGE_DATA_SIZE (16*2*4) //16 words by 2 blocks by 4 channels - -////////////////////////////////////////////////////////////////////////////////////// - -#define S_THIRD_STAGE (S_SECOND_STAGE_DATA + S_SECOND_STAGE_DATA_SIZE) - -#define S_D_THIRD_STAGE_PHASE_AND_COUNT 0 //0 1 - //2 3 -#define S_D_THIRD_STAGE_POINTERS 1 //4 5 -#define S_D_THIRD_STAGE_CH0_ACC 3 //6 7 -#define S_D_THIRD_STAGE_CH1_ACC 4 //8 9 -#define S_D_THIRD_STAGE_CH2_ACC 5 //10 11 -#define S_D_THIRD_STAGE_CH3_ACC 6 //12 13 - -#define S_THIRD_STAGE_PHASE 0 //need to be 0-11 for divide_by_four -#define S_THIRD_STAGE_PHASE_COUNT 1 -#define S_THIRD_STAGE_DATA_POINTER 2 -#define S_THIRD_STAGE_COEFS_POINER 3 -#define S_THIRD_STAGE_COEFS_PHASE 4 -#define S_THIRD_STAGE_UNUSED 5 -#define S_THIRD_STAGE_SIZE (16) - -////////////////////////////////////////////////////////////////////////////////////// - -#define S_OUTPUT_STORAGE (S_THIRD_STAGE+S_THIRD_STAGE_SIZE) 
-#define S_D_OUTPUT_STORAGE_01 0 -#define S_D_OUTPUT_STORAGE_23 1 -#define S_OUTPUT_STORAGE_SIZE 4 - -////////////////////////////////////////////////////////////////////////////////////// - -//Note: this will survive a reconfig -#define S_DC_ELIMINATE (S_OUTPUT_STORAGE+S_OUTPUT_STORAGE_SIZE) -#define S_PREV_X_0 0 -#define S_PREV_X_1 1 -#define S_PREV_X_2 2 -#define S_PREV_X_3 3 - -#define S_D_PREV_X_01 0 -#define S_D_PREV_X_23 1 -#define S_D_PREV_Y_0 2 -#define S_D_PREV_Y_1 3 -#define S_D_PREV_Y_2 4 -#define S_D_PREV_Y_3 5 -#define DC_ELIMINATE_STACK_SIZE 12 - -//frame -#define S_FRAME_OFFSET (S_DC_ELIMINATE + DC_ELIMINATE_STACK_SIZE) + +////////////////////////////////////////////////////////////////////////////////////// +//this address must be double word aligned +#define S_SECOND_STAGE_DATA (S_STORAGE + S_STORAGE_SIZE) +#define S_SECOND_STAGE_DATA_SIZE (16*2*4) //16 words by 2 blocks by 4 channels + +////////////////////////////////////////////////////////////////////////////////////// + +#define S_THIRD_STAGE (S_SECOND_STAGE_DATA + S_SECOND_STAGE_DATA_SIZE) + +#define S_D_THIRD_STAGE_PHASE_AND_COUNT 0 //0 1 + //2 3 +#define S_D_THIRD_STAGE_POINTERS 1 //4 5 +#define S_D_THIRD_STAGE_CH0_ACC 3 //6 7 +#define S_D_THIRD_STAGE_CH1_ACC 4 //8 9 +#define S_D_THIRD_STAGE_CH2_ACC 5 //10 11 +#define S_D_THIRD_STAGE_CH3_ACC 6 //12 13 + +#define S_THIRD_STAGE_PHASE 0 //need to be 0-11 for divide_by_four +#define S_THIRD_STAGE_PHASE_COUNT 1 +#define S_THIRD_STAGE_DATA_POINTER 2 +#define S_THIRD_STAGE_COEFS_POINER 3 +#define S_THIRD_STAGE_COEFS_PHASE 4 +#define S_THIRD_STAGE_UNUSED 5 +#define S_THIRD_STAGE_SIZE (16) + +////////////////////////////////////////////////////////////////////////////////////// + +#define S_OUTPUT_STORAGE (S_THIRD_STAGE+S_THIRD_STAGE_SIZE) +#define S_D_OUTPUT_STORAGE_01 0 +#define S_D_OUTPUT_STORAGE_23 1 +#define S_OUTPUT_STORAGE_SIZE 4 + +////////////////////////////////////////////////////////////////////////////////////// + +//Note: this will 
survive a reconfig +#define S_DC_ELIMINATE (S_OUTPUT_STORAGE+S_OUTPUT_STORAGE_SIZE) +#define S_PREV_X_0 0 +#define S_PREV_X_1 1 +#define S_PREV_X_2 2 +#define S_PREV_X_3 3 + +#define S_D_PREV_X_01 0 +#define S_D_PREV_X_23 1 +#define S_D_PREV_Y_0 2 +#define S_D_PREV_Y_1 3 +#define S_D_PREV_Y_2 4 +#define S_D_PREV_Y_3 5 +#define DC_ELIMINATE_STACK_SIZE 12 + +//frame +#define S_FRAME_OFFSET (S_DC_ELIMINATE + DC_ELIMINATE_STACK_SIZE) #define S_D_FRAME_POINTER_AND_INDEX_0 3 #define S_D_FRAME_POINTER_AND_INDEX_1 4 #define S_D_FRAME_NO_OVERLAPPING 1 -#define S_D_METADATA_POINTERS 2 -#define S_FRAME_GAIN 0 //this is the current gain on the frame -#define S_FRAME_SIZE_LOG2 1 //this is the current frame size log2 -#define S_FRAME_NUMBER 2 //should this be presistant between configs? -#define S_OVERLAPPING_FRAMES 3 //0 for on overlap, 1 for overlapping -#define S_METADATA_POINTER_0 4 -#define S_METADATA_POINTER_1 5 -#define S_FRAME_POINTER_0 6 //pointer to the current frame -#define S_FRAME_0_INDEX 7 //index to write to the current frame -#define S_FRAME_POINTER_1 8 //pointer to the next frame -#define S_FRAME_1_INDEX 9 //index to write to the next frame -#define S_FRAME_SIZE 10 //this is the current frame size log2 -#define FRAME_STACK_SIZE 12 - -#define S_MIC_CALIB_OFFSET (S_FRAME_OFFSET+FRAME_STACK_SIZE) -#define S_MIC_CALIB_0 0 -#define S_MIC_CALIB_1 1 -#define S_D_MIC_CALIB_01 0 -#define S_MIC_CALIB_2 2 -#define S_MIC_CALIB_3 3 -#define S_D_MIC_CALIB_23 1 -#define MIC_CALIB_STACK_SIZE 4 - -#define S_SETTINGS_OFFSET (S_MIC_CALIB_OFFSET+MIC_CALIB_STACK_SIZE) -#define S_WINDOWING_ENABLED 0 //0 for disabled, pointer for enabled -#define S_INDEX_BITREVERSING_ENABLED 1 -#define S_DC_OFFSET_REMOVAL_ENABLED 2 -#define S_DECIMATION_FACTOR_is_now_S_THIRD_STAGE_PHASE_COUNT 3 -#define S_MIC_GAIN_COMP 4 -#define S_SETTINGS_SIZE 6 - -//This must be an even number -#define STACKWORDS (S_SETTINGS_OFFSET + S_SETTINGS_SIZE) - -//This is used for commenting within marcos -#define 
comment(X) - -//TODO - use this when everything works -//TODO then reduce the memory usage -//Note: there could be two implementations of this where one never writes to -//memory 28-32 as they are never used -#define INPUT_TO_OFFSET_12_15(OFFSET) \ - ldw r1, sp[S_C_INPUT]; \ - in r0, res[r1];\ - {stw r0, dp[0*SECOND_STAGE_BUFFER_SIZE + OFFSET]; in r0, res[r1]};\ - {stw r0, dp[1*SECOND_STAGE_BUFFER_SIZE + OFFSET]; in r0, res[r1]};\ - stw r0, dp[2*SECOND_STAGE_BUFFER_SIZE + OFFSET];\ - in r0, res[r1];\ - {stw r0, dp[3*SECOND_STAGE_BUFFER_SIZE + OFFSET]} - -//This inputs from the channel to the pdm interface and saves it twice in the -//second stage FIR at offset OFFSET and OFFSET + 16 -#define INPUT_TO_OFFSET(OFFSET) \ - ldw r1, sp[S_C_INPUT]; \ - in r0, res[r1];\ - {stw r0, dp[0*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ - {stw r0, dp[0*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]; in r0, res[r1]};\ - {stw r0, dp[1*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ - {stw r0, dp[1*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]; in r0, res[r1]};\ - {stw r0, dp[2*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ - {stw r0, dp[2*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]};\ - in r0, res[r1];\ - {stw r0, dp[3*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ - {stw r0, dp[3*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]} - - -//setup divider by N registers (coefs, data, accumulators) -//must avoid r0 and r1 for persistant state as they will be clobbered by INPUT_TO_OFFSET - //coef = coef[THIRD_STAGE_COEFS_PER_STAGE-1] ---this is done offline - //coef = coef[THIRD_STAGE_COEFS_PER_ROW*S_THIRD_STAGE_PHASE - S_THIRD_STAGE_COEFS_PHASE] - //data0 = data[THIRD_STAGE_COEFS_PER_STAGE*S_THIRD_STAGE_PHASE*4] - //data1 = data0[THIRD_STAGE_COEFS_PER_STAGE] -#define DIVIDE_BY_N_0()\ - ldaw r0, sp[S_THIRD_STAGE];\ - {ldw r7, r0[S_THIRD_STAGE_PHASE]; ldc r6, THIRD_STAGE_COEFS_PER_ROW};\ - mul r6, r6, r7;\ - {ldw r5, r0[S_THIRD_STAGE_COEFS_PHASE]; ldc r8, 
THIRD_STAGE_COEFS_PER_STAGE};\ - {sub r6, r6, r5; ldw r9, r0[S_THIRD_STAGE_COEFS_POINER]};\ - ldaw r9, r9[r6];\ - mul r7, r7, r8;\ - ldw r6, sp[S_CHAN_COUNT];\ - mul r7, r7, r6;\ - ldw r10, r0[S_THIRD_STAGE_DATA_POINTER];\ - ldaw r10, r10[r7];\ - ldaw r11, r10[r8];\ - ldd r8, r7, r0[S_D_THIRD_STAGE_CH0_ACC];\ - ldd r6, r5, r0[S_D_THIRD_STAGE_CH1_ACC];\ - bl third_stage_fir_impl;\ - ldaw r0, sp[S_THIRD_STAGE];\ - std r8, r7, r0[S_D_THIRD_STAGE_CH0_ACC];\ - std r6, r5, r0[S_D_THIRD_STAGE_CH1_ACC] - -#define N_DIVIDE_BY_N_1()\ - {ldc r0, THIRD_STAGE_COEFS_PER_STAGE};\ - ldaw r9, r9[-r0];\ - ldc r0, THIRD_STAGE_COEFS_PER_STAGE-24;\ - ldaw r10, r11[r0];\ - {ldc r0, THIRD_STAGE_COEFS_PER_STAGE};\ - ldaw r11, r10[r0];\ - ldaw r0, sp[S_THIRD_STAGE];\ - ldd r8, r7, r0[S_D_THIRD_STAGE_CH2_ACC];\ - ldd r6, r5, r0[S_D_THIRD_STAGE_CH3_ACC];\ - bl third_stage_fir_impl;\ - ldaw r0, sp[S_THIRD_STAGE];\ - std r8, r7, r0[S_D_THIRD_STAGE_CH2_ACC];\ - std r6, r5, r0[S_D_THIRD_STAGE_CH3_ACC] - -#define DC_OFFSET_REMOVAL(PREV_Y, PREV_X, REGISTER) \ - ldd r6, r5, r4[PREV_Y];\ - ldc r7, MIC_ARRAY_DC_OFFSET_LOG2;\ - ashr r9, r6, r7;\ - lextract r8, r6, r5, r7, 32;\ - lsub r11, r5, r5, r8, r4; comment(using the lowest bit of r4 which is always 0) \ - lsub r11, r6, r6, r9, r11; \ - {ldw r7, r4[PREV_X]};\ - ashr r9, REGISTER, 32;\ - ashr r10, r7, 32;\ - lsub r11, r8, REGISTER, r7, r4; comment(same here) \ - lsub r11, r9, r9, r10, r11;\ - {ldc r10, 16};\ - lextract r9, r9, r8, r10, 32;\ - {shl r8, r8, r10; stw REGISTER, r4[PREV_X]};\ - lsub r11, r5, r5, r8, r4; comment(same here) \ - lsub r11, r6, r6, r9, r11;\ - std r6, r5, r4[PREV_Y];\ - lextract REGISTER, r6, r5, r10, 32 - -//r6 is the AUDIO_POINTER_INDEX --note that on the second pass r0-r3 will need to be reloaded as this modified them -#define APPLY_WINDOW_FN(P)\ - ldaw r9, sp[S_SETTINGS_OFFSET];\ - ldw r9, r9[S_WINDOWING_ENABLED];\ - bf r9, done_windowing ## P;\ - apply_windowing ## P:;\ - ldw r5, r4[S_FRAME_SIZE_LOG2];\ - {shl r5, r6, 
1; mkmsk r8, r5};\ - {lss r5, r8, r5; ldc r7, 31};\ - {bf r5, skip ## P ; mov r4, r6};\ - sub r4, r8, r6;\ - skip ## P:;\ - {ldc r10, 0; ldc r11, 0};\ - {ldw r4, r9[r4]};\ - maccs r10, r11, r4, r0;\ - lextract r0, r10, r11, r7, 32;\ - {ldc r10, 0; ldc r11, 0};\ - maccs r10, r11, r4, r1;\ - lextract r1, r10, r11, r7, 32;\ - {ldc r10, 0; ldc r11, 0};\ - maccs r10, r11, r4, r2;\ - lextract r2, r10, r11, r7, 32;\ - {ldc r10, 0; ldc r11, 0};\ - maccs r10, r11, r4, r3;\ - lextract r3, r10, r11, r7, 32;\ - done_windowing ## P: - -//r4 is the address of the S_FRAME_OFFSET_SECTION -//r6 is the AUDIO_POINTER_INDEX -#if MIC_ARRAY_WORD_LENGTH_SHORT - #define SAVE_SAMPLES_BIT_REVERSED(POINTER)\ - {ldw r5, r4[S_FRAME_SIZE_LOG2]; ldc r7, 32};\ - {bitrev r6, r6; ldc r8, 2};\ - {sub r9, r7, r5;ldw r11, sp[S_CHAN_COUNT]};\ - {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+2)};\ - {shl r8, r11, r8;ldw r7, r4[POINTER] };\ - {add r8, r7, r8;shr r6, r6, r9;};\ - {shr r0, r0, 16; shr r1, r1, 16};\ - {shr r2, r2, 16; shr r3, r3, 16};\ - {shl r1, r1, 16; shl r3, r3, 16};\ - {add r1, r1, r0; add r3, r3, r2};\ - stw r3, r8[r6];\ - stw r1, r7[r6] - -#else - #define SAVE_SAMPLES_BIT_REVERSED(POINTER)\ - {ldw r5, r4[S_FRAME_SIZE_LOG2]; ldc r7, 32};\ - {bitrev r6, r6; ldc r8, 2};\ - {sub r9, r7, r5;ldw r11, sp[S_CHAN_COUNT]};\ - {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+3)};\ - {shl r8, r11, r8;ldw r7, r4[POINTER] };\ - {add r8, r7, r8;shr r6, r6, r9;};\ - std r3, r2, r8[r6];\ - std r1, r0, r7[r6] - -#endif - -//r4 is the address of the S_FRAME_OFFSET_SECTION -//r6 is the AUDIO_POINTER_INDEX -#if MIC_ARRAY_WORD_LENGTH_SHORT - #define SAVE_SAMPLES_PACKED(POINTER)\ - {ldc r8, 2 ;ldw r11, sp[S_CHAN_COUNT]};\ - {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+2)};\ - {shl r8, r11, r8;ldw r7, r4[POINTER] };\ - {add r8, r7, r8; nop};\ - {shr r0, r0, 16; shr r1, r1, 16};\ - {shr r2, r2, 16; shr r3, r3, 16};\ - {shl r1, r1, 16; shl r3, r3, 16};\ - {add r1, r1, r0; add 
r3, r3, r2};\ - stw r3, r8[r6];\ - stw r1, r7[r6] - -#else - #define SAVE_SAMPLES_PACKED(POINTER)\ - {ldc r8, 2 ; ldw r11, sp[S_CHAN_COUNT]};\ - {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+3)};\ - {shl r8, r11, r8;ldw r7, r4[POINTER] };\ - {add r8, r7, r8; nop};\ - std r3, r2, r8[r6];\ - std r1, r0, r7[r6] - -#endif +#define S_D_METADATA_POINTERS 2 +#define S_FRAME_GAIN 0 //this is the current gain on the frame +#define S_FRAME_SIZE_LOG2 1 //this is the current frame size log2 +#define S_FRAME_NUMBER 2 //should this be presistant between configs? +#define S_OVERLAPPING_FRAMES 3 //0 for on overlap, 1 for overlapping +#define S_METADATA_POINTER_0 4 +#define S_METADATA_POINTER_1 5 +#define S_FRAME_POINTER_0 6 //pointer to the current frame +#define S_FRAME_0_INDEX 7 //index to write to the current frame +#define S_FRAME_POINTER_1 8 //pointer to the next frame +#define S_FRAME_1_INDEX 9 //index to write to the next frame +#define S_FRAME_SIZE 10 //this is the current frame size log2 +#define FRAME_STACK_SIZE 12 + +#define S_MIC_CALIB_OFFSET (S_FRAME_OFFSET+FRAME_STACK_SIZE) +#define S_MIC_CALIB_0 0 +#define S_MIC_CALIB_1 1 +#define S_D_MIC_CALIB_01 0 +#define S_MIC_CALIB_2 2 +#define S_MIC_CALIB_3 3 +#define S_D_MIC_CALIB_23 1 +#define MIC_CALIB_STACK_SIZE 4 + +#define S_SETTINGS_OFFSET (S_MIC_CALIB_OFFSET+MIC_CALIB_STACK_SIZE) +#define S_WINDOWING_ENABLED 0 //0 for disabled, pointer for enabled +#define S_INDEX_BITREVERSING_ENABLED 1 +#define S_DC_OFFSET_REMOVAL_ENABLED 2 +#define S_DECIMATION_FACTOR_is_now_S_THIRD_STAGE_PHASE_COUNT 3 +#define S_MIC_GAIN_COMP 4 +#define S_SETTINGS_SIZE 6 + +//This must be an even number +#define STACKWORDS (S_SETTINGS_OFFSET + S_SETTINGS_SIZE) + +//This is used for commenting within marcos +#define comment(X) + +//TODO - use this when everything works +//TODO then reduce the memory usage +//Note: there could be two implementations of this where one never writes to +//memory 28-32 as they are never used +#define 
INPUT_TO_OFFSET_12_15(OFFSET) \ + ldw r1, sp[S_C_INPUT]; \ + in r0, res[r1];\ + {stw r0, dp[0*SECOND_STAGE_BUFFER_SIZE + OFFSET]; in r0, res[r1]};\ + {stw r0, dp[1*SECOND_STAGE_BUFFER_SIZE + OFFSET]; in r0, res[r1]};\ + stw r0, dp[2*SECOND_STAGE_BUFFER_SIZE + OFFSET];\ + in r0, res[r1];\ + {stw r0, dp[3*SECOND_STAGE_BUFFER_SIZE + OFFSET]} + +//This inputs from the channel to the pdm interface and saves it twice in the +//second stage FIR at offset OFFSET and OFFSET + 16 +#define INPUT_TO_OFFSET(OFFSET) \ + ldw r1, sp[S_C_INPUT]; \ + in r0, res[r1];\ + {stw r0, dp[0*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ + {stw r0, dp[0*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]; in r0, res[r1]};\ + {stw r0, dp[1*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ + {stw r0, dp[1*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]; in r0, res[r1]};\ + {stw r0, dp[2*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ + {stw r0, dp[2*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]};\ + in r0, res[r1];\ + {stw r0, dp[3*SECOND_STAGE_BUFFER_SIZE + OFFSET]};\ + {stw r0, dp[3*SECOND_STAGE_BUFFER_SIZE + SECOND_STAGE_COEF_COUNT + OFFSET]} + + +//setup divider by N registers (coefs, data, accumulators) +//must avoid r0 and r1 for persistant state as they will be clobbered by INPUT_TO_OFFSET + //coef = coef[THIRD_STAGE_COEFS_PER_STAGE-1] ---this is done offline + //coef = coef[THIRD_STAGE_COEFS_PER_ROW*S_THIRD_STAGE_PHASE - S_THIRD_STAGE_COEFS_PHASE] + //data0 = data[THIRD_STAGE_COEFS_PER_STAGE*S_THIRD_STAGE_PHASE*4] + //data1 = data0[THIRD_STAGE_COEFS_PER_STAGE] +#define DIVIDE_BY_N_0()\ + ldaw r0, sp[S_THIRD_STAGE];\ + {ldw r7, r0[S_THIRD_STAGE_PHASE]; ldc r6, THIRD_STAGE_COEFS_PER_ROW};\ + mul r6, r6, r7;\ + {ldw r5, r0[S_THIRD_STAGE_COEFS_PHASE]; ldc r8, THIRD_STAGE_COEFS_PER_STAGE};\ + {sub r6, r6, r5; ldw r9, r0[S_THIRD_STAGE_COEFS_POINER]};\ + ldaw r9, r9[r6];\ + mul r7, r7, r8;\ + ldw r6, sp[S_CHAN_COUNT];\ + mul r7, r7, r6;\ + ldw r10, r0[S_THIRD_STAGE_DATA_POINTER];\ + 
ldaw r10, r10[r7];\ + ldaw r11, r10[r8];\ + ldd r8, r7, r0[S_D_THIRD_STAGE_CH0_ACC];\ + ldd r6, r5, r0[S_D_THIRD_STAGE_CH1_ACC];\ + bl third_stage_fir_impl;\ + ldaw r0, sp[S_THIRD_STAGE];\ + std r8, r7, r0[S_D_THIRD_STAGE_CH0_ACC];\ + std r6, r5, r0[S_D_THIRD_STAGE_CH1_ACC] + +#define N_DIVIDE_BY_N_1()\ + {ldc r0, THIRD_STAGE_COEFS_PER_STAGE};\ + ldaw r9, r9[-r0];\ + ldc r0, THIRD_STAGE_COEFS_PER_STAGE-24;\ + ldaw r10, r11[r0];\ + {ldc r0, THIRD_STAGE_COEFS_PER_STAGE};\ + ldaw r11, r10[r0];\ + ldaw r0, sp[S_THIRD_STAGE];\ + ldd r8, r7, r0[S_D_THIRD_STAGE_CH2_ACC];\ + ldd r6, r5, r0[S_D_THIRD_STAGE_CH3_ACC];\ + bl third_stage_fir_impl;\ + ldaw r0, sp[S_THIRD_STAGE];\ + std r8, r7, r0[S_D_THIRD_STAGE_CH2_ACC];\ + std r6, r5, r0[S_D_THIRD_STAGE_CH3_ACC] + +#define DC_OFFSET_REMOVAL(PREV_Y, PREV_X, REGISTER) \ + ldd r6, r5, r4[PREV_Y];\ + ldc r7, MIC_ARRAY_DC_OFFSET_LOG2;\ + ashr r9, r6, r7;\ + lextract r8, r6, r5, r7, 32;\ + lsub r11, r5, r5, r8, r4; comment(using the lowest bit of r4 which is always 0) \ + lsub r11, r6, r6, r9, r11; \ + {ldw r7, r4[PREV_X]};\ + ashr r9, REGISTER, 32;\ + ashr r10, r7, 32;\ + lsub r11, r8, REGISTER, r7, r4; comment(same here) \ + lsub r11, r9, r9, r10, r11;\ + {ldc r10, 16};\ + lextract r9, r9, r8, r10, 32;\ + {shl r8, r8, r10; stw REGISTER, r4[PREV_X]};\ + lsub r11, r5, r5, r8, r4; comment(same here) \ + lsub r11, r6, r6, r9, r11;\ + std r6, r5, r4[PREV_Y];\ + lextract REGISTER, r6, r5, r10, 32 + +//r6 is the AUDIO_POINTER_INDEX --note that on the second pass r0-r3 will need to be reloaded as this modified them +#define APPLY_WINDOW_FN(P)\ + ldaw r9, sp[S_SETTINGS_OFFSET];\ + ldw r9, r9[S_WINDOWING_ENABLED];\ + bf r9, done_windowing ## P;\ + apply_windowing ## P:;\ + ldw r5, r4[S_FRAME_SIZE_LOG2];\ + {shl r5, r6, 1; mkmsk r8, r5};\ + {lss r5, r8, r5; ldc r7, 31};\ + {bf r5, skip ## P ; mov r4, r6};\ + sub r4, r8, r6;\ + skip ## P:;\ + {ldc r10, 0; ldc r11, 0};\ + {ldw r4, r9[r4]};\ + maccs r10, r11, r4, r0;\ + lextract r0, r10, r11, 
r7, 32;\ + {ldc r10, 0; ldc r11, 0};\ + maccs r10, r11, r4, r1;\ + lextract r1, r10, r11, r7, 32;\ + {ldc r10, 0; ldc r11, 0};\ + maccs r10, r11, r4, r2;\ + lextract r2, r10, r11, r7, 32;\ + {ldc r10, 0; ldc r11, 0};\ + maccs r10, r11, r4, r3;\ + lextract r3, r10, r11, r7, 32;\ + done_windowing ## P: + +//r4 is the address of the S_FRAME_OFFSET_SECTION +//r6 is the AUDIO_POINTER_INDEX +#if MIC_ARRAY_WORD_LENGTH_SHORT + #define SAVE_SAMPLES_BIT_REVERSED(POINTER)\ + {ldw r5, r4[S_FRAME_SIZE_LOG2]; ldc r7, 32};\ + {bitrev r6, r6; ldc r8, 2};\ + {sub r9, r7, r5;ldw r11, sp[S_CHAN_COUNT]};\ + {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+2)};\ + {shl r8, r11, r8;ldw r7, r4[POINTER] };\ + {add r8, r7, r8;shr r6, r6, r9;};\ + {shr r0, r0, 16; shr r1, r1, 16};\ + {shr r2, r2, 16; shr r3, r3, 16};\ + {shl r1, r1, 16; shl r3, r3, 16};\ + {add r1, r1, r0; add r3, r3, r2};\ + stw r3, r8[r6];\ + stw r1, r7[r6] + +#else + #define SAVE_SAMPLES_BIT_REVERSED(POINTER)\ + {ldw r5, r4[S_FRAME_SIZE_LOG2]; ldc r7, 32};\ + {bitrev r6, r6; ldc r8, 2};\ + {sub r9, r7, r5;ldw r11, sp[S_CHAN_COUNT]};\ + {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+3)};\ + {shl r8, r11, r8;ldw r7, r4[POINTER] };\ + {add r8, r7, r8;shr r6, r6, r9;};\ + std r3, r2, r8[r6];\ + std r1, r0, r7[r6] + +#endif + +//r4 is the address of the S_FRAME_OFFSET_SECTION +//r6 is the AUDIO_POINTER_INDEX +#if MIC_ARRAY_WORD_LENGTH_SHORT + #define SAVE_SAMPLES_PACKED(POINTER)\ + {ldc r8, 2 ;ldw r11, sp[S_CHAN_COUNT]};\ + {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+2)};\ + {shl r8, r11, r8;ldw r7, r4[POINTER] };\ + {add r8, r7, r8; nop};\ + {shr r0, r0, 16; shr r1, r1, 16};\ + {shr r2, r2, 16; shr r3, r3, 16};\ + {shl r1, r1, 16; shl r3, r3, 16};\ + {add r1, r1, r0; add r3, r3, r2};\ + stw r3, r8[r6];\ + stw r1, r7[r6] + +#else + #define SAVE_SAMPLES_PACKED(POINTER)\ + {ldc r8, 2 ; ldw r11, sp[S_CHAN_COUNT]};\ + {lsu r11, r8, r11 ;ldc r8, (MIC_ARRAY_MAX_FRAME_SIZE_LOG2+3)};\ + {shl r8, r11, 
r8;ldw r7, r4[POINTER] };\ + {add r8, r7, r8; nop};\ + std r3, r2, r8[r6];\ + std r1, r0, r7[r6] + +#endif //r4 is the address of the S_FRAME_OFFSET_SECTION //r6 is the AUDIO_POINTER_INDEX @@ -396,19 +396,19 @@ eeu res[r3];\ internal_channel_## ID ##_done: - -.globl mic_array_decimate_to_pcm_4ch -.align 8 -.type mic_array_decimate_to_pcm_4ch,@function -.cc_top mic_array_decimate_to_pcm_4ch.function -mic_array_decimate_to_pcm_4ch: -.cfi_startproc -.issue_mode dual - DUALENTSP_lu6 STACKWORDS - - ldaw r3, sp[S_STORAGE] - stw r0, r3[S_C_INPUT] - stw r1, r3[S_C_OUTPUT] + +.globl mic_array_decimate_to_pcm_4ch +.align 8 +.type mic_array_decimate_to_pcm_4ch,@function +.cc_top mic_array_decimate_to_pcm_4ch.function +mic_array_decimate_to_pcm_4ch: +.cfi_startproc +.issue_mode dual + DUALENTSP_lu6 STACKWORDS + + ldaw r3, sp[S_STORAGE] + stw r0, r3[S_C_INPUT] + stw r1, r3[S_C_OUTPUT] clre //if r2 != 0 then store the internal channels to the stack @@ -426,38 +426,38 @@ mic_array_decimate_to_pcm_4ch: stw r2, sp[S_FAR_END_CHANNEL_3] internal_channel_setup_complete: - - //Set the dp to point to the data area for the second stage FIR - ldaw dp, sp[S_SECOND_STAGE_DATA] - - outct res[r1], 8 //we say we are ready - inct r2, res[r1] //they say CONFIGURE_DECIMATOR - - // Initialise the DC offset removal. - ldaw r0, sp[S_DC_ELIMINATE]; - ldc r1, 0 - std r1, r1, r0[S_D_PREV_X_01] - std r1, r1, r0[S_D_PREV_X_23] - std r1, r1, r0[S_D_PREV_Y_0] - std r1, r1, r0[S_D_PREV_Y_1] - std r1, r1, r0[S_D_PREV_Y_2] - std r1, r1, r0[S_D_PREV_Y_3] - ldw r1, sp[S_DC_OFFSET_SAMPLE_NO] - ldc r1, 8 // This cotrols the rate of convergence of the DC offset - // for the initial sample. It has little effect, i.e. don't change it. 
- ldw r1, sp[S_DC_OFFSET_LOG2_CH0] - ldw r1, sp[S_DC_OFFSET_LOG2_CH1] - ldw r1, sp[S_DC_OFFSET_LOG2_CH2] - ldw r1, sp[S_DC_OFFSET_LOG2_CH3] - -configure: - DUALENTSP_lu6 0 - //Initialise the stack -- TODO optimise this - ldc r0, (S_DC_ELIMINATE - S_SECOND_STAGE_DATA)/2 - {ldaw r1, sp[S_SECOND_STAGE_DATA];ldc r2, 0} -stack_init_loop: - std r2, r2, r1[r0] - {bt r0, stack_init_loop; sub r0, r0, 1} + + //Set the dp to point to the data area for the second stage FIR + ldaw dp, sp[S_SECOND_STAGE_DATA] + + outct res[r1], 8 //we say we are ready + inct r2, res[r1] //they say CONFIGURE_DECIMATOR + + // Initialise the DC offset removal. + ldaw r0, sp[S_DC_ELIMINATE]; + ldc r1, 0 + std r1, r1, r0[S_D_PREV_X_01] + std r1, r1, r0[S_D_PREV_X_23] + std r1, r1, r0[S_D_PREV_Y_0] + std r1, r1, r0[S_D_PREV_Y_1] + std r1, r1, r0[S_D_PREV_Y_2] + std r1, r1, r0[S_D_PREV_Y_3] + ldw r1, sp[S_DC_OFFSET_SAMPLE_NO] + ldc r1, 8 // This controls the rate of convergence of the DC offset + // for the initial sample. It has little effect, i.e. don't change it. NOTE(review): the ldw before the ldc and the ldw instructions that follow only load into r1, which is overwritten before any visible use; presumably stw was intended, confirm before relying on these stack slots. 
+ ldw r1, sp[S_DC_OFFSET_LOG2_CH0] + ldw r1, sp[S_DC_OFFSET_LOG2_CH1] + ldw r1, sp[S_DC_OFFSET_LOG2_CH2] + ldw r1, sp[S_DC_OFFSET_LOG2_CH3] + +configure: + DUALENTSP_lu6 0 + //Initialise the stack -- TODO optimise this + ldc r0, (S_DC_ELIMINATE - S_SECOND_STAGE_DATA)/2 + {ldaw r1, sp[S_SECOND_STAGE_DATA];ldc r2, 0} +stack_init_loop: + std r2, r2, r1[r0] + {bt r0, stack_init_loop; sub r0, r0, 1} ldc r3, 0 ldaw r4, sp[S_FRAME_OFFSET] @@ -467,17 +467,17 @@ initialise_frame_gain: {ldc r11, 32; ldc r10, (32 - MIC_ARRAY_FIXED_GAIN)} sub r10, r11, r10 stw r10, r4[S_FRAME_GAIN] - -take_config_from_application: - //get the pointer for the config over the output channel - ldw r0, sp[S_C_OUTPUT] - in r2, res[r0] //deciamtor_config - ldw r3, r2[0]; //r3 is the pointer to the decimator_config_common - -//copy the setting out of the struct - ldaw r0, sp[S_FRAME_OFFSET] - ldw r11, r3[0] //frame_size_log2 - here_is_frame_size_log2: + +take_config_from_application: + //get the pointer for the config over the output channel + ldw r0, sp[S_C_OUTPUT] + in r2, res[r0] //deciamtor_config + ldw r3, r2[0]; //r3 is the pointer to the decimator_config_common + +//copy the setting out of the struct + ldaw r0, sp[S_FRAME_OFFSET] + ldw r11, r3[0] //frame_size_log2 + here_is_frame_size_log2: shr r10, r11, 4 bt r10, actualLength stw r11, r0[S_FRAME_SIZE_LOG2] @@ -489,221 +489,221 @@ actualLength: {stw r11, r0[S_FRAME_SIZE] ;ldc r10, 33} clz r11, r11 sub r11, r10, r11 - stw r11, r0[S_FRAME_SIZE_LOG2] -lengthDone: - - - ldaw r0, sp[S_SETTINGS_OFFSET] - - ldw r11, r3[1] //apply_dc_offset - stw r11, r0[S_DC_OFFSET_REMOVAL_ENABLED] - - ldw r11, r3[2] //index_bit_reversal - stw r11, r0[S_INDEX_BITREVERSING_ENABLED] - - ldw r11, r3[3] //windowing_function - stw r11, r0[S_WINDOWING_ENABLED] - - ldw r11, r3[6] //apply mic gain compensation - stw r11, r0[S_MIC_GAIN_COMP] - - ldw r11, r3[7] //fir_gain_comp - stw r11, sp[S_FIR_GAIN_COMP] - - ldw r11, r3[4] //decimation factor - ldaw r10, sp[S_THIRD_STAGE] - 
stw r11, r10[S_THIRD_STAGE_PHASE_COUNT] - - here_is_the_coef_pointer: - {ldw r11, r3[5];} //coef pointer - ldc r9, (THIRD_STAGE_COEFS_PER_STAGE-1) //move the coef pointer along by (THIRD_STAGE_COEFS_PER_STAGE-1) - ldaw r11, r11[r9] - stw r11, r10[S_THIRD_STAGE_COEFS_POINER] - - here_is_the_data_pointer: - {ldw r11, r2[1];} //data pointer - stw r11, r10[S_THIRD_STAGE_DATA_POINTER] - - {ldw r11, r2[6];} //channel count - stw r11, sp[S_CHAN_COUNT] + stw r11, r0[S_FRAME_SIZE_LOG2] +lengthDone: + + + ldaw r0, sp[S_SETTINGS_OFFSET] + + ldw r11, r3[1] //apply_dc_offset + stw r11, r0[S_DC_OFFSET_REMOVAL_ENABLED] + + ldw r11, r3[2] //index_bit_reversal + stw r11, r0[S_INDEX_BITREVERSING_ENABLED] + + ldw r11, r3[3] //windowing_function + stw r11, r0[S_WINDOWING_ENABLED] + + ldw r11, r3[6] //apply mic gain compensation + stw r11, r0[S_MIC_GAIN_COMP] + + ldw r11, r3[7] //fir_gain_comp + stw r11, sp[S_FIR_GAIN_COMP] + + ldw r11, r3[4] //decimation factor + ldaw r10, sp[S_THIRD_STAGE] + stw r11, r10[S_THIRD_STAGE_PHASE_COUNT] + + here_is_the_coef_pointer: + {ldw r11, r3[5];} //coef pointer + ldc r9, (THIRD_STAGE_COEFS_PER_STAGE-1) //move the coef pointer along by (THIRD_STAGE_COEFS_PER_STAGE-1) + ldaw r11, r11[r9] + stw r11, r10[S_THIRD_STAGE_COEFS_POINER] + + here_is_the_data_pointer: + {ldw r11, r2[1];} //data pointer + stw r11, r10[S_THIRD_STAGE_DATA_POINTER] + + {ldw r11, r2[6];} //channel count + stw r11, sp[S_CHAN_COUNT] {ldw r11, r2[7];} // async interface enabled stw r11, sp[S_ASYNC_INTERFACE] - -load_mic_gain_calib: - ldaw r1, sp[S_MIC_CALIB_OFFSET] - ldw r0, r2[2] - stw r0, r1[S_MIC_CALIB_0] - ldw r0, r2[3] - stw r0, r1[S_MIC_CALIB_1] - ldw r0, r2[4] - stw r0, r1[S_MIC_CALIB_2] - ldw r0, r2[5] - stw r0, r1[S_MIC_CALIB_3] - -init_frame_index: - ldaw r11, sp[S_FRAME_OFFSET] - ldc r1, 0 - stw r1, r11[S_FRAME_0_INDEX] - - ldw r1, r11[S_FRAME_SIZE] - shr r1, r1, 1 - stw r1, r11[S_FRAME_1_INDEX] - -get_frame_pointer: - ldw r2, sp[S_C_OUTPUT] - rx_buffer_count: - in r5, 
res[r2] - sub r5, r5, 1 - - bt r5, overlapping - non_overlapping: - {in r1, res[r2];stw r5, r11[S_OVERLAPPING_FRAMES]} // frames pointer - {in r1, res[r2];stw r1, r11[S_FRAME_POINTER_0]} // metadata pointer - saving_non_overlapping_metadata_pointer: + +load_mic_gain_calib: + ldaw r1, sp[S_MIC_CALIB_OFFSET] + ldw r0, r2[2] + stw r0, r1[S_MIC_CALIB_0] + ldw r0, r2[3] + stw r0, r1[S_MIC_CALIB_1] + ldw r0, r2[4] + stw r0, r1[S_MIC_CALIB_2] + ldw r0, r2[5] + stw r0, r1[S_MIC_CALIB_3] + +init_frame_index: + ldaw r11, sp[S_FRAME_OFFSET] + ldc r1, 0 + stw r1, r11[S_FRAME_0_INDEX] + + ldw r1, r11[S_FRAME_SIZE] + shr r1, r1, 1 + stw r1, r11[S_FRAME_1_INDEX] + +get_frame_pointer: + ldw r2, sp[S_C_OUTPUT] + rx_buffer_count: + in r5, res[r2] + sub r5, r5, 1 + + bt r5, overlapping + non_overlapping: + {in r1, res[r2];stw r5, r11[S_OVERLAPPING_FRAMES]} // frames pointer + {in r1, res[r2];stw r1, r11[S_FRAME_POINTER_0]} // metadata pointer + saving_non_overlapping_metadata_pointer: {stw r1, r11[S_METADATA_POINTER_0];ldc r2, 0 } std r2, r2, r1[0] //init the sig bits - std r2, r2, r1[1] //init the sig bits - bu setup_phase_counters - - overlapping: - {in r1, res[r2];} - {in r1, res[r2];stw r1, r11[S_FRAME_POINTER_1]} - saving_overlapping_metadata_pointer_1: + std r2, r2, r1[1] //init the sig bits + bu setup_phase_counters + + overlapping: + {in r1, res[r2];} + {in r1, res[r2];stw r1, r11[S_FRAME_POINTER_1]} + saving_overlapping_metadata_pointer_1: {stw r1, r11[S_METADATA_POINTER_0]; ldc r3, 0} std r3, r3, r1[0] //init the sig bits std r3, r3, r1[1] //init the sig bits - - {in r1, res[r2];stw r5, r11[S_OVERLAPPING_FRAMES]} - {in r1, res[r2];stw r1, r11[S_FRAME_POINTER_0]} - saving_overlapping_metadata_pointer_0: + + {in r1, res[r2];stw r5, r11[S_OVERLAPPING_FRAMES]} + {in r1, res[r2];stw r1, r11[S_FRAME_POINTER_0]} + saving_overlapping_metadata_pointer_0: {stw r1, r11[S_METADATA_POINTER_1];ldc r2, 0 } std r2, r2, r1[0] //init the sig bits - std r2, r2, r1[1] //init the sig bits - 
-setup_phase_counters: - ldaw r10, sp[S_THIRD_STAGE] - ldw r9, r10[S_THIRD_STAGE_PHASE_COUNT] - sub r9, r9, 1 - stw r9, r10[S_THIRD_STAGE_PHASE] - - ldc r9, (THIRD_STAGE_COEFS_PER_STAGE-1) - stw r9, r10[S_THIRD_STAGE_COEFS_PHASE] - -confirm_init_complete: - ldw r4, sp[S_C_OUTPUT] - outct res[r4], 8 //WARNING: do not change this - -input_0_0: - INPUT_TO_OFFSET_12_15(15) - divide_by_n_0_0: - DIVIDE_BY_N_0() - -input_1_0: - INPUT_TO_OFFSET_12_15(14) - divide_by_n_0_1: - N_DIVIDE_BY_N_1() - -input_2_0: - INPUT_TO_OFFSET_12_15(13) - bl post_process - -input_3_0: - INPUT_TO_OFFSET_12_15(12) - ldaw r7, dp[12] - bl divide_by_four - -input_4_0: - INPUT_TO_OFFSET(11) - divide_by_n_1_0: - DIVIDE_BY_N_0(); - -input_5_0: - INPUT_TO_OFFSET(10) - divide_by_n_1_1: - N_DIVIDE_BY_N_1() - -input_6_0: - INPUT_TO_OFFSET(9) - bl post_process - -input_7_0: - INPUT_TO_OFFSET(8) - ldaw r7, dp[8] - bl divide_by_four - -input_0_1: - INPUT_TO_OFFSET(7) - divide_by_n_2_0: - DIVIDE_BY_N_0(); - -input_1_1: - INPUT_TO_OFFSET(6) - divide_by_n_2_1: - N_DIVIDE_BY_N_1() - -input_2_1: - INPUT_TO_OFFSET(5) - bl post_process - -input_3_1: - INPUT_TO_OFFSET(4) - ldaw r7, dp[4] - bl divide_by_four - -input_4_1: - INPUT_TO_OFFSET(3) - divide_by_n_3_0: - DIVIDE_BY_N_0(); - -input_5_1: - INPUT_TO_OFFSET(2) - divide_by_n_3_1: - N_DIVIDE_BY_N_1() - -input_6_1: - INPUT_TO_OFFSET(1) - bl post_process - -input_7_1: - INPUT_TO_OFFSET(0) - ldaw r7, dp[0] - bl divide_by_four - bu input_0_0 - - .cc_bottom mic_array_decimate_to_pcm_4ch.function - .set mic_array_decimate_to_pcm_4ch.nstackwords, STACKWORDS - .globl mic_array_decimate_to_pcm_4ch.nstackwords - .set mic_array_decimate_to_pcm_4ch.maxcores,1 - .globl mic_array_decimate_to_pcm_4ch.maxcores - .set mic_array_decimate_to_pcm_4ch.maxtimers,0 - .globl mic_array_decimate_to_pcm_4ch.maxtimers - .set mic_array_decimate_to_pcm_4ch.maxchanends,0 - .globl mic_array_decimate_to_pcm_4ch.maxchanends -.Lmic_array_decimate_to_pcm_4ch_tmp: - .size 
mic_array_decimate_to_pcm_4ch, .Lmic_array_decimate_to_pcm_4ch_tmp-mic_array_decimate_to_pcm_4ch - .cfi_endproc - -////////////////////////////////////////////////////////////////////////////////////////// - -.globl post_process -.align 8 -.type post_process,@function -.cc_top post_process.function -post_process: - .cfi_startproc - .issue_mode dual - - //increment THIRD_STAGE_PHASE mod THIRD_STAGE_PHASE_COUNT - ldaw r11, sp[S_THIRD_STAGE] - ldw r10, r11[S_THIRD_STAGE_PHASE]; - {add r9, r10, 1; ldw r8, r11[S_THIRD_STAGE_PHASE_COUNT]} - {stw r9, r11[S_THIRD_STAGE_PHASE];eq r8, r8, r9} - + std r2, r2, r1[1] //init the sig bits + +setup_phase_counters: + ldaw r10, sp[S_THIRD_STAGE] + ldw r9, r10[S_THIRD_STAGE_PHASE_COUNT] + sub r9, r9, 1 + stw r9, r10[S_THIRD_STAGE_PHASE] + + ldc r9, (THIRD_STAGE_COEFS_PER_STAGE-1) + stw r9, r10[S_THIRD_STAGE_COEFS_PHASE] + +confirm_init_complete: + ldw r4, sp[S_C_OUTPUT] + outct res[r4], 8 //WARNING: do not change this + +input_0_0: + INPUT_TO_OFFSET_12_15(15) + divide_by_n_0_0: + DIVIDE_BY_N_0() + +input_1_0: + INPUT_TO_OFFSET_12_15(14) + divide_by_n_0_1: + N_DIVIDE_BY_N_1() + +input_2_0: + INPUT_TO_OFFSET_12_15(13) + bl post_process + +input_3_0: + INPUT_TO_OFFSET_12_15(12) + ldaw r7, dp[12] + bl divide_by_four + +input_4_0: + INPUT_TO_OFFSET(11) + divide_by_n_1_0: + DIVIDE_BY_N_0(); + +input_5_0: + INPUT_TO_OFFSET(10) + divide_by_n_1_1: + N_DIVIDE_BY_N_1() + +input_6_0: + INPUT_TO_OFFSET(9) + bl post_process + +input_7_0: + INPUT_TO_OFFSET(8) + ldaw r7, dp[8] + bl divide_by_four + +input_0_1: + INPUT_TO_OFFSET(7) + divide_by_n_2_0: + DIVIDE_BY_N_0(); + +input_1_1: + INPUT_TO_OFFSET(6) + divide_by_n_2_1: + N_DIVIDE_BY_N_1() + +input_2_1: + INPUT_TO_OFFSET(5) + bl post_process + +input_3_1: + INPUT_TO_OFFSET(4) + ldaw r7, dp[4] + bl divide_by_four + +input_4_1: + INPUT_TO_OFFSET(3) + divide_by_n_3_0: + DIVIDE_BY_N_0(); + +input_5_1: + INPUT_TO_OFFSET(2) + divide_by_n_3_1: + N_DIVIDE_BY_N_1() + +input_6_1: + INPUT_TO_OFFSET(1) + bl 
post_process + +input_7_1: + INPUT_TO_OFFSET(0) + ldaw r7, dp[0] + bl divide_by_four + bu input_0_0 + + .cc_bottom mic_array_decimate_to_pcm_4ch.function + .set mic_array_decimate_to_pcm_4ch.nstackwords, STACKWORDS + .globl mic_array_decimate_to_pcm_4ch.nstackwords + .set mic_array_decimate_to_pcm_4ch.maxcores,1 + .globl mic_array_decimate_to_pcm_4ch.maxcores + .set mic_array_decimate_to_pcm_4ch.maxtimers,0 + .globl mic_array_decimate_to_pcm_4ch.maxtimers + .set mic_array_decimate_to_pcm_4ch.maxchanends,0 + .globl mic_array_decimate_to_pcm_4ch.maxchanends +.Lmic_array_decimate_to_pcm_4ch_tmp: + .size mic_array_decimate_to_pcm_4ch, .Lmic_array_decimate_to_pcm_4ch_tmp-mic_array_decimate_to_pcm_4ch + .cfi_endproc + +////////////////////////////////////////////////////////////////////////////////////////// + +.globl post_process +.align 8 +.type post_process,@function +.cc_top post_process.function +post_process: + .cfi_startproc + .issue_mode dual + + //increment THIRD_STAGE_PHASE mod THIRD_STAGE_PHASE_COUNT + ldaw r11, sp[S_THIRD_STAGE] + ldw r10, r11[S_THIRD_STAGE_PHASE]; + {add r9, r10, 1; ldw r8, r11[S_THIRD_STAGE_PHASE_COUNT]} + {stw r9, r11[S_THIRD_STAGE_PHASE];eq r8, r8, r9} + bt r8, output_phase //do this on phase S_THIRD_STAGE_PHASE_COUNT-1 (last phase) {sub r10, r10, 1; bf r10, process_phase} //do this on phase 0 - {sub r10, r10, 1; bf r10, analytics0} //do this on phase 1 - retsp 0 + {sub r10, r10, 1; bf r10, analytics0} //do this on phase 1 + retsp 0 analytics0: @@ -759,72 +759,72 @@ post_process: std r7, r6, r9[1] retsp 0 - - process_phase: - //r0, r1, r2, r3 are used as storage for the output of the 3rd stage FIR - //copy the accumulators in to registers - ldaw r4, sp[S_OUTPUT_STORAGE]; - ldd r0, r1, r4[S_D_OUTPUT_STORAGE_01]; - ldd r2, r3, r4[S_D_OUTPUT_STORAGE_23]; - - //DC offset removal - ldaw r4, sp[S_SETTINGS_OFFSET] - ldw r4, r4[S_DC_OFFSET_REMOVAL_ENABLED] - bf r4, dc_offset_removal_complete - - ldaw r4, sp[S_DC_ELIMINATE] - 
dc_offset_removal_ch0: - DC_OFFSET_REMOVAL(S_D_PREV_Y_0, S_PREV_X_0, r0); - dc_offset_removal_ch1: - DC_OFFSET_REMOVAL(S_D_PREV_Y_1, S_PREV_X_1, r1); - dc_offset_removal_ch2: - DC_OFFSET_REMOVAL(S_D_PREV_Y_2, S_PREV_X_2, r2); - dc_offset_removal_ch3: - DC_OFFSET_REMOVAL(S_D_PREV_Y_3, S_PREV_X_3, r3); - -dc_offset_removal_complete: - -#define COMP(REG)\ - {ldc r5, 0; ldc r6, 0};\ - maccs r5, r6, REG, r4;\ - lsats r5, r6, r7;\ - lextract REG, r5, r6, r7, 32 - - //FIR gain compensation - fir_compensation: // This normally turns the volume up to compensate for the FIR - ldw r4, sp[S_FIR_GAIN_COMP] //load the fir gain comp, it is in 1.4.27 format - {bf r4, gain_compensation;ldc r7, 27} - COMP(r0) - COMP(r1) - COMP(r2) - COMP(r3) - - gain_compensation: // This always turns the volume down - ldaw r4, sp[S_SETTINGS_OFFSET] - ldw r4, r4[S_MIC_GAIN_COMP] - {bf r4, gain_comp_complete;ldc r7, 31} - apply_mic_gain_compensation: - ldaw r4, sp[S_MIC_CALIB_OFFSET] - ldd r6, r5, r4[S_D_MIC_CALIB_01] - {ldc r8, 0; ldc r9, 0} - maccs r8, r9, r0, r5 - lextract r0, r8, r9, r7, 32 - {ldc r8, 0; ldc r9, 0} - maccs r8, r9, r1, r6 - lextract r1, r8, r9, r7, 32 - ldd r6, r5, r4[S_D_MIC_CALIB_23] - {ldc r8, 0; ldc r9, 0} - maccs r8, r9, r2, r5 - lextract r2, r8, r9, r7, 32 - {ldc r8, 0; ldc r9, 0} - maccs r8, r9, r3, r6 - lextract r3, r8, r9, r7, 32 + + process_phase: + //r0, r1, r2, r3 are used as storage for the output of the 3rd stage FIR + //copy the accumulators in to registers + ldaw r4, sp[S_OUTPUT_STORAGE]; + ldd r0, r1, r4[S_D_OUTPUT_STORAGE_01]; + ldd r2, r3, r4[S_D_OUTPUT_STORAGE_23]; + + //DC offset removal + ldaw r4, sp[S_SETTINGS_OFFSET] + ldw r4, r4[S_DC_OFFSET_REMOVAL_ENABLED] + bf r4, dc_offset_removal_complete + + ldaw r4, sp[S_DC_ELIMINATE] + dc_offset_removal_ch0: + DC_OFFSET_REMOVAL(S_D_PREV_Y_0, S_PREV_X_0, r0); + dc_offset_removal_ch1: + DC_OFFSET_REMOVAL(S_D_PREV_Y_1, S_PREV_X_1, r1); + dc_offset_removal_ch2: + DC_OFFSET_REMOVAL(S_D_PREV_Y_2, S_PREV_X_2, r2); + 
dc_offset_removal_ch3: + DC_OFFSET_REMOVAL(S_D_PREV_Y_3, S_PREV_X_3, r3); + +dc_offset_removal_complete: + +#define COMP(REG)\ + {ldc r5, 0; ldc r6, 0};\ + maccs r5, r6, REG, r4;\ + lsats r5, r6, r7;\ + lextract REG, r5, r6, r7, 32 + + //FIR gain compensation + fir_compensation: // This normally turns the volume up to compensate for the FIR + ldw r4, sp[S_FIR_GAIN_COMP] //load the fir gain comp, it is in 1.4.27 format + {bf r4, gain_compensation;ldc r7, 27} + COMP(r0) + COMP(r1) + COMP(r2) + COMP(r3) + + gain_compensation: // This always turns the volume down + ldaw r4, sp[S_SETTINGS_OFFSET] + ldw r4, r4[S_MIC_GAIN_COMP] + {bf r4, gain_comp_complete;ldc r7, 31} + apply_mic_gain_compensation: + ldaw r4, sp[S_MIC_CALIB_OFFSET] + ldd r6, r5, r4[S_D_MIC_CALIB_01] + {ldc r8, 0; ldc r9, 0} + maccs r8, r9, r0, r5 + lextract r0, r8, r9, r7, 32 + {ldc r8, 0; ldc r9, 0} + maccs r8, r9, r1, r6 + lextract r1, r8, r9, r7, 32 + ldd r6, r5, r4[S_D_MIC_CALIB_23] + {ldc r8, 0; ldc r9, 0} + maccs r8, r9, r2, r5 + lextract r2, r8, r9, r7, 32 + {ldc r8, 0; ldc r9, 0} + maccs r8, r9, r3, r6 + lextract r3, r8, r9, r7, 32 gain_comp_complete: -internal_channel_overwrite_begin: +internal_channel_overwrite_begin: setsr 0x1 - nop + nop {bu internal_channel_overwrite_complete; clrsr 0x1} internal_channel_0_event_vector: get r11, ed @@ -841,168 +841,168 @@ internal_channel_overwrite_begin: internal_channel_3_event_vector: get r11, ed in r3, res[r11] //input from the channel (to overwrite channel 3) - {out res[r11], r3; bu internal_channel_overwrite_begin} //output another token to the channel to let it know we have more space - internal_channel_overwrite_complete: - - ldaw r4, sp[S_OUTPUT_STORAGE]; - std r0, r1, r4[S_D_OUTPUT_STORAGE_01]; - std r2, r3, r4[S_D_OUTPUT_STORAGE_23]; - retsp 0 - - output_phase: - -#define EXCHANGE_BUFFERS 0 -#define CONFIGURE_DECIMATOR 1 - - ldaw r4, sp[S_OUTPUT_STORAGE]; - ldd r0, r1, r4[S_D_OUTPUT_STORAGE_01]; - ldd r2, r3, r4[S_D_OUTPUT_STORAGE_23]; - - ldaw r4, 
sp[S_FRAME_OFFSET] - - ldw r6, r4[S_FRAME_0_INDEX] - - APPLY_WINDOW_FN(0) - ldaw r4, sp[S_FRAME_OFFSET] - - ldaw r5, sp[S_SETTINGS_OFFSET] - ldw r5, r5[S_INDEX_BITREVERSING_ENABLED] - { bt r5, index_bitrev_order_0 ; eq r5, r5, 2 } - index_normal_order_0: - SAVE_SAMPLES_NORMAL(S_FRAME_POINTER_0, 0); - bu save_to_frame_complete_0 - index_bitrev_order_0: - bt r5, index_packed_0 - SAVE_SAMPLES_BIT_REVERSED(S_FRAME_POINTER_0); - bu save_to_frame_complete_0 - index_packed_0: - SAVE_SAMPLES_PACKED(S_FRAME_POINTER_0); - save_to_frame_complete_0: - - ldw r5, r4[S_FRAME_0_INDEX] - {add r5, r5, 1;ldw r6, r4[S_OVERLAPPING_FRAMES]} - stw r5, r4[S_FRAME_0_INDEX] + {out res[r11], r3; bu internal_channel_overwrite_begin} //output another token to the channel to let it know we have more space + internal_channel_overwrite_complete: + + ldaw r4, sp[S_OUTPUT_STORAGE]; + std r0, r1, r4[S_D_OUTPUT_STORAGE_01]; + std r2, r3, r4[S_D_OUTPUT_STORAGE_23]; + retsp 0 + + output_phase: + +#define EXCHANGE_BUFFERS 0 +#define CONFIGURE_DECIMATOR 1 + + ldaw r4, sp[S_OUTPUT_STORAGE]; + ldd r0, r1, r4[S_D_OUTPUT_STORAGE_01]; + ldd r2, r3, r4[S_D_OUTPUT_STORAGE_23]; + + ldaw r4, sp[S_FRAME_OFFSET] + + ldw r6, r4[S_FRAME_0_INDEX] + + APPLY_WINDOW_FN(0) + ldaw r4, sp[S_FRAME_OFFSET] + + ldaw r5, sp[S_SETTINGS_OFFSET] + ldw r5, r5[S_INDEX_BITREVERSING_ENABLED] + { bt r5, index_bitrev_order_0 ; eq r5, r5, 2 } + index_normal_order_0: + SAVE_SAMPLES_NORMAL(S_FRAME_POINTER_0, 0); + bu save_to_frame_complete_0 + index_bitrev_order_0: + bt r5, index_packed_0 + SAVE_SAMPLES_BIT_REVERSED(S_FRAME_POINTER_0); + bu save_to_frame_complete_0 + index_packed_0: + SAVE_SAMPLES_PACKED(S_FRAME_POINTER_0); + save_to_frame_complete_0: + + ldw r5, r4[S_FRAME_0_INDEX] + {add r5, r5, 1;ldw r6, r4[S_OVERLAPPING_FRAMES]} + stw r5, r4[S_FRAME_0_INDEX] ldw r11, sp[S_ASYNC_INTERFACE] bt r11, async_send_samples - - bt r6, overlapping_frames - no_overlapping_frames: - - ldw r6, r4[S_FRAME_SIZE] - {eq r5, r5, r6; ldc r6, 0} - bf r5, 
do_the_rest; - - reset_the_frame_index: - stw r6, r4[S_FRAME_0_INDEX] - - write_the_current_frames_metadata: - - ldw r5, r4[S_METADATA_POINTER_0] - ldw r3, r4[S_FRAME_NUMBER] - {stw r3, r5[4]; add r3, r3, 1} //if metadata layout changes then this needs to change too + + bt r6, overlapping_frames + no_overlapping_frames: + + ldw r6, r4[S_FRAME_SIZE] + {eq r5, r5, r6; ldc r6, 0} + bf r5, do_the_rest; + + reset_the_frame_index: + stw r6, r4[S_FRAME_0_INDEX] + + write_the_current_frames_metadata: + + ldw r5, r4[S_METADATA_POINTER_0] + ldw r3, r4[S_FRAME_NUMBER] + {stw r3, r5[4]; add r3, r3, 1} //if metadata layout changes then this needs to change too stw r3, r4[S_FRAME_NUMBER] //and save the new frame number back - - exchange_buffers: - ldw r6, sp[S_C_OUTPUT] - outct res[r6], 8 //WARNING: do not change this - inct r3, res[r6] - - eq r5, r3, EXCHANGE_BUFFERS //if the incomming token is EXCHANGE_BUFFERS then do that - {bt r5, do_the_exchange; eq r5, r3, CONFIGURE_DECIMATOR} //if the incomming token is CONFIGURE_DECIMATOR then do that - - ldap r11, configure - stw r11, sp[0] - ldw lr, sp[0] - retsp 0 - - do_the_exchange: - {in r3, res[r6]; ldc r0, 0} - {std r0, r3, r4[S_D_FRAME_POINTER_AND_INDEX_0]} - - get_the_new_metadata_pointer: - {in r3, res[r6]} + + exchange_buffers: + ldw r6, sp[S_C_OUTPUT] + outct res[r6], 8 //WARNING: do not change this + inct r3, res[r6] + + eq r5, r3, EXCHANGE_BUFFERS //if the incomming token is EXCHANGE_BUFFERS then do that + {bt r5, do_the_exchange; eq r5, r3, CONFIGURE_DECIMATOR} //if the incomming token is CONFIGURE_DECIMATOR then do that + + ldap r11, configure + stw r11, sp[0] + ldw lr, sp[0] + retsp 0 + + do_the_exchange: + {in r3, res[r6]; ldc r0, 0} + {std r0, r3, r4[S_D_FRAME_POINTER_AND_INDEX_0]} + + get_the_new_metadata_pointer: + {in r3, res[r6]} {stw r3, r4[S_METADATA_POINTER_0]} std r0, r0, r3[0] //set the frame sig bits to 0 - std r0, r0, r3[1] //set the frame sig bits to 0 - - ldw r4, sp[S_C_OUTPUT] - outct res[r4], 8 
//WARNING: do not change this - no_overlapping_frames_done: - bu do_the_rest - - overlapping_frames: - - ldaw r4, sp[S_FRAME_OFFSET] - ldw r6, r4[S_FRAME_1_INDEX] //load second frame index - - ldaw r5, sp[S_OUTPUT_STORAGE]; - ldd r0, r1, r5[S_D_OUTPUT_STORAGE_01]; - ldd r2, r3, r5[S_D_OUTPUT_STORAGE_23]; - - APPLY_WINDOW_FN(1) - ldaw r4, sp[S_FRAME_OFFSET] - ldw r6, r4[S_FRAME_1_INDEX] //load second frame index - - ldaw r5, sp[S_SETTINGS_OFFSET] - ldw r5, r5[S_INDEX_BITREVERSING_ENABLED] - { bt r5, index_bitrev_order_1 ; eq r5, r5, 2 } - index_normal_order_1: - SAVE_SAMPLES_NORMAL(S_FRAME_POINTER_1, 1); - bu save_to_frame_complete_1 - index_bitrev_order_1: - bt r5, index_packed_1 - SAVE_SAMPLES_BIT_REVERSED(S_FRAME_POINTER_1); - bu save_to_frame_complete_1 - index_packed_1: - SAVE_SAMPLES_PACKED(S_FRAME_POINTER_1); - save_to_frame_complete_1: - - ldw r5, r4[S_FRAME_1_INDEX] - {add r5, r5, 1;ldw r6, r4[S_FRAME_SIZE]} - stw r5, r4[S_FRAME_1_INDEX] - - - {eq r5, r5, r6; ldc r6, 0} - bf r5, do_the_rest; - at_the_end_of_the_overlapping_half_frame: - - //set the metadata - ldw r5, r4[S_METADATA_POINTER_0] - ldw r3, r4[S_FRAME_NUMBER] + std r0, r0, r3[1] //set the frame sig bits to 0 + + ldw r4, sp[S_C_OUTPUT] + outct res[r4], 8 //WARNING: do not change this + no_overlapping_frames_done: + bu do_the_rest + + overlapping_frames: + + ldaw r4, sp[S_FRAME_OFFSET] + ldw r6, r4[S_FRAME_1_INDEX] //load second frame index + + ldaw r5, sp[S_OUTPUT_STORAGE]; + ldd r0, r1, r5[S_D_OUTPUT_STORAGE_01]; + ldd r2, r3, r5[S_D_OUTPUT_STORAGE_23]; + + APPLY_WINDOW_FN(1) + ldaw r4, sp[S_FRAME_OFFSET] + ldw r6, r4[S_FRAME_1_INDEX] //load second frame index + + ldaw r5, sp[S_SETTINGS_OFFSET] + ldw r5, r5[S_INDEX_BITREVERSING_ENABLED] + { bt r5, index_bitrev_order_1 ; eq r5, r5, 2 } + index_normal_order_1: + SAVE_SAMPLES_NORMAL(S_FRAME_POINTER_1, 1); + bu save_to_frame_complete_1 + index_bitrev_order_1: + bt r5, index_packed_1 + SAVE_SAMPLES_BIT_REVERSED(S_FRAME_POINTER_1); + bu 
save_to_frame_complete_1 + index_packed_1: + SAVE_SAMPLES_PACKED(S_FRAME_POINTER_1); + save_to_frame_complete_1: + + ldw r5, r4[S_FRAME_1_INDEX] + {add r5, r5, 1;ldw r6, r4[S_FRAME_SIZE]} + stw r5, r4[S_FRAME_1_INDEX] + + + {eq r5, r5, r6; ldc r6, 0} + bf r5, do_the_rest; + at_the_end_of_the_overlapping_half_frame: + + //set the metadata + ldw r5, r4[S_METADATA_POINTER_0] + ldw r3, r4[S_FRAME_NUMBER] {stw r3, r5[4]; add r3, r3, 1} //if metadata layout changes then this needs to change too - stw r3, r4[S_FRAME_NUMBER] //and save the new frame number back - - ldw r6, sp[S_C_OUTPUT] - outct res[r6], 8 //WARNING: do not change this - - stw r3, r4[S_FRAME_NUMBER] //and save the new frame number back - inct r11, res[r6] - - eq r5, r11, EXCHANGE_BUFFERS //if the incomming token is EXCHANGE_BUFFERS then do that - {bt r5, flip_the_frames; eq r5, r11, CONFIGURE_DECIMATOR} //if the incomming token is CONFIGURE_DECIMATOR then do that - - ldap r11, configure - stw r11, sp[0] - ldw lr, sp[0] - retsp 0 - -.align 8 - flip_the_frames: - ldd r3, r5, r4[S_D_FRAME_POINTER_AND_INDEX_0] - std r3, r5, r4[S_D_FRAME_POINTER_AND_INDEX_1] - - ldw r5, r4[S_METADATA_POINTER_1] - {in r3, res[r6]; ldc r0, 0} - stw r5, r4[S_METADATA_POINTER_0] - - {std r0, r3, r4[S_D_FRAME_POINTER_AND_INDEX_0]} - - get_the_new_metadata_pointer2: - {in r5, res[r6];ldw r7, sp[S_C_OUTPUT]} + stw r3, r4[S_FRAME_NUMBER] //and save the new frame number back + + ldw r6, sp[S_C_OUTPUT] + outct res[r6], 8 //WARNING: do not change this + + stw r3, r4[S_FRAME_NUMBER] //and save the new frame number back + inct r11, res[r6] + + eq r5, r11, EXCHANGE_BUFFERS //if the incomming token is EXCHANGE_BUFFERS then do that + {bt r5, flip_the_frames; eq r5, r11, CONFIGURE_DECIMATOR} //if the incomming token is CONFIGURE_DECIMATOR then do that + + ldap r11, configure + stw r11, sp[0] + ldw lr, sp[0] + retsp 0 + +.align 8 + flip_the_frames: + ldd r3, r5, r4[S_D_FRAME_POINTER_AND_INDEX_0] + std r3, r5, r4[S_D_FRAME_POINTER_AND_INDEX_1] + 
+ ldw r5, r4[S_METADATA_POINTER_1] + {in r3, res[r6]; ldc r0, 0} + stw r5, r4[S_METADATA_POINTER_0] + + {std r0, r3, r4[S_D_FRAME_POINTER_AND_INDEX_0]} + + get_the_new_metadata_pointer2: + {in r5, res[r6];ldw r7, sp[S_C_OUTPUT]} {outct res[r7], 8; stw r5, r4[S_METADATA_POINTER_1]}//WARNING: do not change this std r0, r0, r5[0] //reset the frame sig bits to 0 - std r0, r0, r5[1] //reset the frame sig bits to 0 + std r0, r0, r5[1] //reset the frame sig bits to 0 async_send_samples: ldc r6, 0 @@ -1019,13 +1019,13 @@ internal_channel_overwrite_begin: out res[r9], r11 bu do_the_rest - - do_the_rest: + + do_the_rest: ldaw r11, sp[S_THIRD_STAGE] - ldaw r4, sp[S_FRAME_OFFSET] + ldaw r4, sp[S_FRAME_OFFSET] + - //copy accumulators to store {ldw r10, r4[S_FRAME_GAIN]; ldc r7, 0} {lss r5, r10, r7; ldc r7, 32} @@ -1057,322 +1057,322 @@ frame_gain_complete: ldaw r4, sp[S_OUTPUT_STORAGE]; std r0, r1, r4[S_D_OUTPUT_STORAGE_01]; std r2, r3, r4[S_D_OUTPUT_STORAGE_23]; - - //increment S_THIRD_STAGE_COEFS_PHASE mod THIRD_STAGE_COEFS_PER_STAGE - {ldw r1, r11[S_THIRD_STAGE_COEFS_PHASE]; ldc r9, THIRD_STAGE_COEFS_PER_STAGE} + + //increment S_THIRD_STAGE_COEFS_PHASE mod THIRD_STAGE_COEFS_PER_STAGE + {ldw r1, r11[S_THIRD_STAGE_COEFS_PHASE]; ldc r9, THIRD_STAGE_COEFS_PER_STAGE} //set S_THIRD_STAGE_PHASE to 0 - {add r1, r1, 1} - {stw r10, r11[S_THIRD_STAGE_PHASE];lsu r9, r1, r9} - //reset accumulators - - std r10, r10, r11[S_D_THIRD_STAGE_CH0_ACC] - std r10, r10, r11[S_D_THIRD_STAGE_CH1_ACC] - mul r1, r1, r9 - std r10, r10, r11[S_D_THIRD_STAGE_CH2_ACC] - std r10, r10, r11[S_D_THIRD_STAGE_CH3_ACC] - - stw r1, r11[S_THIRD_STAGE_COEFS_PHASE] - retsp 0 - -.cc_bottom post_process.function - .set post_process.nstackwords, 0 - .globl post_process.nstackwords - .set post_process.maxcores,1 - .globl post_process.maxcores - .set post_process.maxtimers,0 - .globl post_process.maxtimers - .set post_process.maxchanends,0 - .globl post_process.maxchanends -.Lpost_process_tmp: - .size post_process, 
.Lpost_process_tmp-post_process - .cfi_endproc - -/////////////////////////////////////////////////////////////////////////////////////////// - -.globl divide_by_four -.align 8 -.type divide_by_four,@function -.cc_top divide_by_four.function -divide_by_four: - .cfi_startproc - .issue_mode dual - - ldaw r10, sp[S_THIRD_STAGE] - {ldw r8, sp[S_CHAN_COUNT];ldc r11, THIRD_STAGE_COEFS_PER_STAGE} //r11 = 32 - mul r8, r8, r11 - {ldw r11, r10[S_THIRD_STAGE_PHASE]; shl r6, r11, 2} - mul r8, r8, r11 - ldw r11, r10[S_THIRD_STAGE_COEFS_PHASE] - {ldw r10, r10[S_THIRD_STAGE_DATA_POINTER];add r8, r8, r11} - ldaw r10, r10[r8] - - ldw r8, sp[S_CHAN_COUNT] - {dualentsp 1; eq r9, r8, 4} - ldaw r11, cp[g_second_stage_fir] //------------------------------------------------------------------------------------------------------------ This needs fixing - - {bt r9, mic_array_decimate_to_pcm_4ch_0; eq r9, r8, 3} - {bt r9, mic_array_decimate_to_pcm_4ch_1; eq r9, r8, 2} - {bt r9, mic_array_decimate_to_pcm_4ch_2; eq r9, r8, 1} - {bt r9, mic_array_decimate_to_pcm_4ch_3;} - bu divide_by_four_complete - - //r11 = coefs, r10 = pointer to where to put result, r7 = data, r6 = THIRD_STAGE_COEFS_PER_STAGE*sizeof(int) -mic_array_decimate_to_pcm_4ch_0: - {bl mic_array_decimate_to_pcm_4ch_fir_impl ; }// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); - {stw r0, r10[0]; add r10, r10, r6} -mic_array_decimate_to_pcm_4ch_1: - {bl mic_array_decimate_to_pcm_4ch_fir_impl ; add r7, r7, r6}// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); - {stw r0, r10[0]; add r10, r10, r6} -mic_array_decimate_to_pcm_4ch_2: - {bl mic_array_decimate_to_pcm_4ch_fir_impl ; add r7, r7, r6}// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); - {stw r0, r10[0]; add r10, r10, r6} -mic_array_decimate_to_pcm_4ch_3: - {bl mic_array_decimate_to_pcm_4ch_fir_impl ; add r7, r7, r6}// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); - stw r0, r10[0] - 
-divide_by_four_complete: - retsp 1 - .cc_bottom divide_by_four.function - .set divide_by_four.nstackwords, 1 - .globl divide_by_four.nstackwords - .set divide_by_four.maxcores,1 - .globl divide_by_four.maxcores - .set divide_by_four.maxtimers,0 - .globl divide_by_four.maxtimers - .set divide_by_four.maxchanends,0 - .globl divide_by_four.maxchanends -.Ldivide_by_four_tmp: - .size divide_by_four, .Ldivide_by_four_tmp-divide_by_four - .cfi_endproc - -/////////////////////////////////////////////////////////////////////////////////////////// - -////This is the actual implementation of the third stage FIR - -.globl third_stage_fir_impl -.align 8 -.type third_stage_fir_impl,@function -.cc_top third_stage_fir_impl.function -third_stage_fir_impl: - .cfi_startproc - .issue_mode dual - -#define TWO_TAPS(I, COEFS, H0, L0, DATA0, H1, L1, DATA1)\ - {ldw r0, COEFS[0]; add COEFS, COEFS, 4};\ - ldd r2, r1, DATA0[I];\ - ldd r4, r3, DATA1[I];\ - maccs H0, L0, r1, r0;\ - maccs H1, L1, r3, r0;\ - {ldw r0, COEFS[0]; add COEFS, COEFS, 4};\ - maccs H0, L0, r2, r0;\ - maccs H1, L1, r4, r0 - - TWO_TAPS(0, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(1, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(2, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(3, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(4, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(5, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(6, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(7, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(8, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(9, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(10, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(11, r9, r8, r7, r10, r6, r5, r11) - ldc r0, 4*24 - {add r10, r10, r0; add r11, r11, r0} - TWO_TAPS(0, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(1, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(2, r9, r8, r7, r10, r6, r5, r11) - TWO_TAPS(3, r9, r8, r7, r10, r6, r5, r11) - retsp 0 - .cc_bottom third_stage_fir_impl.function - .set third_stage_fir_impl.nstackwords, 0 - .globl third_stage_fir_impl.nstackwords - .set 
third_stage_fir_impl.maxcores,1 - .globl third_stage_fir_impl.maxcores - .set third_stage_fir_impl.maxtimers,0 - .globl third_stage_fir_impl.maxtimers - .set third_stage_fir_impl.maxchanends,0 - .globl third_stage_fir_impl.maxchanends -.Lthird_stage_fir_impl_tmp: - .size third_stage_fir_impl, .Lthird_stage_fir_impl_tmp-third_stage_fir_impl - .cfi_endproc - -/////////////////////////////////////////////////////////////////////////////////////////// - -////This is the actual implementation of the second stage FIR - -.globl mic_array_decimate_to_pcm_4ch_fir_impl -.align 8 -.type mic_array_decimate_to_pcm_4ch_fir_impl,@function -.cc_top mic_array_decimate_to_pcm_4ch_fir_impl.function -mic_array_decimate_to_pcm_4ch_fir_impl: - .cfi_startproc - .issue_mode dual - - //r0, r1, r2, r3, r4, r5, r6 = 31, r7, r10, r11, - {ldc r0, 0; ldc r1, 0} //Initialise the accumulators - - ldd r3, r2, r11[0] - ldd r5, r4, r7[0] - maccs r0, r1, r4, r2 - maccs r0, r1, r5, r3 - ldd r5, r4, r7[7] - maccs r0, r1, r4, r3 - maccs r0, r1, r5, r2 - - ldd r3, r2, r11[1] - ldd r5, r4, r7[1] - maccs r0, r1, r4, r2 - maccs r0, r1, r5, r3 - ldd r5, r4, r7[6] - maccs r0, r1, r4, r3 - maccs r0, r1, r5, r2 - - ldd r3, r2, r11[2] - ldd r5, r4, r7[2] - maccs r0, r1, r4, r2 - maccs r0, r1, r5, r3 - ldd r5, r4, r7[5] - maccs r0, r1, r4, r3 - maccs r0, r1, r5, r2 - - ldd r3, r2, r11[3] - ldd r5, r4, r7[3] - maccs r0, r1, r4, r2 - maccs r0, r1, r5, r3 - ldd r5, r4, r7[4] - maccs r0, r1, r4, r3 - maccs r0, r1, r5, r2 - - retsp 0 - .cc_bottom mic_array_decimate_to_pcm_4ch_fir_impl.function - .set mic_array_decimate_to_pcm_4ch_fir_impl.nstackwords, 0 - .globl mic_array_decimate_to_pcm_4ch_fir_impl.nstackwords - .set mic_array_decimate_to_pcm_4ch_fir_impl.maxcores,1 - .globl mic_array_decimate_to_pcm_4ch_fir_impl.maxcores - .set mic_array_decimate_to_pcm_4ch_fir_impl.maxtimers,0 - .globl mic_array_decimate_to_pcm_4ch_fir_impl.maxtimers - .set mic_array_decimate_to_pcm_4ch_fir_impl.maxchanends,0 - .globl 
mic_array_decimate_to_pcm_4ch_fir_impl.maxchanends -.Lmic_array_decimate_to_pcm_4ch_fir_impl_tmp: - .size mic_array_decimate_to_pcm_4ch_fir_impl, .Lmic_array_decimate_to_pcm_4ch_fir_impl_tmp-mic_array_decimate_to_pcm_4ch_fir_impl - -.cfi_endproc - -//This is for testing the DC offset elimination -//void dc_eliminate_tester(int &x, long long &y) -.globl dc_eliminate_tester -.align 8 -.type dc_eliminate_tester,@function -.cc_top dc_eliminate_tester.function -dc_eliminate_tester: - .cfi_startproc - .issue_mode dual - dualentsp 16 - - std r4, r5, sp[2] - std r6, r7, sp[3] - std r8, r9, sp[4] - std r10, r11, sp[5] - - ldw r2, r0[0] - mov r4, r1 - DC_OFFSET_REMOVAL(S_D_PREV_Y_0, S_PREV_X_0, r2); //TODO setup the last arg - stw r2, r0[0] - - ldd r4, r5, sp[2] - ldd r6, r7, sp[3] - ldd r8, r9, sp[4] - ldd r10, r11, sp[5] - - retsp 16 - .cc_bottom dc_eliminate_tester.function - .set dc_eliminate_tester.nstackwords, 16 - .globl dc_eliminate_tester.nstackwords - .set dc_eliminate_tester.maxcores,1 - .globl dc_eliminate_tester.maxcores - .set dc_eliminate_tester.maxtimers,0 - .globl dc_eliminate_tester.maxtimers - .set dc_eliminate_tester.maxchanends,0 - .globl dc_eliminate_tester.maxchanends -.Ldc_eliminate_tester_tmp: - .size dc_eliminate_tester, .Ldc_eliminate_tester_tmp-dc_eliminate_tester - .cfi_endproc - - //endpoint section - -#define STRINGIZE(S) #S - -#define EP_MARCO(ID, PASS) \ - .ascii STRINGIZE( input_ ## ID ## _ ## PASS ## _ep ) ;\ - .byte 0 ;\ - .ascii "" ;\ - .byte 0 ;\ - .long 0 ;\ - .long .Laddr_end ## ID ## _ ## PASS ## -.Laddr_start ## ID ## _ ## PASS;\ - .Laddr_start ## ID ## _ ## PASS : ;\ - .cc_top cc_ ## ID ## _ ## PASS ## ,input_ ## ID ## _ ## PASS ;\ - .long input_ ## ID ## _ ## PASS ;\ - .byte 0 ;\ - .cc_bottom cc_ ## ID ## _ ## PASS ;\ - .Laddr_end ## ID ## _ ## PASS : - -#define EPT_MARCO(ID, PASS) \ -.cc_top cc_a ## ID ## _ ## PASS,input_ ## ID ## _ ## PASS ;\ - .ascii "";\ - .byte 0;\ - .long 0;\ - .long input_ ## ID ## _ ## PASS;\ -.cc_bottom 
cc_a ## ID ## _ ## PASS - - .section .xtaendpoint,"",@progbits -.Lentries_start0: - .long .Lentries_end0-.Lentries_start0 - .long 1 - .ascii "" - .byte 0 - EP_MARCO(0, 0) - EP_MARCO(1, 0) - EP_MARCO(2, 0) - EP_MARCO(3, 0) - EP_MARCO(4, 0) - EP_MARCO(5, 0) - EP_MARCO(6, 0) - EP_MARCO(7, 0) - EP_MARCO(0, 1) - EP_MARCO(1, 1) - EP_MARCO(2, 1) - EP_MARCO(3, 1) - EP_MARCO(4, 1) - EP_MARCO(5, 1) - EP_MARCO(6, 1) - EP_MARCO(7, 1) -.Laddr_end9: - -.Lentries_end0: - - .section .xtaendpointtable,"",@progbits -.Lentries_start1: - .long .Lentries_end1-.Lentries_start1 - .long 0 - .ascii "" - .byte 0 - EPT_MARCO(0, 0) - EPT_MARCO(1, 0) - EPT_MARCO(2, 0) - EPT_MARCO(3, 0) - EPT_MARCO(4, 0) - EPT_MARCO(5, 0) - EPT_MARCO(6, 0) - EPT_MARCO(7, 0) - EPT_MARCO(0, 1) - EPT_MARCO(1, 1) - EPT_MARCO(2, 1) - EPT_MARCO(3, 1) - EPT_MARCO(4, 1) - EPT_MARCO(5, 1) - EPT_MARCO(6, 1) - EPT_MARCO(7, 1) -.Lentries_end1: + {add r1, r1, 1} + {stw r10, r11[S_THIRD_STAGE_PHASE];lsu r9, r1, r9} + //reset accumulators + + std r10, r10, r11[S_D_THIRD_STAGE_CH0_ACC] + std r10, r10, r11[S_D_THIRD_STAGE_CH1_ACC] + mul r1, r1, r9 + std r10, r10, r11[S_D_THIRD_STAGE_CH2_ACC] + std r10, r10, r11[S_D_THIRD_STAGE_CH3_ACC] + + stw r1, r11[S_THIRD_STAGE_COEFS_PHASE] + retsp 0 + +.cc_bottom post_process.function + .set post_process.nstackwords, 0 + .globl post_process.nstackwords + .set post_process.maxcores,1 + .globl post_process.maxcores + .set post_process.maxtimers,0 + .globl post_process.maxtimers + .set post_process.maxchanends,0 + .globl post_process.maxchanends +.Lpost_process_tmp: + .size post_process, .Lpost_process_tmp-post_process + .cfi_endproc + +/////////////////////////////////////////////////////////////////////////////////////////// + +.globl divide_by_four +.align 8 +.type divide_by_four,@function +.cc_top divide_by_four.function +divide_by_four: + .cfi_startproc + .issue_mode dual + + ldaw r10, sp[S_THIRD_STAGE] + {ldw r8, sp[S_CHAN_COUNT];ldc r11, THIRD_STAGE_COEFS_PER_STAGE} //r11 = 32 + mul 
r8, r8, r11 + {ldw r11, r10[S_THIRD_STAGE_PHASE]; shl r6, r11, 2} + mul r8, r8, r11 + ldw r11, r10[S_THIRD_STAGE_COEFS_PHASE] + {ldw r10, r10[S_THIRD_STAGE_DATA_POINTER];add r8, r8, r11} + ldaw r10, r10[r8] + + ldw r8, sp[S_CHAN_COUNT] + {dualentsp 1; eq r9, r8, 4} + ldaw r11, cp[g_second_stage_fir] //------------------------------------------------------------------------------------------------------------ This needs fixing + + {bt r9, mic_array_decimate_to_pcm_4ch_0; eq r9, r8, 3} + {bt r9, mic_array_decimate_to_pcm_4ch_1; eq r9, r8, 2} + {bt r9, mic_array_decimate_to_pcm_4ch_2; eq r9, r8, 1} + {bt r9, mic_array_decimate_to_pcm_4ch_3;} + bu divide_by_four_complete + + //r11 = coefs, r10 = pointer to where to put result, r7 = data, r6 = THIRD_STAGE_COEFS_PER_STAGE*sizeof(int) +mic_array_decimate_to_pcm_4ch_0: + {bl mic_array_decimate_to_pcm_4ch_fir_impl ; }// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); + {stw r0, r10[0]; add r10, r10, r6} +mic_array_decimate_to_pcm_4ch_1: + {bl mic_array_decimate_to_pcm_4ch_fir_impl ; add r7, r7, r6}// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); + {stw r0, r10[0]; add r10, r10, r6} +mic_array_decimate_to_pcm_4ch_2: + {bl mic_array_decimate_to_pcm_4ch_fir_impl ; add r7, r7, r6}// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); + {stw r0, r10[0]; add r10, r10, r6} +mic_array_decimate_to_pcm_4ch_3: + {bl mic_array_decimate_to_pcm_4ch_fir_impl ; add r7, r7, r6}// data, coefs need to be set; h, l, c0, c1, d0, d1; return h(r0), l(r1); + stw r0, r10[0] + +divide_by_four_complete: + retsp 1 + .cc_bottom divide_by_four.function + .set divide_by_four.nstackwords, 1 + .globl divide_by_four.nstackwords + .set divide_by_four.maxcores,1 + .globl divide_by_four.maxcores + .set divide_by_four.maxtimers,0 + .globl divide_by_four.maxtimers + .set divide_by_four.maxchanends,0 + .globl divide_by_four.maxchanends +.Ldivide_by_four_tmp: + .size divide_by_four, 
.Ldivide_by_four_tmp-divide_by_four + .cfi_endproc + +/////////////////////////////////////////////////////////////////////////////////////////// + +////This is the actual implementation of the third stage FIR + +.globl third_stage_fir_impl +.align 8 +.type third_stage_fir_impl,@function +.cc_top third_stage_fir_impl.function +third_stage_fir_impl: + .cfi_startproc + .issue_mode dual + +#define TWO_TAPS(I, COEFS, H0, L0, DATA0, H1, L1, DATA1)\ + {ldw r0, COEFS[0]; add COEFS, COEFS, 4};\ + ldd r2, r1, DATA0[I];\ + ldd r4, r3, DATA1[I];\ + maccs H0, L0, r1, r0;\ + maccs H1, L1, r3, r0;\ + {ldw r0, COEFS[0]; add COEFS, COEFS, 4};\ + maccs H0, L0, r2, r0;\ + maccs H1, L1, r4, r0 + + TWO_TAPS(0, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(1, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(2, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(3, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(4, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(5, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(6, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(7, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(8, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(9, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(10, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(11, r9, r8, r7, r10, r6, r5, r11) + ldc r0, 4*24 + {add r10, r10, r0; add r11, r11, r0} + TWO_TAPS(0, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(1, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(2, r9, r8, r7, r10, r6, r5, r11) + TWO_TAPS(3, r9, r8, r7, r10, r6, r5, r11) + retsp 0 + .cc_bottom third_stage_fir_impl.function + .set third_stage_fir_impl.nstackwords, 0 + .globl third_stage_fir_impl.nstackwords + .set third_stage_fir_impl.maxcores,1 + .globl third_stage_fir_impl.maxcores + .set third_stage_fir_impl.maxtimers,0 + .globl third_stage_fir_impl.maxtimers + .set third_stage_fir_impl.maxchanends,0 + .globl third_stage_fir_impl.maxchanends +.Lthird_stage_fir_impl_tmp: + .size third_stage_fir_impl, .Lthird_stage_fir_impl_tmp-third_stage_fir_impl + .cfi_endproc + 
+/////////////////////////////////////////////////////////////////////////////////////////// + +////This is the actual implementation of the second stage FIR + +.globl mic_array_decimate_to_pcm_4ch_fir_impl +.align 8 +.type mic_array_decimate_to_pcm_4ch_fir_impl,@function +.cc_top mic_array_decimate_to_pcm_4ch_fir_impl.function +mic_array_decimate_to_pcm_4ch_fir_impl: + .cfi_startproc + .issue_mode dual + + //r0, r1, r2, r3, r4, r5, r6 = 31, r7, r10, r11, + {ldc r0, 0; ldc r1, 0} //Initialise the accumulators + + ldd r3, r2, r11[0] + ldd r5, r4, r7[0] + maccs r0, r1, r4, r2 + maccs r0, r1, r5, r3 + ldd r5, r4, r7[7] + maccs r0, r1, r4, r3 + maccs r0, r1, r5, r2 + + ldd r3, r2, r11[1] + ldd r5, r4, r7[1] + maccs r0, r1, r4, r2 + maccs r0, r1, r5, r3 + ldd r5, r4, r7[6] + maccs r0, r1, r4, r3 + maccs r0, r1, r5, r2 + + ldd r3, r2, r11[2] + ldd r5, r4, r7[2] + maccs r0, r1, r4, r2 + maccs r0, r1, r5, r3 + ldd r5, r4, r7[5] + maccs r0, r1, r4, r3 + maccs r0, r1, r5, r2 + + ldd r3, r2, r11[3] + ldd r5, r4, r7[3] + maccs r0, r1, r4, r2 + maccs r0, r1, r5, r3 + ldd r5, r4, r7[4] + maccs r0, r1, r4, r3 + maccs r0, r1, r5, r2 + + retsp 0 + .cc_bottom mic_array_decimate_to_pcm_4ch_fir_impl.function + .set mic_array_decimate_to_pcm_4ch_fir_impl.nstackwords, 0 + .globl mic_array_decimate_to_pcm_4ch_fir_impl.nstackwords + .set mic_array_decimate_to_pcm_4ch_fir_impl.maxcores,1 + .globl mic_array_decimate_to_pcm_4ch_fir_impl.maxcores + .set mic_array_decimate_to_pcm_4ch_fir_impl.maxtimers,0 + .globl mic_array_decimate_to_pcm_4ch_fir_impl.maxtimers + .set mic_array_decimate_to_pcm_4ch_fir_impl.maxchanends,0 + .globl mic_array_decimate_to_pcm_4ch_fir_impl.maxchanends +.Lmic_array_decimate_to_pcm_4ch_fir_impl_tmp: + .size mic_array_decimate_to_pcm_4ch_fir_impl, .Lmic_array_decimate_to_pcm_4ch_fir_impl_tmp-mic_array_decimate_to_pcm_4ch_fir_impl + +.cfi_endproc + +//This is for testing the DC offset elimination +//void dc_eliminate_tester(int &x, long long &y) +.globl 
dc_eliminate_tester +.align 8 +.type dc_eliminate_tester,@function +.cc_top dc_eliminate_tester.function +dc_eliminate_tester: + .cfi_startproc + .issue_mode dual + dualentsp 16 + + std r4, r5, sp[2] + std r6, r7, sp[3] + std r8, r9, sp[4] + std r10, r11, sp[5] + + ldw r2, r0[0] + mov r4, r1 + DC_OFFSET_REMOVAL(S_D_PREV_Y_0, S_PREV_X_0, r2); //TODO setup the last arg + stw r2, r0[0] + + ldd r4, r5, sp[2] + ldd r6, r7, sp[3] + ldd r8, r9, sp[4] + ldd r10, r11, sp[5] + + retsp 16 + .cc_bottom dc_eliminate_tester.function + .set dc_eliminate_tester.nstackwords, 16 + .globl dc_eliminate_tester.nstackwords + .set dc_eliminate_tester.maxcores,1 + .globl dc_eliminate_tester.maxcores + .set dc_eliminate_tester.maxtimers,0 + .globl dc_eliminate_tester.maxtimers + .set dc_eliminate_tester.maxchanends,0 + .globl dc_eliminate_tester.maxchanends +.Ldc_eliminate_tester_tmp: + .size dc_eliminate_tester, .Ldc_eliminate_tester_tmp-dc_eliminate_tester + .cfi_endproc + + //endpoint section + +#define STRINGIZE(S) #S + +#define EP_MARCO(ID, PASS) \ + .ascii STRINGIZE( input_ ## ID ## _ ## PASS ## _ep ) ;\ + .byte 0 ;\ + .ascii "" ;\ + .byte 0 ;\ + .long 0 ;\ + .long .Laddr_end ## ID ## _ ## PASS ## -.Laddr_start ## ID ## _ ## PASS;\ + .Laddr_start ## ID ## _ ## PASS : ;\ + .cc_top cc_ ## ID ## _ ## PASS ## ,input_ ## ID ## _ ## PASS ;\ + .long input_ ## ID ## _ ## PASS ;\ + .byte 0 ;\ + .cc_bottom cc_ ## ID ## _ ## PASS ;\ + .Laddr_end ## ID ## _ ## PASS : + +#define EPT_MARCO(ID, PASS) \ +.cc_top cc_a ## ID ## _ ## PASS,input_ ## ID ## _ ## PASS ;\ + .ascii "";\ + .byte 0;\ + .long 0;\ + .long input_ ## ID ## _ ## PASS;\ +.cc_bottom cc_a ## ID ## _ ## PASS + + .section .xtaendpoint,"",@progbits +.Lentries_start0: + .long .Lentries_end0-.Lentries_start0 + .long 1 + .ascii "" + .byte 0 + EP_MARCO(0, 0) + EP_MARCO(1, 0) + EP_MARCO(2, 0) + EP_MARCO(3, 0) + EP_MARCO(4, 0) + EP_MARCO(5, 0) + EP_MARCO(6, 0) + EP_MARCO(7, 0) + EP_MARCO(0, 1) + EP_MARCO(1, 1) + EP_MARCO(2, 1) + EP_MARCO(3, 1) 
+ EP_MARCO(4, 1) + EP_MARCO(5, 1) + EP_MARCO(6, 1) + EP_MARCO(7, 1) +.Laddr_end9: + +.Lentries_end0: + + .section .xtaendpointtable,"",@progbits +.Lentries_start1: + .long .Lentries_end1-.Lentries_start1 + .long 0 + .ascii "" + .byte 0 + EPT_MARCO(0, 0) + EPT_MARCO(1, 0) + EPT_MARCO(2, 0) + EPT_MARCO(3, 0) + EPT_MARCO(4, 0) + EPT_MARCO(5, 0) + EPT_MARCO(6, 0) + EPT_MARCO(7, 0) + EPT_MARCO(0, 1) + EPT_MARCO(1, 1) + EPT_MARCO(2, 1) + EPT_MARCO(3, 1) + EPT_MARCO(4, 1) + EPT_MARCO(5, 1) + EPT_MARCO(6, 1) + EPT_MARCO(7, 1) +.Lentries_end1: diff --git a/lib_mic_array/src/mic_array_dual.xc b/lib_mic_array/src/mic_array_dual.xc index 1fab984d..193deb3c 100644 --- a/lib_mic_array/src/mic_array_dual.xc +++ b/lib_mic_array/src/mic_array_dual.xc @@ -9,15 +9,12 @@ extern const int [[aligned(8)]] g_third_stage_div_6_fir_dual[192]; //From fir_coefs_dual.xc. We make a LL aligned copy of this #include "dsp_qformat.h" //Gain compensation - #if (defined(MIC_DUAL_ENABLED) && (MIC_DUAL_ENABLED == 0)) #undef MIC_DUAL_ENABLED #endif #ifndef MIC_DUAL_ENABLED - #ifndef MIC_DUAL_FRAME_SIZE - #define MIC_DUAL_FRAME_SIZE 1 - #else + #ifdef MIC_DUAL_FRAME_SIZE #error "Set MIC_DUAL_ENABLED to 1." #endif #endif @@ -306,13 +303,6 @@ static int dc_eliminate(int x, int &prev_x, long long &state){ #pragma unsafe arrays void mic_dual_pdm_rx_decimate(buffered in port:32 p_pdm_mic, streaming chanend c_2x_pdm_mic, streaming chanend c_ref_audio[]){ -#else - -#pragma unsafe arrays -void mic_dual_pdm_rx_decimate_DEFAULT_DEFINES(buffered in port:32 p_pdm_mic, streaming chanend c_2x_pdm_mic, streaming chanend c_ref_audio[]){ - -#endif - //Send initial request to UBM c_ref_audio[0] <: 0; c_ref_audio[1] <: 0; @@ -454,3 +444,4 @@ void mic_dual_pdm_rx_decimate_DEFAULT_DEFINES(buffered in port:32 p_pdm_mic, str //printintln(t1-t0); } } +#endif