00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "arm_math.h"
00033
00034
00065 void arm_fir_sparse_q7(
00066 arm_fir_sparse_instance_q7 * S,
00067 q7_t * pSrc,
00068 q7_t * pDst,
00069 q7_t * pScratchIn,
00070 q31_t * pScratchOut,
00071 uint32_t blockSize)
00072 {
00073
00074 q7_t *pState = S->pState;
00075 q7_t *pCoeffs = S->pCoeffs;
00076 q7_t *px;
00077 q7_t *py = pState;
00078 q7_t *pb = pScratchIn;
00079 q7_t *pOut = pDst;
00080 int32_t *pTapDelay = S->pTapDelay;
00081 uint32_t delaySize = S->maxDelay + blockSize;
00082 uint16_t numTaps = S->numTaps;
00083 int32_t readIndex;
00084 uint32_t tapCnt, blkCnt;
00085 q7_t coeff = *pCoeffs++;
00086 q31_t *pScr2 = pScratchOut;
00087 q31_t in;
00088
00089
00090 #ifndef ARM_MATH_CM0
00091
00092
00093
00094 q7_t in1, in2, in3, in4;
00095
00096
00097
00098 arm_circularWrite_q7(py, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1,
00099 blockSize);
00100
00101
00102 tapCnt = numTaps;
00103
00104
00105 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
00106
00107
00108 if(readIndex < 0)
00109 {
00110 readIndex += (int32_t) delaySize;
00111 }
00112
00113
00114 py = pState;
00115
00116
00117 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb,
00118 (int32_t) blockSize, 1, blockSize);
00119
00120
00121 px = pb;
00122
00123
00124 pScratchOut = pScr2;
00125
00126
00127
00128 blkCnt = blockSize >> 2;
00129
00130 while(blkCnt > 0u)
00131 {
00132
00133 *pScratchOut++ = ((q31_t) * px++ * coeff);
00134 *pScratchOut++ = ((q31_t) * px++ * coeff);
00135 *pScratchOut++ = ((q31_t) * px++ * coeff);
00136 *pScratchOut++ = ((q31_t) * px++ * coeff);
00137
00138
00139 blkCnt--;
00140 }
00141
00142
00143
00144 blkCnt = blockSize % 0x4u;
00145
00146 while(blkCnt > 0u)
00147 {
00148
00149 *pScratchOut++ = ((q31_t) * px++ * coeff);
00150
00151
00152 blkCnt--;
00153 }
00154
00155
00156
00157 coeff = *pCoeffs++;
00158
00159
00160 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
00161
00162
00163 if(readIndex < 0)
00164 {
00165 readIndex += (int32_t) delaySize;
00166 }
00167
00168
00169 tapCnt = (uint32_t) numTaps - 1u;
00170
00171 while(tapCnt > 0u)
00172 {
00173
00174 py = pState;
00175
00176
00177 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb,
00178 (int32_t) blockSize, 1, blockSize);
00179
00180
00181 px = pb;
00182
00183
00184 pScratchOut = pScr2;
00185
00186
00187
00188 blkCnt = blockSize >> 2;
00189
00190 while(blkCnt > 0u)
00191 {
00192
00193 in = *pScratchOut + ((q31_t) * px++ * coeff);
00194 *pScratchOut++ = in;
00195 in = *pScratchOut + ((q31_t) * px++ * coeff);
00196 *pScratchOut++ = in;
00197 in = *pScratchOut + ((q31_t) * px++ * coeff);
00198 *pScratchOut++ = in;
00199 in = *pScratchOut + ((q31_t) * px++ * coeff);
00200 *pScratchOut++ = in;
00201
00202
00203 blkCnt--;
00204 }
00205
00206
00207
00208 blkCnt = blockSize % 0x4u;
00209
00210 while(blkCnt > 0u)
00211 {
00212
00213 in = *pScratchOut + ((q31_t) * px++ * coeff);
00214 *pScratchOut++ = in;
00215
00216
00217 blkCnt--;
00218 }
00219
00220
00221
00222 coeff = *pCoeffs++;
00223
00224
00225 readIndex = ((int32_t) S->stateIndex -
00226 (int32_t) blockSize) - *pTapDelay++;
00227
00228
00229 if(readIndex < 0)
00230 {
00231 readIndex += (int32_t) delaySize;
00232 }
00233
00234
00235 tapCnt--;
00236 }
00237
00238
00239
00240
00241 blkCnt = blockSize >> 2;
00242
00243 while(blkCnt > 0u)
00244 {
00245 in1 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00246 in2 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00247 in3 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00248 in4 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00249
00250 *__SIMD32(pOut)++ = __PACKq7(in1, in2, in3, in4);
00251
00252
00253 blkCnt--;
00254 }
00255
00256
00257
00258 blkCnt = blockSize % 0x4u;
00259
00260 while(blkCnt > 0u)
00261 {
00262 *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00263
00264
00265 blkCnt--;
00266 }
00267
00268 #else
00269
00270
00271
00272
00273
00274 arm_circularWrite_q7(py, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1,
00275 blockSize);
00276
00277
00278 tapCnt = numTaps;
00279
00280
00281 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
00282
00283
00284 if(readIndex < 0)
00285 {
00286 readIndex += (int32_t) delaySize;
00287 }
00288
00289
00290 py = pState;
00291
00292
00293 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb,
00294 (int32_t) blockSize, 1, blockSize);
00295
00296
00297 px = pb;
00298
00299
00300 pScratchOut = pScr2;
00301
00302
00303 blkCnt = blockSize;
00304
00305 while(blkCnt > 0u)
00306 {
00307
00308 *pScratchOut++ = ((q31_t) * px++ * coeff);
00309
00310
00311 blkCnt--;
00312 }
00313
00314
00315
00316 coeff = *pCoeffs++;
00317
00318
00319 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
00320
00321
00322 if(readIndex < 0)
00323 {
00324 readIndex += (int32_t) delaySize;
00325 }
00326
00327
00328 tapCnt = (uint32_t) numTaps - 1u;
00329
00330 while(tapCnt > 0u)
00331 {
00332
00333 py = pState;
00334
00335
00336 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb,
00337 (int32_t) blockSize, 1, blockSize);
00338
00339
00340 px = pb;
00341
00342
00343 pScratchOut = pScr2;
00344
00345
00346 blkCnt = blockSize;
00347
00348 while(blkCnt > 0u)
00349 {
00350
00351 in = *pScratchOut + ((q31_t) * px++ * coeff);
00352 *pScratchOut++ = in;
00353
00354
00355 blkCnt--;
00356 }
00357
00358
00359
00360 coeff = *pCoeffs++;
00361
00362
00363 readIndex =
00364 ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
00365
00366
00367 if(readIndex < 0)
00368 {
00369 readIndex += (int32_t) delaySize;
00370 }
00371
00372
00373 tapCnt--;
00374 }
00375
00376
00377
00378
00379 blkCnt = blockSize;
00380
00381 while(blkCnt > 0u)
00382 {
00383 *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00384
00385
00386 blkCnt--;
00387 }
00388
00389 #endif
00390
00391 }
00392