diff options
Diffstat (limited to 'drivers/misc/echo/echo.c')
| -rw-r--r-- | drivers/misc/echo/echo.c | 73 | 
1 files changed, 0 insertions, 73 deletions
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c index 9597e9523cac..8a5adc0d2e88 100644 --- a/drivers/misc/echo/echo.c +++ b/drivers/misc/echo/echo.c @@ -115,78 +115,6 @@  /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ -#ifdef __bfin__ -static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) -{ -	int i; -	int offset1; -	int offset2; -	int factor; -	int exp; -	int16_t *phist; -	int n; - -	if (shift > 0) -		factor = clean << shift; -	else -		factor = clean >> -shift; - -	/* Update the FIR taps */ - -	offset2 = ec->curr_pos; -	offset1 = ec->taps - offset2; -	phist = &ec->fir_state_bg.history[offset2]; - -	/* st: and en: help us locate the assembler in echo.s */ - -	/* asm("st:"); */ -	n = ec->taps; -	for (i = 0; i < n; i++) { -		exp = *phist++ * factor; -		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); -	} -	/* asm("en:"); */ - -	/* Note the asm for the inner loop above generated by Blackfin gcc -	   4.1.1 is pretty good (note even parallel instructions used): - -	   R0 = W [P0++] (X); -	   R0 *= R2; -	   R0 = R0 + R3 (NS) || -	   R1 = W [P1] (X) || -	   nop; -	   R0 >>>= 15; -	   R0 = R0 + R1; -	   W [P1++] = R0; - -	   A block based update algorithm would be much faster but the -	   above can't be improved on much.  Every instruction saved in -	   the loop above is 2 MIPs/ch!  The for loop above is where the -	   Blackfin spends most of it's time - about 17 MIPs/ch measured -	   with speedtest.c with 256 taps (32ms).  Write-back and -	   Write-through cache gave about the same performance. -	 */ -} - -/* -   IDEAS for further optimisation of lms_adapt_bg(): - -   1/ The rounding is quite costly.  Could we keep as 32 bit coeffs -   then make filter pluck the MS 16-bits of the coeffs when filtering? -   However this would lower potential optimisation of filter, as I -   think the dual-MAC architecture requires packed 16 bit coeffs. - -   2/ Block based update would be more efficient, as per comments above, -   could use dual MAC architecture. - -   3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC -   packing. - -   4/ Execute the whole e/c in a block of say 20ms rather than sample -   by sample.  Processing a few samples every ms is inefficient. -*/ - -#else  static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)  {  	int i; @@ -215,7 +143,6 @@ static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)  		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);  	}  } -#endif  static inline int top_bit(unsigned int bits)  {  | 
