; 2-D FIR Implementation #2 on C6x
;-----------------------------------------------------------------------
; fir2d2 -- accumulate one 2-D FIR output sample y(n1,n2) from 8-bit
;           unsigned filter coefficients and 8-bit unsigned image data.
;
; In:    A5 = &a(0,0)    filter coefficients, read sequentially
;        B5 = &x(n1,n2)  image base address for this output sample
;        A2 = M          loop counter (presumably the total tap count)
;        B7 = M2         number of filter columns (assumed >= 1)
;        B8 = N2         presumably the image row length in bytes
; Out:   A4 = accumulated sum of products
; Clobb: A2, A3, A5, A6, B1, B2, B6, B7 (left at M2-1), B9
;
; NOTE(review): nothing in this listing covers the C6x delay slots
; (4 after a load, 1 after a multiply, 5 after a branch).  MPYU and ADD
; consume values produced by earlier iterations, and A3/A6/B6 are not
; initialized before the first pass.  Confirm that the required NOPs or
; software-pipeline prolog/epilog exist around this code before relying
; on the value in A4.
;-----------------------------------------------------------------------
fir2d2  SUB     .D2     B8,B7,B9        ; B9 = N2 - M2 (reads B7 before the decrement below)
||      ZERO    .L1     A4              ; A4 = 0: clear the accumulator
||      SUB     .L2     B7,1,B7         ; B7 = M2 - 1: reload value for the column countdown
||      ZERO    .S2     B2              ; B2 = 0: byte offset into the image
        MV      .L2     B7,B1           ; B1 = taps remaining in this filter row after the current one

fir2    LDBU    .D1     *A5++,A6        ; A6 = a(m1,m2), zero-extended; advance coefficient pointer
||      LDBU    .D2     *+B5[B2],B6     ; B6 = x(n1-m1,n2-m2); base B5 is never overwritten
||      MPYU    .M1X    A6,B6,A3        ; A3 = a(m1,m2) * x(n1-m1,n2-m2)
||      ADD     .L1     A3,A4,A4        ; y(n1,n2) += A3
||      ADD     .S2     B2,1,B2         ; step the image offset to the next column
 [!B1]  ADD     .L2     B2,B9,B2        ; last column of the row: skip ahead to the next image row
|| [!B1] MV     .D2     B7,B1           ; ... and restart the column countdown at M2 - 1
|| [B1] SUB     .S2     B1,1,B1         ; otherwise one fewer column left in this row
|| [A2] SUB     .L1     A2,1,A2         ; decrement loop counter (stops at zero)
|| [A2] B       .S1     fir2            ; branch if A2 was non-zero before the decrement