; Version: @(#)fir2d3.asm 1.4 07/07/99
;
; Compute y(n1,n2) = h(n1,n2) ** x(n1,n2) at one index n1, n2.
; The filter h(n1,n2) is an M1 x M2 filter having M = M1 M2
; coefficients.  The image x(n1,n2) is N1 x N2.
;
; Target: TMS320C6000.  Hand-scheduled, one tap pair per pass,
; not software-pipelined (9 cycles per pass).
;
; Data layout used by the code:
;   Each 16-bit halfword holds two 8-bit unsigned values, so one LDHU
;   fetches two coefficients (via A5) or two pixels (via B5).  All
;   halfword counts are therefore the element counts divided by 2.
;   Assumes M, M2 and N2 are even, M2 >= 2, and both arrays are
;   halfword aligned -- TODO confirm against the callers.
;
; Registers on entry:
;   A5 = &a(0,0)   coefficient array, read at ascending addresses
;   B5 = &x(n1,n2) first pixel of the window; B5 itself is not modified
;   A2 = M         total number of coefficients
;   B7 = M2        coefficients per filter row
;   B8 = N2        pixels per image row
;
; Registers on exit:
;   A4 = sum of (low  byte of a) * (low  byte of x) over all halfwords
;   B4 = sum of (high byte of a) * (high byte of x) over all halfwords
;   With M < 2 the loop is skipped and A4 = B4 = 0.
;
; Clobbers: A2 A3 A4 A5 A6 A8 A9 B1 B2 B3 B4 B6 B7 B9
;   NOTE(review): B3 is used as scratch; if this is ever called from C,
;   B3 would normally hold the return address -- confirm the call site.
;
; There is no return branch: control falls through to whatever follows.
;
; NOTE(review): both pointers walk upward through memory, so this is a
; convolution only if the coefficients are stored already flipped --
; confirm.

; ---- setup, packet 1 ------------------------------------------------
fir2d3  ZERO    .D1     A4              ; accumulator #1 = 0
     || SUB     .D2     B8,B7,B9        ; B9 = N2 - M2 (reads B7 before the shift)
     || ZERO    .L2     B2              ; halfword offset into image = 0
     || MVK     .S1     0xFF,A8         ; A8 = 0x000000FF low-byte mask
                                        ; (MVKH would leave the low half undefined)
     || SHR     .S2     B7,1,B7         ; B7 = M2/2 halfwords per filter row

; ---- setup, packet 2 ------------------------------------------------
        ZERO    .D2     B4              ; accumulator #2 = 0
     || SHR     .S1     A2,1,A2         ; A2 = M/2 halfword pairs to process
     || SHR     .S2     B9,1,B9         ; B9 = (N2-M2)/2 halfwords to skip per row
     || MV      .L2     B7,B1           ; B1 = halfwords left in current filter row

; ---- nothing to do when there is not even one coefficient pair ------
 [!A2]  B       .S2     fir2d3_done
        NOP     5                       ; branch delay slots

; ---- main loop: one coefficient halfword x one pixel halfword -------
; cycle 1: issue both loads and update the counters
fir3    LDHU    .D1     *A5++,A6        ; A6 = next two packed coefficients
     || LDHU    .D2     *+B5[B2],B6     ; B6 = two packed pixels at offset B2
     || ADD     .S2     B2,1,B2         ; advance image offset by one halfword
     || SUB     .L2     B1,1,B1         ; one fewer halfword left in this row
     || SUB     .S1     A2,1,A2         ; one fewer pair left overall

; cycle 2: at the end of a filter row, jump to the start of the next
 [!B1]  ADD     .D2     B2,B9,B2        ; skip the rest of the image row
|| [!B1] MV     .L2     B7,B1           ; reload the per-row countdown

; cycle 3
        NOP                             ; load delay slot

; cycle 4: branch now so its 5 delay slots cover cycles 5-9
 [A2]   B       .S2     fir3            ; loop while pairs remain

; cycle 5
        NOP                             ; last load delay slot

; cycle 6: loaded data is now valid; split each halfword into bytes
;          (EXTU reads the old A6/B6 in parallel with the AND writes)
        AND     .L1     A6,A8,A6        ; A6 = low-byte coefficient
     || AND     .L2X    B6,A8,B6        ; B6 = low-byte pixel
     || EXTU    .S1     A6,0,8,A9       ; A9 = high-byte coefficient
     || EXTU    .S2     B6,0,8,B3       ; B3 = high-byte pixel (B9 must survive:
                                        ;      it holds the row skip)

; cycle 7: unsigned 16x16 multiplies of the low halves
        MPYU    .M1X    A6,B6,A3        ; A3 = low  coefficient * low  pixel
     || MPYU    .M2X    B3,A9,B3        ; B3 = high coefficient * high pixel

; cycle 8
        NOP                             ; multiply delay slot

; cycle 9: accumulate (final branch delay slot)
        ADD     .L1     A3,A4,A4        ; accumulator #1 += A3
     || ADD     .L2     B3,B4,B4        ; accumulator #2 += B3

fir2d3_done
; A4=y(n1,n2) and B4=y(n1+1,n2+1)
; NOTE(review): as coded, A4 and B4 are the low-byte and high-byte partial
; sums over the same window, so the full sum over all M taps is A4 + B4.
; Confirm whether callers add them or rely on the labelling above.