/*===============================================================================
 *
 *      TEXAS INSTRUMENTS, INC.
 *
 *      DISCRETE COSINE TRANSFORM - 2D, 8x8, 16-BIT INPUT, NO ROUNDING
 *
 *      REVISION DATE: 05/30/97
 *
 *      USAGE   This routine is C callable and can be called as
 *
 *              void dct(short *d, short *r)
 *              d = array of 8x8 inputs/outputs in raster scan order
 *              r = set of coefficients used in the DCT
 *
 *              Where *r = 0xADFD, 0xC13B, 0xE333, 0xF384, 0x098E, 0x6254,
 *                         0x41B3, 0x300B, 0x25A1, 0x187E, 0x1151, 0xC4DF
 *
 *              If the routine is not to be used as a C callable function,
 *              then all instructions relating to stack should be removed.
 *              Refer to comments of individual instructions. You will also
 *              need to initialize values for all the values passed as these
 *              are assumed to be in registers as defined by the calling
 *              convention of the compiler, (refer to the C compiler reference
 *              guide.)
 *
 *      C CODE
 *              This is the C equivalent of the Assembly Code without the
 *              assumptions listed below. Note that the assembly code is hand
 *              optimized and assumptions apply.
 *
 *              SOURCE - Independent JPEG Group, Thomas G. Lane
 *
 */

/*
 * One 8-point transform, performed in place on the eight elements
 * v[0], v[k], v[2*k], ..., v[7*k].
 *
 *   v  first element of the row or column to transform
 *   r  coefficient table; entries r[0]..r[11] are read
 *   k  distance, in elements, between successive samples
 *   m  right shift applied to the two outputs that use no multiply
 *      (v[0] and v[4*k])
 *   n  right shift applied to the six outputs built from products
 *
 * All eight inputs are read into t[] before any output is written, so the
 * in-place update is safe.  The (short) casts deliberately truncate each
 * multiplicand to 16 bits before the multiply, and every result is narrowed
 * back to short on store.  The shifts operate on signed ints; for negative
 * values the result of >> is implementation-defined in C.
 */
static void dctac_1d(short *v, const short *r, int k, int m, int n)
{
    int t[12];
    int j;

    /* Butterfly: sums go to t[0..3], differences to t[7..4]. */
    for (j = 0; j < 4; j++) {
        t[j]     = v[k * j] + v[k * (7 - j)];
        t[7 - j] = v[k * j] - v[k * (7 - j)];
    }

    /* Even-indexed outputs, derived from the sums. */
    t[8]  = t[0] + t[3];
    t[9]  = t[0] - t[3];
    t[10] = t[1] + t[2];
    t[11] = t[1] - t[2];

    v[0]     = (short)((t[8] + t[10]) >> m);
    v[4 * k] = (short)((t[8] - t[10]) >> m);

    t[8]     = (short)(t[11] + t[9]) * r[10];
    v[2 * k] = (short)((t[8] + (short)t[9]  * r[9])  >> n);
    v[6 * k] = (short)((t[8] + (short)t[11] * r[11]) >> n);

    /* Odd-indexed outputs, derived from the differences. */
    t[0] = (short)(t[4] + t[7]) * r[2];
    t[1] = (short)(t[5] + t[6]) * r[0];
    t[2] = t[4] + t[6];
    t[3] = t[5] + t[7];
    t[8] = (short)(t[2] + t[3]) * r[8];
    t[2] = (short)t[2] * r[1] + t[8];
    t[3] = (short)t[3] * r[3] + t[8];

    v[7 * k] = (short)(((short)t[4] * r[4] + t[0] + t[2]) >> n);
    v[5 * k] = (short)(((short)t[5] * r[6] + t[1] + t[3]) >> n);
    v[3 * k] = (short)(((short)t[6] * r[5] + t[1] + t[2]) >> n);
    v[1 * k] = (short)(((short)t[7] * r[7] + t[0] + t[3]) >> n);
}

/*
 * Transform an 8x8 block in place.
 *
 *   d  64 shorts in raster-scan order; overwritten with the result
 *   r  coefficient table of at least 12 shorts (values listed in the file
 *      header); it is only read
 *
 * Two passes are made over the block.  The first transforms each of the
 * eight rows (element stride 1, shifts 0 and 13); the second transforms each
 * of the eight columns (element stride 8, shifts 3 and 16).
 *
 * Every row/column is addressed as an offset from the unmodified base
 * pointer.  The earlier formulation walked d itself and rewound it by 64
 * after each pass, which left d pointing 56 elements before the start of the
 * block once the column pass had finished; forming such a pointer is
 * undefined behaviour even if it is never dereferenced.
 *
 * Note: the usage text in the file header names the routine dct(); the C
 * function defined here is dctac().
 */
void dctac(short *d, short *r)
{
    int pass;
    int i;

    for (pass = 0; pass < 2; pass++) {
        const int k    = pass ? 8 : 1;    /* stride between samples        */
        const int step = pass ? 1 : 8;    /* stride between rows / columns */
        const int m    = pass ? 3 : 0;    /* shift for multiply-free terms */
        const int n    = pass ? 16 : 13;  /* shift for product terms       */

        for (i = 0; i < 8; i++) {
            dctac_1d(d + i * step, r, k, m, n);
        }
    }
}

/*
 *
 *      DESCRIPTION
 *              This routine is used to compute the DCT of an 8x8 matrix of
 *              pixels which have been arranged in raster order. The data size
 *              of the input pixels and coefficients is 16 bits.
 *
 *      TECHNIQUES
 *              The outer loop (k loop) is unrolled giving two inner loops (i
 *              loops). The two inner loops are LOOP1 and LOOP2 which process
 *              the rows and columns respectively. These loops require two
 *              passes to fully prime the loop thus execute 10 times each.
 *
 *      ASSUMPTIONS
 *              Coefficients, *r, must be aligned on a word boundary
 *
 *      MEMORY NOTE
 *              This code has no memory hits regardless of where d and r are
 *              located in memory.
 *
 *      CYCLES      226
 *
 *===============================================================================
 *** BEGIN Benchmark Timing ***
 *** END Benchmark Timing ***
 */