?? discrete cosine transform.txt
字號:
* *
* Q1 = q1 + q0; Q0 = q1 - q0; *
* S1 = s1 + s0; S0 = s1 - s0; *
* *
* /* ---------------------------------------------------- */ *
* /* Stage 4 */ *
* /* ---------------------------------------------------- */ *
* F0 = P0; F4 = P1; *
* F2 = R1; F6 = R0; *
* *
* F1 = c7 * Q1 + c1 * S1; F7 = c7 * S1 - c1 * Q1; *
* F5 = c3 * Q0 + c5 * S0; F3 = c3 * S0 - c5 * Q0; *
* *
* /* ---------------------------------------------------- */ *
* /* Round and truncate values. */ *
* /* */ *
* /* Note: F0 and F4 have different rounding since no */ *
* /* MPYs have been applied to either term. Also, F0's */ *
* /* rounding is slightly different to offset the */ *
* /* truncation effects from the horizontal pass (which */ *
* /* does not round). */ *
* /* ---------------------------------------------------- */ *
* F0r = (F0 + 0x0006) >> 3; *
* F1r = (F1 + 0x7FFF) >> 16; *
* F2r = (F2 + 0x7FFF) >> 16; *
* F3r = (F3 + 0x7FFF) >> 16; *
* F4r = (F4 + 0x0004) >> 3; *
* F5r = (F5 + 0x7FFF) >> 16; *
* F6r = (F6 + 0x7FFF) >> 16; *
* F7r = (F7 + 0x7FFF) >> 16; *
* *
* /* ---------------------------------------------------- */ *
* /* Store the results */ *
* /* ---------------------------------------------------- */ *
* dct_io_ptr[0] = F0r; *
* dct_io_ptr[1] = F1r; *
* dct_io_ptr[2] = F2r; *
* dct_io_ptr[3] = F3r; *
* dct_io_ptr[4] = F4r; *
* dct_io_ptr[5] = F5r; *
* dct_io_ptr[6] = F6r; *
* dct_io_ptr[7] = F7r; *
* *
* /* ---------------------------------------------------- */ *
* /* Update pointer to next FDCT row. */ *
* /* ---------------------------------------------------- */ *
* dct_io_ptr += 8; *
* } *
* *
* return; *
* } *
* *
* *
* Note: This code guarantees correct operation, even in the case *
* that 'num_fdcts == 0'. In this case, the function runs for only *
* 13 cycles (counting 6 cycles of function-call overhead), due to *
* early-exit code. The early-exit case performs no accesses to the *
* fdct_data[] array and minimal access to the stack. *
* *
* TECHNIQUES *
* The loop nest in the vertical pass has been collapsed into a *
* single-level loop. Both vertical and horizontal loops have *
* been software pipelined. *
* *
* For performance, portions of the code outside the loops have been *
* inter-scheduled with the prolog and epilog code of the loops. *
* Also, twin stack-pointers are used to accelerate stack accesses. *
* Finally, pointer values and cosine term registers are reused *
* between the horizontal and vertical loops to reduce the impact of *
* pointer and constant reinitialization. *
* *
* To save codesize, prolog and epilog collapsing have been performed *
* to the extent that it does not impact performance. Also, code *
* outside the loops has been scheduled to pack as tightly into *
* fetch packets as possible to avoid alignment padding NOPs. *
* *
* To reduce register pressure and save some code, the horizontal *
* loop uses the same pair of pointer registers for both reading and *
* writing. The pointer increments are on the LDs to permit prolog *
* and epilog collapsing, since LDs can be speculated. *
* *
* Additional section-specific optimization notes are provided below. *
* *
* ASSUMPTIONS *
* Stack is aligned to a word boundary. *
* *
* MEMORY NOTE *
* No bank conflicts occur, regardless of fdct_data[]'s alignment. *
* *
* The code requires 16 words of stack space to save Save-On-Entry *
* (SOE) registers, CSR, IRP, and a spill value. *
* *
* Bank usage on C6201: 1 of 4 banks for 40% of loop cycles *
* 2 of 4 banks for 60% of loop cycles *
* *
* Nearly every cycle of this function performs at least one *
* memory access. *
* *
* NOTES *
* This code masks interrupts for nearly its entire duration. *
* Interrupts are locked out for '40 + 160 * num_fdcts' cycles. As *
* a result, the code is interrupt-tolerant, but not interruptible. *
* *
* The cosine terms have all been scaled by sqrt(2), so that the *
* "c4" term is basically an even power of 2. *
* *
* The code is completely endian neutral. *
* *
* SOURCE *
* Chen FDCT. *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 1999 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
; ------------------------------------------------------------------------- ;
; Copyright notice and entry-point declaration.
;
; The notice is placed in its own data subsection and built from two
; consecutive .string directives; the trailing ",0" appends the terminating
; NUL byte.  Assembly then switches to the ".text:hand" subsection and
; exports the entry symbol _fdct_8x8_asm.
;
; NOTE(review): in this copy every directive starts in column 1.  The TI
; assembler's statement format reserves column 1 for labels and comments,
; so the original indentation was presumably lost -- confirm against the
; original file before assembling.
; ------------------------------------------------------------------------- ;
.sect ".data:copyright_h"
_Copyright: .string "Copyright (C) 1999 Texas Instruments Incorporated. "
.string "All Rights Reserved.",0
.sect ".text:hand"
.global _fdct_8x8_asm
_fdct_8x8_asm:
; ========================== SYMBOLIC CONSTANTS =========================== ;
; Each .asg binds a 16-bit hexadecimal literal to a cst_* substitution
; symbol, so the cosine terms can be referred to by name.
;
; No cst_c4 is defined; the file header states that the cosine terms are
; scaled by sqrt(2) so that the "c4" term becomes a power of 2.
;
; NOTE(review): c1..c7 look like cos(k*pi/16) * sqrt(2) with 13 fractional
; bits (e.g. 0x2C62 = 11362 ~= 1.3870 * 8192), while c0 = 0xB505 = 46341
; looks like cos(pi/4) with 16 fractional bits -- confirm the formats.
    .asg 0xB505, cst_c0 ; Cosine term c0
    .asg 0x2C62, cst_c1 ; Cosine term c1
    .asg 0x29CF, cst_c2 ; Cosine term c2
    .asg 0x25A0, cst_c3 ; Cosine term c3
    .asg 0x1924, cst_c5 ; Cosine term c5
    .asg 0x1151, cst_c6 ; Cosine term c6
    .asg 0x08D4, cst_c7 ; Cosine term c7
; =============== SYMBOLIC REGISTER ASSIGNMENTS: VERT LOOP ================ ;
; Symbolic names for the registers used by the vertical-pass loop.  The
; A_/B_ prefix of each symbol records which register file (A or B) the
; underlying register belongs to.
;
; Several symbols are deliberately bound to the same physical register
; (for example A6 is A_f7, A_g0, A_p0 and A_c2r0; B15 is B_f6, B_g3, B_s0a,
; B_s0 and B_r1).  Such aliases are only valid if the values' live ranges
; do not overlap in the loop schedule, so do not change a binding without
; checking every use of the register.
;
; NOTE(review): B14 and B15 are the data-page pointer and stack pointer in
; the TI C6000 C runtime convention; using them as data registers here
; presumably relies on save/restore code outside this excerpt -- confirm.

; --- Packed cosine constants (one copy per register file where needed) ---
.asg A11, A_k1c0 ; 1, Cosine term c0 (packed)
.asg A12, A_c1c7 ; Cosine terms c1, c7 (packed)
.asg A13, A_c2c6 ; Cosine terms c2, c6 (packed)
.asg B11, B_k1c0 ; 1, Cosine term c0 (packed)
.asg B12, B_c1c7 ; Cosine terms c1, c7 (packed)
.asg B13, B_c2c6 ; Cosine terms c2, c6 (packed)
.asg B14, B_c3c5 ; Cosine terms c3, c5 (packed)
; --- Data pointers ---
.asg A4, A_i_ptr ; Input pointer
.asg B10, B_o_ptr ; Output pointer
; --- Input samples f0..f7 ---
.asg A9, A_f0 ; Spatial domain sample f0
.asg B8, B_f1 ; Spatial domain sample f1
.asg B6, B_f2 ; Spatial domain sample f2
.asg A5, A_f3 ; Spatial domain sample f3
.asg A7, A_f4 ; Spatial domain sample f4
.asg B7, B_f5 ; Spatial domain sample f5
.asg B15, B_f6 ; Spatial domain sample f6
.asg A6, A_f7 ; Spatial domain sample f7
; --- Intermediate flow-graph nodes (g, h, p, q, r, s, Q, S) ---
.asg A6, A_g0 ; Node g0 in flow graph
.asg B8, B_g1 ; Node g1 in flow graph
.asg B6, B_h1 ; Node h1 in flow graph
.asg A7, A_h0 ; Node h0 in flow graph
.asg A0, A_s1 ; Node s1 (h2) in flow graph
.asg B4, B_h3 ; Node h3 in flow graph
.asg B15, B_g3 ; Node g3 in flow graph
.asg A15, A_q1 ; Node q1 (g2) in flow graph
.asg A6, A_p0 ; Node p0 in flow graph
.asg B6, B_p1 ; Node p1 in flow graph
.asg B15, B_s0a ; Node s0 intermediate result
.asg B5, B_s0b ; Node s0 intermediate result
.asg B15, B_s0 ; Node s0 in flow graph
.asg A3, A_r0 ; Node r0 in flow graph
.asg B15, B_r1 ; Node r1 in flow graph
.asg B4, B_q0a ; Node q0 intermediate result
.asg A14, A_q0b ; Node q0 intermediate result
.asg A3, A_q0 ; Node q0 in flow graph
.asg A10, A_Q1 ; Node Q1 in flow graph
.asg B5, B_S1 ; Node S1 in flow graph
.asg A3, A_Q0 ; Node Q0 in flow graph
.asg B4, B_S0 ; Node S0 in flow graph
; --- Cosine-term products feeding the Stage 4 sums ---
.asg A14, A_c1Q1 ; Intermediate value c1 * Q1
.asg A6, A_c2r0 ; Intermediate value c2 * r0
.asg A7, A_c3Q0 ; Intermediate value c3 * Q0
.asg A3, A_c5Q0 ; Intermediate value c5 * Q0
.asg A14, A_c6r0 ; Intermediate value c6 * r0
.asg A8, A_c7Q1 ; Intermediate value c7 * Q1
.asg B5, B_c1S1 ; Intermediate value c1 * S1
.asg B0, B_c2r1 ; Intermediate value c2 * r1
.asg B0, B_c3S0 ; Intermediate value c3 * S0
.asg B3, B_c5S0 ; Intermediate value c5 * S0
.asg B6, B_c6r1 ; Intermediate value c6 * r1
.asg B5, B_c7S1 ; Intermediate value c7 * S1
; --- Frequency-domain outputs F0..F7 (before truncation) ---
.asg B9, B_F0 ; Frequency domain term F0
.asg A8, A_F1 ; Frequency domain term F1
.asg A5, A_F2 ; Frequency domain term F2
.asg B4, B_F3 ; Frequency domain term F3
.asg B3, B_F4 ; Frequency domain term F4
.asg A9, A_F5 ; Frequency domain term F5
.asg A10, A_F6 ; Frequency domain term F6
.asg B4, B_F7 ; Frequency domain term F7
; --- Truncated outputs ---
.asg A8, A_F1t ; Truncated result for F1
.asg A5, A_F2t ; Truncated result for F2
.asg B7, B_F3t ; Truncated result for F3
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -