* File: sobel1.sa
* (web-page residue, not part of the program: "Font size:")
* ------------------------------------------------------------------------- *
* Copyright (c) 2002 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
* ========================================================================= *
*   _IMG_sobel1 -- 3x3 Sobel magnitude (|H| + |V|, saturated to 8 bits)
*                  for ONE output line, TMS320C6000 linear assembly.
*
*   Arguments (.cproc):
*     A_in1, A_in2, A_in3  pointers to three input rows (upper, middle,
*                          lower); 8 bytes of each row are consumed per
*                          loop iteration.
*     B_out                pointer to the output row; the first byte
*                          written is B_out + 1.
*     A_w                  width in pixels; the loop runs A_w >> 3 times.
*     (B_h is commented out of the argument list and is not used.)
*
*   Output columns written: 1 .. 8*(A_w >> 3) - 2.  Column 0 and the
*   columns after that range are never stored.
*
*   Notes:
*     - Source layout: labels start in column 1; directives and mnemonics
*       are indented so that the assembler does not take them for labels.
*     - The lane/pixel comments below assume little-endian byte order
*       (byte 0 of a loaded word = lowest address) -- TODO confirm for
*       the target build.
*     - The B-side loads use row pointer + 2, so the final iteration reads
*       2 bytes past the last 8-byte group of each input row.
*     - ".trip 4" promises at least 4 iterations to the optimizer, i.e.
*       the caller is expected to pass A_w >= 32.
* ========================================================================= *
        .text
        .global _IMG_sobel1
_IMG_sobel1: .cproc A_in1, A_in2, A_in3, B_out, A_w   ; B_h not taken
        .no_mdep                        ; no memory dependences asserted:
                                        ; presumably inputs/output must
                                        ; not overlap -- verify callers
* ========================================================================= *
*   Array access                                                            *
* ========================================================================= *
        .reg    B_in1, B_in2, B_in3, A_out
* ========================================================================= *
*   Coefficients                                                            *
* ========================================================================= *
        .reg    A_mult1, A_mult2, A_mult1_b, A_mult2_b
        .reg    B_mult1, B_mult2, B_mult1_b, B_mult2_b
        .reg    A_f1, A_f2
        .reg    B_f1, B_f2
* ========================================================================= *
*   Input pixels (8 bytes per row on the A view, same row +2 on the B view) *
* ========================================================================= *
        .reg    A_in1_h:A_in1_l         ; |x|x|x|x|x|x|x|x| | |
        .reg    B_tmp2:B_tmp1           ; | | |x|x|x|x|x|x|x|x|
        .reg    A_in2_l2:A_in2_l        ; |x|x|x|x|x|x|x|x| | |
        .reg    B_in2_h2:B_in2_h        ; | | |x|x|x|x|x|x|x|x|
        .reg    A_in3_h:A_in3_l         ; |x|x|x|x|x|x|x|x| | |
        .reg    B_tmp4:B_tmp3           ; | | |x|x|x|x|x|x|x|x|
* ========================================================================= *
*   Temporary values                                                        *
* ========================================================================= *
        .reg    A_t1, A_t2, A_t3, A_t4, A_t5, A_t6, A_t7, A_t8, A_t9, A_t10
        .reg    A_t11, A_t12
        .reg    B_t1, B_t2, B_t3, B_t4, B_t5, B_t6, B_t7, B_t8, B_t9, B_t10
        .reg    B_t11, B_t12
        .reg    A_H, B_H3, A_H5, B_H7
        .reg    A_V2, B_V4, A_V6, B_V8
        .reg    A_b1, A_b2, A_b5, A_b6
        .reg    A_u1, A_u2, A_u3, A_u4, A_u5, A_u6, A_u7, A_u8, A_u9, A_u10
        .reg    A_u11, A_u12
        .reg    B_b3, B_b4, B_b7, B_b8
        .reg    B_u1, B_u2, B_u3, B_u4, B_u5, B_u6, B_u7, B_u8, B_u9, B_u10
        .reg    B_u11, B_u12
        .reg    A_b10_h:A_b10_l, A_b11_h:A_b11_l, A_b12_h:A_b12_l
        .reg    B_b14_h:B_b14_l, B_b15_h:B_b15_l, B_b13_h:B_b13_l
        .reg    A_r9, B_r10, A_r11, B_r12, B_r14:B_r13, B_r15, B_r16
* ========================================================================= *
*   Control                                                                 *
* ========================================================================= *
        .reg    A_cnt                   ; Loop counter (BDEC)
        .reg    B_final                 ; Non-zero while the full 8-byte
                                        ; store is allowed
* ========================================================================= *
*   Setup                                                                   *
* ========================================================================= *
        ; Packed signed 8-bit taps for DOTPSU4 (listed MSB .. LSB).
        MVKL    0xFEFF,     A_mult1
        MVKLH   0x00FF,     A_mult1     ;  0, -1, -2, -1
        MV      A_mult1,    B_mult1
        MVKL    0x0201,     A_mult2
        MVKLH   0x0001,     A_mult2     ;  0,  1,  2,  1
        MV      A_mult2,    B_mult2
        MVKL    0xFF00,     A_mult1_b
        MVKLH   0xFFFE,     A_mult1_b   ; -1, -2, -1,  0
        MV      A_mult1_b,  B_mult1_b
        MVKL    0x0100,     A_mult2_b
        MVKLH   0x0102,     A_mult2_b   ;  1,  2,  1,  0
        MV      A_mult2_b,  B_mult2_b

        ; Packed unsigned 8-bit scale factors for MPYU4.
        MVKL    0x0101,     A_f1
        MVKLH   0x0101,     A_f1        ;  1,  1,  1,  1
        MV      A_f1,       B_f1
        MVKL    0x0202,     A_f2
        MVKLH   0x0202,     A_f2        ;  2,  2,  2,  2
        MV      A_f2,       B_f2

        ; Loop control: N = A_w / 8 iterations.  BDEC branches while its
        ; counter is >= 0, so it is primed with N - 2.  B_final counts
        ; down from N and reaches 0 in the last iteration, which
        ; suppresses the 8-byte store there (the tail stores 6 bytes).
        MV      A_w,        A_cnt
        SHRU    A_cnt,  3,  A_cnt
        MV      A_cnt,      B_final
        SUB     A_cnt,  2,  A_cnt

        ; Second view of every row, shifted right by two pixels.
        ADD     A_in1,  2,  B_in1
        ADD     A_in2,  2,  B_in2
        ADD     A_in3,  2,  B_in3
        ADD     B_out,  1,  A_out       ; first output pixel is column 1
* ========================================================================= *
*   Loop: 8 input columns in, results for columns 1..8 of the group out     *
* ========================================================================= *
loop:   .trip 4
        LDNDW.D1T1  *A_in3++,   A_in3_h:A_in3_l     ; A load, line 3
        LDNDW.D1T1  *A_in2++,   A_in2_l2:A_in2_l    ; A load, line 2
        LDNDW.D1T1  *A_in1++,   A_in1_h:A_in1_l     ; A load, line 1
        LDNDW.D2T2  *B_in3++,   B_tmp4:B_tmp3       ; B load, line 3
        LDNDW.D2T2  *B_in2++,   B_in2_h2:B_in2_h    ; B load, line 2
        LDNDW.D2T2  *B_in1++,   B_tmp2:B_tmp1       ; B load, line 1

        ; Horizontal filter mask: each DOTPSU4 yields one 1-2-1 weighted
        ; sum of three neighbouring pixels of a row (negated for line 1);
        ; PACK2 pairs two of them, ADD2 forms (line 3 - line 1).
        DOTPSU4.M1  A_mult1_b,  A_in1_l,    A_t1
        DOTPSU4.M1  A_mult1,    A_in1_l,    A_t2
        PACK2       A_t1,       A_t2,       A_t3
        DOTPSU4.M1  A_mult2_b,  A_in3_l,    A_t4
        DOTPSU4.M1  A_mult2,    A_in3_l,    A_t5
        PACK2       A_t4,       A_t5,       A_t6
        ADD2        A_t3,       A_t6,       A_H     ; columns 2:1

        DOTPSU4.M2  B_mult1_b,  B_tmp1,     B_t1
        DOTPSU4.M2  B_mult1,    B_tmp1,     B_t2
        PACK2       B_t1,       B_t2,       B_t3
        DOTPSU4.M2  B_mult2_b,  B_tmp3,     B_t4
        DOTPSU4.M2  B_mult2,    B_tmp3,     B_t5
        PACK2       B_t4,       B_t5,       B_t6
        ADD2        B_t3,       B_t6,       B_H3    ; columns 4:3

        DOTPSU4.M1  A_mult1_b,  A_in1_h,    A_t7
        DOTPSU4.M1  A_mult1,    A_in1_h,    A_t8
        PACK2       A_t7,       A_t8,       A_t9
        DOTPSU4.M1  A_mult2_b,  A_in3_h,    A_t10
        DOTPSU4.M1  A_mult2,    A_in3_h,    A_t11
        PACK2       A_t10,      A_t11,      A_t12
        ADD2        A_t9,       A_t12,      A_H5    ; columns 6:5

        DOTPSU4.M2  B_mult1_b,  B_tmp2,     B_t7
        DOTPSU4.M2  B_mult1,    B_tmp2,     B_t8
        PACK2       B_t7,       B_t8,       B_t9
        DOTPSU4.M2  B_mult2_b,  B_tmp4,     B_t10
        DOTPSU4.M2  B_mult2,    B_tmp4,     B_t11
        PACK2       B_t10,      B_t11,      B_t12
        ADD2        B_t9,       B_t12,      B_H7    ; columns 8:7

        ABS2        A_H,        A_H
        ABS2        B_H3,       B_H3
        ABS2        A_H5,       A_H5
        ABS2        B_H7,       B_H7

        ; Vertical filter mask: build per-column sums
        ;   c[x] = line1[x] + 2*line2[x] + line3[x]
        ; as 16-bit pairs, then take c[x+1] - c[x-1].
        MPYU4.M1    A_in2_l2,   A_f2,       A_b10_h:A_b10_l ; 2*line2[4..7]
        MPYU4.M1    A_in1_h,    A_f1,       A_b11_h:A_b11_l ; line1[4..7]
        MPYU4.M1    A_in2_l,    A_f2,       A_b12_h:A_b12_l ; 2*line2[0..3]
        MPYU4.M2    B_in2_h,    B_f2,       B_b13_h:B_b13_l ; 2*line2[2..5]
        MPYU4.M2    B_in2_h2,   B_f2,       B_b14_h:B_b14_l ; 2*line2[6..9]
        MPYU4.M2    B_tmp2,     B_f1,       B_b15_h:B_b15_l ; line1[6..9]

        UNPKLU4     A_in1_l,    A_u1
        ADD2        A_u1,       A_b12_l,    A_u2
        UNPKLU4     A_in3_l,    A_u3
        ADD2        A_u2,       A_u3,       A_b1    ; c[1]:c[0]
        UNPKHU4     A_in1_l,    A_u4
        ADD2        A_u4,       A_b12_h,    A_u5
        UNPKHU4     A_in3_l,    A_u6
        ADD2        A_u5,       A_u6,       A_b2    ; c[3]:c[2]

        UNPKLU4     B_tmp1,     B_u1
        ADD2        B_u1,       B_b13_l,    B_u2
        UNPKLU4     B_tmp3,     B_u3
        ADD2        B_u2,       B_u3,       B_b3    ; c[3]:c[2]
        UNPKHU4     B_tmp1,     B_u4
        ADD2        B_u4,       B_b13_h,    B_u5
        UNPKHU4     B_tmp3,     B_u6
        ADD2        B_u5,       B_u6,       B_b4    ; c[5]:c[4]

        ADD2        A_b11_l,    A_b10_l,    A_u7
        UNPKLU4     A_in3_h,    A_u8
        ADD2        A_u7,       A_u8,       A_b5    ; c[5]:c[4]
        ADD2        A_b11_h,    A_b10_h,    A_u9
        UNPKHU4     A_in3_h,    A_u10
        ADD2        A_u9,       A_u10,      A_b6    ; c[7]:c[6]

        ADD2        B_b15_l,    B_b14_l,    B_u7
        UNPKLU4     B_tmp4,     B_u8
        ADD2        B_u7,       B_u8,       B_b7    ; c[7]:c[6]
        ADD2        B_b15_h,    B_b14_h,    B_u9
        UNPKHU4     B_tmp4,     B_u10
        ADD2        B_u9,       B_u10,      B_b8    ; c[9]:c[8]

        SUB2        A_b2,       A_b1,       A_u11
        ABS2        A_u11,      A_V2                ; |V| columns 2:1
        SUB2        B_b4,       B_b3,       B_u11
        ABS2        B_u11,      B_V4                ; |V| columns 4:3
        SUB2        A_b6,       A_b5,       A_u12
        ABS2        A_u12,      A_V6                ; |V| columns 6:5
        SUB2        B_b8,       B_b7,       B_u12
        ABS2        B_u12,      B_V8                ; |V| columns 8:7

        ; |H| + |V|, then saturate the eight 16-bit sums to unsigned bytes.
        ADD2        A_H,        A_V2,       A_r9
        ADD2        B_H3,       B_V4,       B_r10
        ADD2        A_H5,       A_V6,       A_r11
        ADD2        B_H7,       B_V8,       B_r12
        SPACKU4     B_r10,      A_r9,       B_r13   ; columns 4,3,2,1
        SPACKU4     B_r12,      A_r11,      B_r14   ; columns 8,7,6,5

        ; Store all 8 results except in the last iteration (B_final == 0),
        ; where the tail code below writes only 6 of them.
        SUB         B_final,    1,          B_final
 [B_final] STNDW.D2T1 B_r14:B_r13, *A_out++
        BDEC        loop,       A_cnt
* ========================================================================= *
*   Tail: last group -- write columns 1..6 of the group only                *
* ========================================================================= *
        STNW        B_r13,      *A_out              ; columns 1..4
        EXTU        B_r14,  24, 24, B_r15           ; byte 0 = column 5
        STB         B_r15,      *++A_out[4]
        EXTU        B_r14,  16, 24, B_r16           ; byte 1 = column 6
        STB         B_r16,      *++A_out
        .return
        .endproc
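* ------------------------------------------------------------------------- *
* Residue from the web code viewer this file was copied from (not part of
* the program); kept as comments so it is not parsed as source:
*   Keyboard shortcuts
*   Copy code:               Ctrl + C
*   Search code:             Ctrl + F
*   Full-screen mode:        F11
*   Switch theme:            Ctrl + Shift + D
*   Show keyboard shortcuts: ?
*   Increase font size:      Ctrl + =
*   Decrease font size:      Ctrl + -
* ------------------------------------------------------------------------- *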