ref: a808dfe3f2473032fd26eb3d9e1dc4d69970add4
parent: bf168d54abe7472455a01ac751eb576106abd287
parent: 540d910350121895597dab8c9c219aa28c0f64e4
author: Jingning Han <[email protected]>
date: Tue Jun 3 12:43:49 EDT 2014
Merge "Fix potential overflow issue in SSSE3 forward 8x8 2D-DCT"
--- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
@@ -23,6 +23,7 @@
pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
%endmacro
+TRANSFORM_COEFFS 11585, 11585
TRANSFORM_COEFFS 15137, 6270
TRANSFORM_COEFFS 16069, 3196
TRANSFORM_COEFFS 9102, 13623
@@ -83,7 +84,7 @@
%endmacro
; 1D forward 8x8 DCT transform
-%macro FDCT8_1D 0
+%macro FDCT8_1D 1
SUM_SUB 0, 7, 9
SUM_SUB 1, 6, 9
SUM_SUB 2, 5, 9
@@ -92,14 +93,21 @@
SUM_SUB 0, 3, 9
SUM_SUB 1, 2, 9
SUM_SUB 6, 5, 9
+%if %1 == 0
SUM_SUB 0, 1, 9
+%endif
BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10
pmulhrsw m6, m12
pmulhrsw m5, m12
+%if %1 == 0
pmulhrsw m0, m12
pmulhrsw m1, m12
+%else
+ BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10
+ SWAP 0, 1
+%endif
SUM_SUB 4, 5, 9
SUM_SUB 7, 6, 9
@@ -150,10 +158,10 @@
psllw m7, 2
; column transform
- FDCT8_1D
+ FDCT8_1D 0
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
- FDCT8_1D
+ FDCT8_1D 1
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
DIVIDE_ROUND_2X 0, 1, 9, 10