shithub: libvpx

--- a/vp9/common/arm/neon/vp9_idct16x16_neon.c

+++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c

@@ -20,10 +20,10 @@

                                                int16_t skip_adding,

                                                uint8_t *dest,

                                                int dest_stride);

-extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,

+extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,

                                                int16_t *output,

                                                int output_stride);

-extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,

+extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,

                                                int16_t *output,

                                                int16_t *pass1Output,

                                                int16_t skip_adding,

@@ -107,7 +107,7 @@

   return;

-void vp9_short_idct10_16x16_add_neon(int16_t *input,

+void vp9_short_idct16x16_10_add_neon(int16_t *input,

                                   uint8_t *dest, int dest_stride) {

   int16_t pass1_output[16*16] = {0};

   int16_t row_idct_output[16*16] = {0};

@@ -118,12 +118,12 @@

   /* Parallel idct on the upper 8 rows */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);

+  vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7

   // which will be saved into row_idct_output.

-  vp9_short_idct10_16x16_add_neon_pass2(input+1,

+  vp9_short_idct16x16_10_add_neon_pass2(input+1,

                                         row_idct_output,

                                         pass1_output,

                                         0,

--- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm

+++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm

@@ -10,8 +10,8 @@

     EXPORT  |vp9_short_idct16x16_add_neon_pass1|

     EXPORT  |vp9_short_idct16x16_add_neon_pass2|

-    EXPORT  |vp9_short_idct10_16x16_add_neon_pass1|

-    EXPORT  |vp9_short_idct10_16x16_add_neon_pass2|

+    EXPORT  |vp9_short_idct16x16_10_add_neon_pass1|

+    EXPORT  |vp9_short_idct16x16_10_add_neon_pass2|

     EXPORT  |save_neon_registers|

     EXPORT  |restore_neon_registers|

     ARM

@@ -788,7 +788,7 @@

     bx              lr

     ENDP  ; |vp9_short_idct16x16_add_neon_pass2|

-;void |vp9_short_idct10_16x16_add_neon_pass1|(int16_t *input,

+;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input,

 ;                                             int16_t *output, int output_stride)

 ; r0  int16_t input

@@ -798,7 +798,7 @@

 ; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output

 ; will be stored back into q8-q15 registers. This function will touch q0-q7

 ; registers and use them as buffer during calculation.

-|vp9_short_idct10_16x16_add_neon_pass1| PROC

+|vp9_short_idct16x16_10_add_neon_pass1| PROC

     ; TODO(hkuang): Find a better way to load the elements.

     ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15

@@ -907,9 +907,9 @@

     vst1.64         {d31}, [r1], r2

     bx              lr

-    ENDP  ; |vp9_short_idct10_16x16_add_neon_pass1|

+    ENDP  ; |vp9_short_idct16x16_10_add_neon_pass1|

-;void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,

+;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,

 ;                                           int16_t *output,

 ;                                           int16_t *pass1Output,

 ;                                           int16_t skip_adding,

@@ -926,7 +926,7 @@

 ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output

 ; will be stored back into q8-q15 registers. This function will touch q0-q7

 ; registers and use them as buffer during calculation.

-|vp9_short_idct10_16x16_add_neon_pass2| PROC

+|vp9_short_idct16x16_10_add_neon_pass2| PROC

     push            {r3-r9}

     ; TODO(hkuang): Find a better way to load the elements.

@@ -1177,7 +1177,7 @@

 end_idct10_16x16_pass2

     pop             {r3-r9}

     bx              lr

-    ENDP  ; |vp9_short_idct10_16x16_add_neon_pass2|

+    ENDP  ; |vp9_short_idct16x16_10_add_neon_pass2|

 ;void |save_neon_registers|()

 |save_neon_registers| PROC

     vpush           {d8-d15}

--- a/vp9/common/vp9_idct.c

+++ b/vp9/common/vp9_idct.c

@@ -838,7 +838,7 @@

                                   + dest[j * dest_stride + i]);  }

-void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,

+void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,

                                   int dest_stride) {

   int16_t out[16 * 16] = { 0 };

   int16_t *outptr = out;

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -315,8 +315,8 @@

 prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"

 specialize vp9_short_idct16x16_add sse2 neon

-prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"

-specialize vp9_short_idct10_16x16_add sse2 neon

+prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"

+specialize vp9_short_idct16x16_10_add sse2 neon

 prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"

 specialize vp9_short_idct32x32_add sse2 neon

--- a/vp9/common/x86/vp9_idct_intrin_sse2.c

+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c

@@ -2456,7 +2456,7 @@

   write_buffer_8x16(dest, in1, stride);

-void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest,

+void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,

                                      int stride) {

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1<<5);

--- a/vp9/decoder/vp9_idct_blk.c

+++ b/vp9/decoder/vp9_idct_blk.c

@@ -126,7 +126,7 @@

       vp9_short_idct16x16_1_add(input, dest, stride);

       input[0] = 0;

     } else if (eob <= 10) {

-      vp9_short_idct10_16x16_add(input, dest, stride);

+      vp9_short_idct16x16_10_add(input, dest, stride);

       vpx_memset(input, 0, 512);

     } else {

       vp9_short_idct16x16_add(input, dest, stride);

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -64,7 +64,7 @@

   if (eob <= 1)

     vp9_short_idct16x16_1_add(dqcoeff, dest, stride);

   else if (eob <= 10)

-    vp9_short_idct10_16x16_add(dqcoeff, dest, stride);

+    vp9_short_idct16x16_10_add(dqcoeff, dest, stride);

   else

     vp9_short_idct16x16_add(dqcoeff, dest, stride);