ref: 15a36a0a0ddaa154c6ab3c3c32a86a71f07c640e
parent: 831d72ac5f21ea53d4a466f411fd9b6a5af605fc
author: Dmitry Kovalev <[email protected]>
date: Thu Sep 26 10:01:25 EDT 2013
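Renaming vp9_short_idct10_16x16 to vp9_short_idct16x16_10. The rename covers the _add entry point, its C, SSE2 and NEON implementations, and the NEON pass1/pass2 helpers, and makes the function name consistent with vp9_short_idct16x16 and vp9_short_idct16x16_1. Change-Id: I70e54be9e6b9a1dddab0de470686591e96d05517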
--- a/vp9/common/arm/neon/vp9_idct16x16_neon.c
+++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c
@@ -20,10 +20,10 @@
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
+extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,
int16_t *output,
int output_stride);
-extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
+extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
@@ -107,7 +107,7 @@
return;
}
-void vp9_short_idct10_16x16_add_neon(int16_t *input,
+void vp9_short_idct16x16_10_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
@@ -118,12 +118,12 @@
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
+ vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_short_idct10_16x16_add_neon_pass2(input+1,
+ vp9_short_idct16x16_10_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
--- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
@@ -10,8 +10,8 @@
EXPORT |vp9_short_idct16x16_add_neon_pass1|
EXPORT |vp9_short_idct16x16_add_neon_pass2|
- EXPORT |vp9_short_idct10_16x16_add_neon_pass1|
- EXPORT |vp9_short_idct10_16x16_add_neon_pass2|
+ EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
+ EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
EXPORT |save_neon_registers|
EXPORT |restore_neon_registers|
ARM
@@ -788,7 +788,7 @@
bx lr
ENDP ; |vp9_short_idct16x16_add_neon_pass2|
-;void |vp9_short_idct10_16x16_add_neon_pass1|(int16_t *input,
+;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
@@ -798,7 +798,7 @@
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct10_16x16_add_neon_pass1| PROC
+|vp9_short_idct16x16_10_add_neon_pass1| PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
@@ -907,9 +907,9 @@
vst1.64 {d31}, [r1], r2
bx lr
- ENDP ; |vp9_short_idct10_16x16_add_neon_pass1|
+ ENDP ; |vp9_short_idct16x16_10_add_neon_pass1|
-;void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
+;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
@@ -926,7 +926,7 @@
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct10_16x16_add_neon_pass2| PROC
+|vp9_short_idct16x16_10_add_neon_pass2| PROC
push {r3-r9}
; TODO(hkuang): Find a better way to load the elements.
@@ -1177,7 +1177,7 @@
end_idct10_16x16_pass2
pop {r3-r9}
bx lr
- ENDP ; |vp9_short_idct10_16x16_add_neon_pass2|
+ ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
;void |save_neon_registers|()
|save_neon_registers| PROC
vpush {d8-d15}
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -838,7 +838,7 @@
+ dest[j * dest_stride + i]); }
}
-void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
+void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[16 * 16] = { 0 };
int16_t *outptr = out;
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -315,8 +315,8 @@
prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_add sse2 neon
-prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct10_16x16_add sse2 neon
+prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct16x16_10_add sse2 neon
prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct32x32_add sse2 neon
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -2456,7 +2456,7 @@
write_buffer_8x16(dest, in1, stride);
}
-void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest,
+void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -126,7 +126,7 @@
vp9_short_idct16x16_1_add(input, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
- vp9_short_idct10_16x16_add(input, dest, stride);
+ vp9_short_idct16x16_10_add(input, dest, stride);
vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -64,7 +64,7 @@
if (eob <= 1)
vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
else if (eob <= 10)
- vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
+ vp9_short_idct16x16_10_add(dqcoeff, dest, stride);
else
vp9_short_idct16x16_add(dqcoeff, dest, stride);
}