shithub: libvpx

--- a/test/dct16x16_test.cc

+++ b/test/dct16x16_test.cc

@@ -277,7 +277,7 @@

 void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,

                    int /*tx_type*/) {

-  vp9_idct16x16_256_add_c(in, dest, stride);

+  vpx_idct16x16_256_add_c(in, dest, stride);

 void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,

@@ -292,11 +292,11 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);

+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);

 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);

+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);

 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,

@@ -318,28 +318,28 @@

 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);

+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);

 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);

+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);

 #if HAVE_SSE2

 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);

+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);

 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);

+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);

 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);

+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);

 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);

+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);

 #endif  // HAVE_SSE2

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -824,12 +824,12 @@

     ::testing::Values(

         make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),

         make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),

-        make_tuple(&vpx_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));

 #else

 INSTANTIATE_TEST_CASE_P(

     C, Trans16x16DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH

@@ -863,7 +863,7 @@

     NEON, Trans16x16DCT,

     ::testing::Values(

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));

+                   &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));

 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -871,7 +871,7 @@

     SSE2, Trans16x16DCT,

     ::testing::Values(

         make_tuple(&vpx_fdct16x16_sse2,

-                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));

+                   &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

     SSE2, Trans16x16HT,

     ::testing::Values(

@@ -898,7 +898,7 @@

         make_tuple(&vpx_highbd_fdct16x16_c,

                    &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),

         make_tuple(&vpx_fdct16x16_sse2,

-                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));

+                   &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

     SSE2, Trans16x16HT,

     ::testing::Values(

@@ -927,7 +927,7 @@

     MSA, Trans16x16DCT,

     ::testing::Values(

         make_tuple(&vpx_fdct16x16_msa,

-                   &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));

+                   &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

     MSA, Trans16x16HT,

     ::testing::Values(

--- a/test/dct32x32_test.cc

+++ b/test/dct32x32_test.cc

@@ -82,15 +82,15 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);

+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8);

 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);

+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);

 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);

+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -324,17 +324,17 @@

         make_tuple(&vpx_highbd_fdct32x32_rd_c,

                    &idct32x32_12, 1, VPX_BITS_12),

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),

+                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),

         make_tuple(&vpx_fdct32x32_rd_c,

-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));

+                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));

 #else

 INSTANTIATE_TEST_CASE_P(

     C, Trans32x32Test,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),

+                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),

         make_tuple(&vpx_fdct32x32_rd_c,

-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));

+                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -342,9 +342,9 @@

     NEON, Trans32x32Test,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),

+                   &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),

         make_tuple(&vpx_fdct32x32_rd_c,

-                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));

+                   &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));

 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -352,9 +352,9 @@

     SSE2, Trans32x32Test,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_sse2,

-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),

+                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),

         make_tuple(&vpx_fdct32x32_rd_sse2,

-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));

+                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));

 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -367,9 +367,9 @@

         make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),

         make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,

                    VPX_BITS_12),

-        make_tuple(&vpx_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,

+        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,

                    VPX_BITS_8),

-        make_tuple(&vpx_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,

+        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,

                    VPX_BITS_8)));

 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -378,9 +378,9 @@

     AVX2, Trans32x32Test,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_avx2,

-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),

+                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),

         make_tuple(&vpx_fdct32x32_rd_avx2,

-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));

+                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));

 #endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -388,8 +388,8 @@

     MSA, Trans32x32Test,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_msa,

-                   &vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8),

+                   &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),

         make_tuple(&vpx_fdct32x32_rd_msa,

-                   &vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8)));

+                   &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));

 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 }  // namespace

--- a/test/fdct4x4_test.cc

+++ b/test/fdct4x4_test.cc

@@ -55,11 +55,11 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 10);

+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);

 void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 12);

+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);

 void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {

@@ -71,20 +71,20 @@

 void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 10);

+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);

 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);

+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);

 #if HAVE_SSE2

 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);

+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);

 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);

+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);

 #endif  // HAVE_SSE2

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -421,12 +421,12 @@

     ::testing::Values(

         make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),

         make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),

-        make_tuple(&vpx_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));

 #else

 INSTANTIATE_TEST_CASE_P(

     C, Trans4x4DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH

@@ -461,12 +461,12 @@

     ::testing::Values(

         make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),

         make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),

-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));

 #else

 INSTANTIATE_TEST_CASE_P(

     C, Trans4x4WHT,

     ::testing::Values(

-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -474,7 +474,7 @@

     NEON, Trans4x4DCT,

     ::testing::Values(

         make_tuple(&vpx_fdct4x4_c,

-                   &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));

+                   &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8)));

 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -492,7 +492,7 @@

 INSTANTIATE_TEST_CASE_P(

     MMX, Trans4x4WHT,

     ::testing::Values(

-        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));

 #endif

 #if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \

@@ -500,7 +500,7 @@

 INSTANTIATE_TEST_CASE_P(

     SSE2, Trans4x4WHT,

     ::testing::Values(

-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));

+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));

 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -508,7 +508,7 @@

     SSE2, Trans4x4DCT,

     ::testing::Values(

         make_tuple(&vpx_fdct4x4_sse2,

-                   &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8)));

+                   &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

     SSE2, Trans4x4HT,

     ::testing::Values(

@@ -526,7 +526,7 @@

         make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),

         make_tuple(&vpx_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),

         make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),

-        make_tuple(&vpx_fdct4x4_sse2,      &vp9_idct4x4_16_add_c, 0,

+        make_tuple(&vpx_fdct4x4_sse2,      &vpx_idct4x4_16_add_c, 0,

                    VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

@@ -542,7 +542,7 @@

 INSTANTIATE_TEST_CASE_P(

     MSA, Trans4x4DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8)));

+        make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0, VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

     MSA, Trans4x4HT,

     ::testing::Values(

--- a/test/fdct8x8_test.cc

+++ b/test/fdct8x8_test.cc

@@ -92,11 +92,11 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 10);

+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);

 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 12);

+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);

 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {

@@ -108,28 +108,28 @@

 void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);

+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);

 void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);

+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);

 #if HAVE_SSE2

 void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);

+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);

 void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);

+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);

 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);

+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);

 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {

-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);

+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);

 #endif  // HAVE_SSE2

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -658,7 +658,7 @@

 INSTANTIATE_TEST_CASE_P(

     C, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),

+        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),

         make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),

         make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));

 #else

@@ -665,7 +665,7 @@

 INSTANTIATE_TEST_CASE_P(

     C, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));

+        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8)));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH

@@ -698,7 +698,7 @@

 INSTANTIATE_TEST_CASE_P(

     NEON, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,

+        make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0,

                    VPX_BITS_8)));

 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -716,7 +716,7 @@

 INSTANTIATE_TEST_CASE_P(

     SSE2, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0,

+        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0,

                    VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

     SSE2, FwdTrans8x8HT,

@@ -731,7 +731,7 @@

 INSTANTIATE_TEST_CASE_P(

     SSE2, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),

+        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),

         make_tuple(&vpx_highbd_fdct8x8_c,

                    &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),

         make_tuple(&vpx_highbd_fdct8x8_sse2,

@@ -769,7 +769,7 @@

 INSTANTIATE_TEST_CASE_P(

     SSSE3, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,

+        make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_ssse3, 0,

                    VPX_BITS_8)));

 #endif

@@ -777,7 +777,7 @@

 INSTANTIATE_TEST_CASE_P(

     MSA, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vpx_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8)));

+        make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(

     MSA, FwdTrans8x8HT,

     ::testing::Values(

--- a/test/idct8x8_test.cc

+++ b/test/idct8x8_test.cc

@@ -124,7 +124,7 @@

     reference_dct_2d(input, output_r);

     for (int j = 0; j < 64; ++j)

       coeff[j] = round(output_r[j]);

-    vp9_idct8x8_64_add_c(coeff, dst, 8);

+    vpx_idct8x8_64_add_c(coeff, dst, 8);

     for (int j = 0; j < 64; ++j) {

       const int diff = dst[j] - src[j];

       const int error = diff * diff;

--- a/test/partial_idct_test.cc

+++ b/test/partial_idct_test.cc

@@ -203,32 +203,32 @@

     C, PartialIDctTest,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c,

-                   &vp9_idct32x32_34_add_c,

+                   &vpx_idct32x32_1024_add_c,

+                   &vpx_idct32x32_34_add_c,

                    TX_32X32, 34),

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c,

-                   &vp9_idct32x32_1_add_c,

+                   &vpx_idct32x32_1024_add_c,

+                   &vpx_idct32x32_1_add_c,

                    TX_32X32, 1),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_10_add_c,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_10_add_c,

                    TX_16X16, 10),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_1_add_c,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_1_add_c,

                    TX_16X16, 1),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_12_add_c,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_12_add_c,

                    TX_8X8, 12),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_1_add_c,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_1_add_c,

                    TX_8X8, 1),

         make_tuple(&vpx_fdct4x4_c,

-                   &vp9_idct4x4_16_add_c,

-                   &vp9_idct4x4_1_add_c,

+                   &vpx_idct4x4_16_add_c,

+                   &vpx_idct4x4_1_add_c,

                    TX_4X4, 1)));

 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -236,28 +236,28 @@

     NEON, PartialIDctTest,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c,

-                   &vp9_idct32x32_1_add_neon,

+                   &vpx_idct32x32_1024_add_c,

+                   &vpx_idct32x32_1_add_neon,

                    TX_32X32, 1),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_10_add_neon,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_10_add_neon,

                    TX_16X16, 10),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_1_add_neon,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_1_add_neon,

                    TX_16X16, 1),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_12_add_neon,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_12_add_neon,

                    TX_8X8, 12),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_1_add_neon,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_1_add_neon,

                    TX_8X8, 1),

         make_tuple(&vpx_fdct4x4_c,

-                   &vp9_idct4x4_16_add_c,

-                   &vp9_idct4x4_1_add_neon,

+                   &vpx_idct4x4_16_add_c,

+                   &vpx_idct4x4_1_add_neon,

                    TX_4X4, 1)));

 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -266,32 +266,32 @@

     SSE2, PartialIDctTest,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c,

-                   &vp9_idct32x32_34_add_sse2,

+                   &vpx_idct32x32_1024_add_c,

+                   &vpx_idct32x32_34_add_sse2,

                    TX_32X32, 34),

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c,

-                   &vp9_idct32x32_1_add_sse2,

+                   &vpx_idct32x32_1024_add_c,

+                   &vpx_idct32x32_1_add_sse2,

                    TX_32X32, 1),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_10_add_sse2,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_10_add_sse2,

                    TX_16X16, 10),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_1_add_sse2,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_1_add_sse2,

                    TX_16X16, 1),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_12_add_sse2,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_12_add_sse2,

                    TX_8X8, 12),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_1_add_sse2,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_1_add_sse2,

                    TX_8X8, 1),

         make_tuple(&vpx_fdct4x4_c,

-                   &vp9_idct4x4_16_add_c,

-                   &vp9_idct4x4_1_add_sse2,

+                   &vpx_idct4x4_16_add_c,

+                   &vpx_idct4x4_1_add_sse2,

                    TX_4X4, 1)));

 #endif

@@ -301,8 +301,8 @@

     SSSE3_64, PartialIDctTest,

     ::testing::Values(

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_12_add_ssse3,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_12_add_ssse3,

                    TX_8X8, 12)));

 #endif

@@ -311,32 +311,32 @@

     MSA, PartialIDctTest,

     ::testing::Values(

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c,

-                   &vp9_idct32x32_34_add_msa,

+                   &vpx_idct32x32_1024_add_c,

+                   &vpx_idct32x32_34_add_msa,

                    TX_32X32, 34),

         make_tuple(&vpx_fdct32x32_c,

-                   &vp9_idct32x32_1024_add_c,

-                   &vp9_idct32x32_1_add_msa,

+                   &vpx_idct32x32_1024_add_c,

+                   &vpx_idct32x32_1_add_msa,

                    TX_32X32, 1),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_10_add_msa,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_10_add_msa,

                    TX_16X16, 10),

         make_tuple(&vpx_fdct16x16_c,

-                   &vp9_idct16x16_256_add_c,

-                   &vp9_idct16x16_1_add_msa,

+                   &vpx_idct16x16_256_add_c,

+                   &vpx_idct16x16_1_add_msa,

                    TX_16X16, 1),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_12_add_msa,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_12_add_msa,

                    TX_8X8, 10),

         make_tuple(&vpx_fdct8x8_c,

-                   &vp9_idct8x8_64_add_c,

-                   &vp9_idct8x8_1_add_msa,

+                   &vpx_idct8x8_64_add_c,

+                   &vpx_idct8x8_1_add_msa,

                    TX_8X8, 1),

         make_tuple(&vpx_fdct4x4_c,

-                   &vp9_idct4x4_16_add_c,

-                   &vp9_idct4x4_1_add_msa,

+                   &vpx_idct4x4_16_add_c,

+                   &vpx_idct4x4_1_add_msa,

                    TX_4X4, 1)));

 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

--- a/test/register_state_check.h

+++ b/test/register_state_check.h

@@ -96,7 +96,7 @@

 extern "C" {

 // Save the d8-d15 registers into store.

-void vp9_push_neon(int64_t *store);

+void vpx_push_neon(int64_t *store);

 namespace libvpx_test {

@@ -111,7 +111,7 @@

  private:

   static bool StoreRegisters(int64_t store[8]) {

-    vp9_push_neon(store);

+    vpx_push_neon(store);

     return true;

@@ -119,7 +119,7 @@

   bool Check() const {

     if (!initialized_) return false;

     int64_t post_store[8];

-    vp9_push_neon(post_store);

+    vpx_push_neon(post_store);

     for (int i = 0; i < 8; ++i) {

       EXPECT_EQ(pre_store_[i], post_store[i]) << "d"

           << i + 8 << " has been modified";

--- a/vp9/common/arm/neon/vp9_save_reg_neon.asm

+++ /dev/null

@@ -1,36 +1,0 @@

-;

-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

-;

-;  Use of this source code is governed by a BSD-style license

-;  that can be found in the LICENSE file in the root of the source

-;  tree. An additional intellectual property rights grant can be found

-;  in the file PATENTS.  All contributing project authors may

-;  be found in the AUTHORS file in the root of the source tree.

-;

-    EXPORT  |vp9_push_neon|

-    EXPORT  |vp9_pop_neon|

-    ARM

-    REQUIRE8

-    PRESERVE8

-    AREA ||.text||, CODE, READONLY, ALIGN=2

-|vp9_push_neon| PROC

-    vst1.i64            {d8, d9, d10, d11}, [r0]!

-    vst1.i64            {d12, d13, d14, d15}, [r0]!

-    bx              lr

-    ENDP

-|vp9_pop_neon| PROC

-    vld1.i64            {d8, d9, d10, d11}, [r0]!

-    vld1.i64            {d12, d13, d14, d15}, [r0]!

-    bx              lr

-    ENDP

-    END

--- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c

+++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c

@@ -38,11 +38,11 @@

   switch (tx_type) {

     case DCT_DCT:   // DCT in both horizontal and vertical

-      vp9_idct4_rows_dspr2(input, outptr);

-      vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);

+      vpx_idct4_rows_dspr2(input, outptr);

+      vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);

       break;

     case ADST_DCT:  // ADST in vertical, DCT in horizontal

-      vp9_idct4_rows_dspr2(input, outptr);

+      vpx_idct4_rows_dspr2(input, outptr);

       outptr = out;

@@ -69,7 +69,7 @@

           temp_in[i * 4 + j] = out[j * 4 + i];

-      vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);

+      vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);

       break;

     case ADST_ADST:  // ADST in both directions

       for (i = 0; i < 4; ++i) {

--- a/vp9/common/mips/msa/vp9_idct16x16_msa.c

+++ b/vp9/common/mips/msa/vp9_idct16x16_msa.c

@@ -24,13 +24,13 @@

       /* transform rows */

       for (i = 0; i < 2; ++i) {

         /* process 16 * 8 block */

-        vp9_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

+        vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

       /* transform columns */

       for (i = 0; i < 2; ++i) {

         /* process 8 * 16 block */

-        vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

+        vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

                                          dst_stride);

       break;

@@ -38,12 +38,12 @@

       /* transform rows */

       for (i = 0; i < 2; ++i) {

         /* process 16 * 8 block */

-        vp9_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

+        vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

       /* transform columns */

       for (i = 0; i < 2; ++i) {

-        vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),

+        vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),

                                           (dst + (i << 3)), dst_stride);

       break;

@@ -51,13 +51,13 @@

       /* transform rows */

       for (i = 0; i < 2; ++i) {

         /* process 16 * 8 block */

-        vp9_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

+        vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

       /* transform columns */

       for (i = 0; i < 2; ++i) {

         /* process 8 * 16 block */

-        vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

+        vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

                                          dst_stride);

       break;

@@ -65,12 +65,12 @@

       /* transform rows */

       for (i = 0; i < 2; ++i) {

         /* process 16 * 8 block */

-        vp9_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

+        vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));

       /* transform columns */

       for (i = 0; i < 2; ++i) {

-        vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),

+        vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),

                                           (dst + (i << 3)), dst_stride);

       break;

--- a/vp9/common/vp9_idct.c

+++ b/vp9/common/vp9_idct.c

@@ -123,9 +123,9 @@

 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,

                      int eob) {

   if (eob > 1)

-    vp9_idct4x4_16_add(input, dest, stride);

+    vpx_idct4x4_16_add(input, dest, stride);

   else

-    vp9_idct4x4_1_add(input, dest, stride);

+    vpx_idct4x4_1_add(input, dest, stride);

@@ -132,9 +132,9 @@

 void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,

                      int eob) {

   if (eob > 1)

-    vp9_iwht4x4_16_add(input, dest, stride);

+    vpx_iwht4x4_16_add(input, dest, stride);

   else

-    vp9_iwht4x4_1_add(input, dest, stride);

+    vpx_iwht4x4_1_add(input, dest, stride);

 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,

@@ -148,11 +148,11 @@

   // Combine that with code here.

   if (eob == 1)

     // DC only DCT coefficient

-    vp9_idct8x8_1_add(input, dest, stride);

+    vpx_idct8x8_1_add(input, dest, stride);

   else if (eob <= 12)

-    vp9_idct8x8_12_add(input, dest, stride);

+    vpx_idct8x8_12_add(input, dest, stride);

   else

-    vp9_idct8x8_64_add(input, dest, stride);

+    vpx_idct8x8_64_add(input, dest, stride);

 void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,

@@ -161,22 +161,22 @@

    * coefficients. Use eobs to separate different cases. */

   if (eob == 1)

     /* DC only DCT coefficient. */

-    vp9_idct16x16_1_add(input, dest, stride);

+    vpx_idct16x16_1_add(input, dest, stride);

   else if (eob <= 10)

-    vp9_idct16x16_10_add(input, dest, stride);

+    vpx_idct16x16_10_add(input, dest, stride);

   else

-    vp9_idct16x16_256_add(input, dest, stride);

+    vpx_idct16x16_256_add(input, dest, stride);

 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,

                        int eob) {

   if (eob == 1)

-    vp9_idct32x32_1_add(input, dest, stride);

+    vpx_idct32x32_1_add(input, dest, stride);

   else if (eob <= 34)

     // non-zero coeff only in upper-left 8x8

-    vp9_idct32x32_34_add(input, dest, stride);

+    vpx_idct32x32_34_add(input, dest, stride);

   else

-    vp9_idct32x32_1024_add(input, dest, stride);

+    vpx_idct32x32_1024_add(input, dest, stride);

 // iht

@@ -210,10 +210,10 @@

 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,

                                 int stride, int tx_type, int bd) {

   const highbd_transform_2d IHT_4[] = {

-    { vp9_highbd_idct4_c, vp9_highbd_idct4_c  },    // DCT_DCT  = 0

-    { highbd_iadst4_c, vp9_highbd_idct4_c },    // ADST_DCT = 1

-    { vp9_highbd_idct4_c, highbd_iadst4_c },    // DCT_ADST = 2

-    { highbd_iadst4_c, highbd_iadst4_c }    // ADST_ADST = 3

+    { vpx_highbd_idct4_c, vpx_highbd_idct4_c  },    // DCT_DCT  = 0

+    { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },    // ADST_DCT = 1

+    { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },    // DCT_ADST = 2

+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }    // ADST_ADST = 3

};

   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

@@ -242,10 +242,10 @@

 static const highbd_transform_2d HIGH_IHT_8[] = {

-  { vp9_highbd_idct8_c,  vp9_highbd_idct8_c  },  // DCT_DCT  = 0

-  { highbd_iadst8_c, vp9_highbd_idct8_c  },  // ADST_DCT = 1

-  { vp9_highbd_idct8_c,  highbd_iadst8_c },  // DCT_ADST = 2

-  { highbd_iadst8_c, highbd_iadst8_c }   // ADST_ADST = 3

+  { vpx_highbd_idct8_c,  vpx_highbd_idct8_c  },  // DCT_DCT  = 0

+  { vpx_highbd_iadst8_c, vpx_highbd_idct8_c  },  // ADST_DCT = 1

+  { vpx_highbd_idct8_c,  vpx_highbd_iadst8_c },  // DCT_ADST = 2

+  { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }   // ADST_ADST = 3

};

 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,

@@ -277,10 +277,10 @@

 static const highbd_transform_2d HIGH_IHT_16[] = {

-  { vp9_highbd_idct16_c,  vp9_highbd_idct16_c  },  // DCT_DCT  = 0

-  { highbd_iadst16_c, vp9_highbd_idct16_c  },  // ADST_DCT = 1

-  { vp9_highbd_idct16_c,  highbd_iadst16_c },  // DCT_ADST = 2

-  { highbd_iadst16_c, highbd_iadst16_c }   // ADST_ADST = 3

+  { vpx_highbd_idct16_c,  vpx_highbd_idct16_c  },  // DCT_DCT  = 0

+  { vpx_highbd_iadst16_c, vpx_highbd_idct16_c  },  // ADST_DCT = 1

+  { vpx_highbd_idct16_c,  vpx_highbd_iadst16_c },  // DCT_ADST = 2

+  { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }   // ADST_ADST = 3

};

 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,

@@ -315,9 +315,9 @@

 void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,

                             int eob, int bd) {

   if (eob > 1)

-    vp9_highbd_idct4x4_16_add(input, dest, stride, bd);

+    vpx_highbd_idct4x4_16_add(input, dest, stride, bd);

   else

-    vp9_highbd_idct4x4_1_add(input, dest, stride, bd);

+    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);

@@ -324,9 +324,9 @@

 void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,

                             int eob, int bd) {

   if (eob > 1)

-    vp9_highbd_iwht4x4_16_add(input, dest, stride, bd);

+    vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);

   else

-    vp9_highbd_iwht4x4_1_add(input, dest, stride, bd);

+    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);

 void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,

@@ -340,11 +340,11 @@

   // Combine that with code here.

   // DC only DCT coefficient

   if (eob == 1) {

-    vp9_highbd_idct8x8_1_add(input, dest, stride, bd);

+    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);

   } else if (eob <= 10) {

-    vp9_highbd_idct8x8_10_add(input, dest, stride, bd);

+    vpx_highbd_idct8x8_10_add(input, dest, stride, bd);

   } else {

-    vp9_highbd_idct8x8_64_add(input, dest, stride, bd);

+    vpx_highbd_idct8x8_64_add(input, dest, stride, bd);

@@ -354,11 +354,11 @@

   // coefficients. Use eobs to separate different cases.

   // DC only DCT coefficient.

   if (eob == 1) {

-    vp9_highbd_idct16x16_1_add(input, dest, stride, bd);

+    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);

   } else if (eob <= 10) {

-    vp9_highbd_idct16x16_10_add(input, dest, stride, bd);

+    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);

   } else {

-    vp9_highbd_idct16x16_256_add(input, dest, stride, bd);

+    vpx_highbd_idct16x16_256_add(input, dest, stride, bd);

@@ -366,11 +366,11 @@

                               int stride, int eob, int bd) {

   // Non-zero coeff only in upper-left 8x8

   if (eob == 1) {

-    vp9_highbd_idct32x32_1_add(input, dest, stride, bd);

+    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);

   } else if (eob <= 34) {

-    vp9_highbd_idct32x32_34_add(input, dest, stride, bd);

+    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);

   } else {

-    vp9_highbd_idct32x32_1024_add(input, dest, stride, bd);

+    vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);

--- a/vp9/common/vp9_idct.h

+++ b/vp9/common/vp9_idct.h

@@ -44,8 +44,8 @@

                      int eob);

 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,

                      int eob);

-void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int

-                       eob);

+void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,

+                       int eob);

 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,

                        int eob);

--- a/vp9/vp9_common.mk

+++ b/vp9/vp9_common.mk

@@ -88,8 +88,6 @@

 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c

-VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)

 ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)

 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c

 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c

--- a/vpx_dsp/arm/fwd_txfm_neon.c

+++ b/vpx_dsp/arm/fwd_txfm_neon.c

@@ -170,7 +170,7 @@

   }  // for

-    // from vp9_dct_sse2.c

+    // from vpx_dct_sse2.c

     // Post-condition (division by two)

     //    division of two 16 bits signed numbers using shifts

     //    n / 2 = (n - (n >> 15)) >> 1

--- a/vpx_dsp/arm/idct16x16_1_add_neon.asm

+++ b/vpx_dsp/arm/idct16x16_1_add_neon.asm

@@ -8,7 +8,7 @@

-    EXPORT  |vp9_idct16x16_1_add_neon|

+    EXPORT  |vpx_idct16x16_1_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -15,7 +15,7 @@

     AREA ||.text||, CODE, READONLY, ALIGN=2

-;void vp9_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,

+;void vpx_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,

 ;                                    int dest_stride)

 ; r0  int16_t input

@@ -22,7 +22,7 @@

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_idct16x16_1_add_neon| PROC

+|vpx_idct16x16_1_add_neon| PROC

     ldrsh            r0, [r0]

     ; generate cospi_16_64 = 11585

@@ -193,6 +193,6 @@

     vst1.64          {d31}, [r12], r2

     bx               lr

-    ENDP             ; |vp9_idct16x16_1_add_neon|

+    ENDP             ; |vpx_idct16x16_1_add_neon|

END

--- a/vpx_dsp/arm/idct16x16_1_add_neon.c

+++ b/vpx_dsp/arm/idct16x16_1_add_neon.c

@@ -13,7 +13,7 @@

 #include "vpx_dsp/inv_txfm.h"

 #include "vpx_ports/mem.h"

-void vp9_idct16x16_1_add_neon(

+void vpx_idct16x16_1_add_neon(

         int16_t *input,

         uint8_t *dest,

         int dest_stride) {

--- a/vpx_dsp/arm/idct16x16_add_neon.asm

+++ b/vpx_dsp/arm/idct16x16_add_neon.asm

@@ -8,10 +8,10 @@

 ;  be found in the AUTHORS file in the root of the source tree.

-    EXPORT  |vp9_idct16x16_256_add_neon_pass1|

-    EXPORT  |vp9_idct16x16_256_add_neon_pass2|

-    EXPORT  |vp9_idct16x16_10_add_neon_pass1|

-    EXPORT  |vp9_idct16x16_10_add_neon_pass2|

+    EXPORT  |vpx_idct16x16_256_add_neon_pass1|

+    EXPORT  |vpx_idct16x16_256_add_neon_pass2|

+    EXPORT  |vpx_idct16x16_10_add_neon_pass1|

+    EXPORT  |vpx_idct16x16_10_add_neon_pass2|

ARM

     REQUIRE8

     PRESERVE8

@@ -36,7 +36,7 @@

     MEND

     AREA    Block, CODE, READONLY ; name this block of code

-;void |vp9_idct16x16_256_add_neon_pass1|(int16_t *input,

+;void |vpx_idct16x16_256_add_neon_pass1|(int16_t *input,

 ;                                          int16_t *output, int output_stride)

 ; r0  int16_t input

@@ -46,7 +46,7 @@

 ; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output

 ; will be stored back into q8-q15 registers. This function will touch q0-q7

 ; registers and use them as buffer during calculation.

-|vp9_idct16x16_256_add_neon_pass1| PROC

+|vpx_idct16x16_256_add_neon_pass1| PROC

     ; TODO(hkuang): Find a better way to load the elements.

     ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15

@@ -273,9 +273,9 @@

     vst1.64         {d31}, [r1], r2

     bx              lr

-    ENDP  ; |vp9_idct16x16_256_add_neon_pass1|

+    ENDP  ; |vpx_idct16x16_256_add_neon_pass1|

-;void vp9_idct16x16_256_add_neon_pass2(int16_t *src,

+;void vpx_idct16x16_256_add_neon_pass2(int16_t *src,

 ;                                        int16_t *output,

 ;                                        int16_t *pass1Output,

 ;                                        int16_t skip_adding,

@@ -292,7 +292,7 @@

 ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output

 ; will be stored back into q8-q15 registers. This function will touch q0-q7

 ; registers and use them as buffer during calculation.

-|vp9_idct16x16_256_add_neon_pass2| PROC

+|vpx_idct16x16_256_add_neon_pass2| PROC

     push            {r3-r9}

     ; TODO(hkuang): Find a better way to load the elements.

@@ -784,9 +784,9 @@

 end_idct16x16_pass2

     pop             {r3-r9}

     bx              lr

-    ENDP  ; |vp9_idct16x16_256_add_neon_pass2|

+    ENDP  ; |vpx_idct16x16_256_add_neon_pass2|

-;void |vp9_idct16x16_10_add_neon_pass1|(int16_t *input,

+;void |vpx_idct16x16_10_add_neon_pass1|(int16_t *input,

 ;                                             int16_t *output, int output_stride)

 ; r0  int16_t input

@@ -796,7 +796,7 @@

 ; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output

 ; will be stored back into q8-q15 registers. This function will touch q0-q7

 ; registers and use them as buffer during calculation.

-|vp9_idct16x16_10_add_neon_pass1| PROC

+|vpx_idct16x16_10_add_neon_pass1| PROC

     ; TODO(hkuang): Find a better way to load the elements.

     ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15

@@ -905,9 +905,9 @@

     vst1.64         {d31}, [r1], r2

     bx              lr

-    ENDP  ; |vp9_idct16x16_10_add_neon_pass1|

+    ENDP  ; |vpx_idct16x16_10_add_neon_pass1|

-;void vp9_idct16x16_10_add_neon_pass2(int16_t *src,

+;void vpx_idct16x16_10_add_neon_pass2(int16_t *src,

 ;                                           int16_t *output,

 ;                                           int16_t *pass1Output,

 ;                                           int16_t skip_adding,

@@ -924,7 +924,7 @@

 ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output

 ; will be stored back into q8-q15 registers. This function will touch q0-q7

 ; registers and use them as buffer during calculation.

-|vp9_idct16x16_10_add_neon_pass2| PROC

+|vpx_idct16x16_10_add_neon_pass2| PROC

     push            {r3-r9}

     ; TODO(hkuang): Find a better way to load the elements.

@@ -1175,5 +1175,5 @@

 end_idct10_16x16_pass2

     pop             {r3-r9}

     bx              lr

-    ENDP  ; |vp9_idct16x16_10_add_neon_pass2|

+    ENDP  ; |vpx_idct16x16_10_add_neon_pass2|

END

--- a/vpx_dsp/arm/idct16x16_add_neon.c

+++ b/vpx_dsp/arm/idct16x16_add_neon.c

@@ -82,7 +82,7 @@

     return;

-void vp9_idct16x16_256_add_neon_pass1(

+void vpx_idct16x16_256_add_neon_pass1(

         int16_t *in,

         int16_t *out,

         int output_stride) {

@@ -320,7 +320,7 @@

     return;

-void vp9_idct16x16_256_add_neon_pass2(

+void vpx_idct16x16_256_add_neon_pass2(

         int16_t *src,

         int16_t *out,

         int16_t *pass1Output,

@@ -879,7 +879,7 @@

     return;

-void vp9_idct16x16_10_add_neon_pass1(

+void vpx_idct16x16_10_add_neon_pass1(

         int16_t *in,

         int16_t *out,

         int output_stride) {

@@ -1017,7 +1017,7 @@

     return;

-void vp9_idct16x16_10_add_neon_pass2(

+void vpx_idct16x16_10_add_neon_pass2(

         int16_t *src,

         int16_t *out,

         int16_t *pass1Output,

--- a/vpx_dsp/arm/idct16x16_neon.c

+++ b/vpx_dsp/arm/idct16x16_neon.c

@@ -10,19 +10,19 @@

 #include "vpx_dsp/vpx_dsp_common.h"

-void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,

+void vpx_idct16x16_256_add_neon_pass1(const int16_t *input,

                                       int16_t *output,

                                       int output_stride);

-void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,

+void vpx_idct16x16_256_add_neon_pass2(const int16_t *src,

                                       int16_t *output,

                                       int16_t *pass1Output,

                                       int16_t skip_adding,

                                       uint8_t *dest,

                                       int dest_stride);

-void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,

+void vpx_idct16x16_10_add_neon_pass1(const int16_t *input,

                                      int16_t *output,

                                      int output_stride);

-void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,

+void vpx_idct16x16_10_add_neon_pass2(const int16_t *src,

                                      int16_t *output,

                                      int16_t *pass1Output,

                                      int16_t skip_adding,

@@ -31,11 +31,11 @@

 #if HAVE_NEON_ASM

 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */

-extern void vp9_push_neon(int64_t *store);

-extern void vp9_pop_neon(int64_t *store);

+extern void vpx_push_neon(int64_t *store);

+extern void vpx_pop_neon(int64_t *store);

 #endif  // HAVE_NEON_ASM

-void vp9_idct16x16_256_add_neon(const int16_t *input,

+void vpx_idct16x16_256_add_neon(const int16_t *input,

                                 uint8_t *dest, int dest_stride) {

 #if HAVE_NEON_ASM

   int64_t store_reg[8];

@@ -45,18 +45,18 @@

 #if HAVE_NEON_ASM

   // save d8-d15 register values.

-  vp9_push_neon(store_reg);

+  vpx_push_neon(store_reg);

 #endif

   /* Parallel idct on the upper 8 rows */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);

+  vpx_idct16x16_256_add_neon_pass1(input, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7

   // which will be saved into row_idct_output.

-  vp9_idct16x16_256_add_neon_pass2(input+1,

+  vpx_idct16x16_256_add_neon_pass2(input+1,

                                      row_idct_output,

                                      pass1_output,

0,

@@ -66,12 +66,12 @@

   /* Parallel idct on the lower 8 rows */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);

+  vpx_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7

   // which will be saved into row_idct_output.

-  vp9_idct16x16_256_add_neon_pass2(input+8*16+1,

+  vpx_idct16x16_256_add_neon_pass2(input+8*16+1,

                                      row_idct_output+8,

                                      pass1_output,

0,

@@ -81,12 +81,12 @@

   /* Parallel idct on the left 8 columns */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

+  vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7.

   // Then add the result to the destination data.

-  vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

+  vpx_idct16x16_256_add_neon_pass2(row_idct_output+1,

                                      row_idct_output,

                                      pass1_output,

1,

@@ -96,12 +96,12 @@

   /* Parallel idct on the right 8 columns */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

+  vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7.

   // Then add the result to the destination data.

-  vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

+  vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

                                      row_idct_output+8,

                                      pass1_output,

1,

@@ -110,13 +110,13 @@

 #if HAVE_NEON_ASM

   // restore d8-d15 register values.

-  vp9_pop_neon(store_reg);

+  vpx_pop_neon(store_reg);

 #endif

   return;

-void vp9_idct16x16_10_add_neon(const int16_t *input,

+void vpx_idct16x16_10_add_neon(const int16_t *input,

                                uint8_t *dest, int dest_stride) {

 #if HAVE_NEON_ASM

   int64_t store_reg[8];

@@ -126,18 +126,18 @@

 #if HAVE_NEON_ASM

   // save d8-d15 register values.

-  vp9_push_neon(store_reg);

+  vpx_push_neon(store_reg);

 #endif

   /* Parallel idct on the upper 8 rows */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);

+  vpx_idct16x16_10_add_neon_pass1(input, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7

   // which will be saved into row_idct_output.

-  vp9_idct16x16_10_add_neon_pass2(input+1,

+  vpx_idct16x16_10_add_neon_pass2(input+1,

                                         row_idct_output,

                                         pass1_output,

0,

@@ -149,12 +149,12 @@

   /* Parallel idct on the left 8 columns */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

+  vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7.

   // Then add the result to the destination data.

-  vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

+  vpx_idct16x16_256_add_neon_pass2(row_idct_output+1,

                                      row_idct_output,

                                      pass1_output,

1,

@@ -164,12 +164,12 @@

   /* Parallel idct on the right 8 columns */

   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   // stage 6 result in pass1_output.

-  vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

+  vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   // with result in pass1(pass1_output) to calculate final result in stage 7.

   // Then add the result to the destination data.

-  vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

+  vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

                                      row_idct_output+8,

                                      pass1_output,

1,

@@ -178,7 +178,7 @@

 #if HAVE_NEON_ASM

   // restore d8-d15 register values.

-  vp9_pop_neon(store_reg);

+  vpx_pop_neon(store_reg);

 #endif

   return;

--- a/vpx_dsp/arm/idct32x32_1_add_neon.asm

+++ b/vpx_dsp/arm/idct32x32_1_add_neon.asm

@@ -7,7 +7,7 @@

 ;  file in the root of the source tree.

-    EXPORT  |vp9_idct32x32_1_add_neon|

+    EXPORT  |vpx_idct32x32_1_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -64,7 +64,7 @@

     vst1.8           {q15},[$dst], $stride

     MEND

-;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,

+;void vpx_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,

 ;                              int dest_stride)

 ; r0  int16_t input

@@ -71,7 +71,7 @@

 ; r1  uint8_t *dest

 ; r2  int dest_stride

-|vp9_idct32x32_1_add_neon| PROC

+|vpx_idct32x32_1_add_neon| PROC

     push             {lr}

     pld              [r1]

     add              r3, r1, #16               ; r3 dest + 16 for second loop

@@ -140,5 +140,5 @@

     bne              diff_positive_32_32_loop

     pop              {pc}

-    ENDP             ; |vp9_idct32x32_1_add_neon|

+    ENDP             ; |vpx_idct32x32_1_add_neon|

END

--- a/vpx_dsp/arm/idct32x32_1_add_neon.c

+++ b/vpx_dsp/arm/idct32x32_1_add_neon.c

@@ -115,7 +115,7 @@

     return;

-void vp9_idct32x32_1_add_neon(

+void vpx_idct32x32_1_add_neon(

         int16_t *input,

         uint8_t *dest,

         int dest_stride) {

--- a/vpx_dsp/arm/idct32x32_add_neon.asm

+++ b/vpx_dsp/arm/idct32x32_add_neon.asm

@@ -43,7 +43,7 @@

 cospi_31_64 EQU   804

-    EXPORT  |vp9_idct32x32_1024_add_neon|

+    EXPORT  |vpx_idct32x32_1024_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -288,7 +288,7 @@

     MEND

     ; --------------------------------------------------------------------------

-;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);

+;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);

 ;   r0  int16_t *input,

 ;   r1  uint8_t *dest,

@@ -303,7 +303,7 @@

 ;   r9  dest + 15 * dest_stride, descending (14, 13, 12, ...)

 ;   r10 dest + 16 * dest_stride, ascending  (17, 18, 19, ...)

-|vp9_idct32x32_1024_add_neon| PROC

+|vpx_idct32x32_1024_add_neon| PROC

     ; This function does one pass of idct32x32 transform.

     ; This is done by transposing the input and then doing a 1d transform on

@@ -1295,5 +1295,5 @@

     vpop {d8-d15}

     pop  {r4-r11}

     bx              lr

-    ENDP  ; |vp9_idct32x32_1024_add_neon|

+    ENDP  ; |vpx_idct32x32_1024_add_neon|

END

--- a/vpx_dsp/arm/idct32x32_add_neon.c

+++ b/vpx_dsp/arm/idct32x32_add_neon.c

@@ -454,7 +454,7 @@

     return;

-void vp9_idct32x32_1024_add_neon(

+void vpx_idct32x32_1024_add_neon(

         int16_t *input,

         uint8_t *dest,

         int stride) {

--- a/vpx_dsp/arm/idct4x4_1_add_neon.asm

+++ b/vpx_dsp/arm/idct4x4_1_add_neon.asm

@@ -8,7 +8,7 @@

-    EXPORT  |vp9_idct4x4_1_add_neon|

+    EXPORT  |vpx_idct4x4_1_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -15,7 +15,7 @@

     AREA ||.text||, CODE, READONLY, ALIGN=2

-;void vp9_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,

+;void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,

 ;                                  int dest_stride)

 ; r0  int16_t input

@@ -22,7 +22,7 @@

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_idct4x4_1_add_neon| PROC

+|vpx_idct4x4_1_add_neon| PROC

     ldrsh            r0, [r0]

     ; generate cospi_16_64 = 11585

@@ -63,6 +63,6 @@

     vst1.32          {d7[1]}, [r12]

     bx               lr

-    ENDP             ; |vp9_idct4x4_1_add_neon|

+    ENDP             ; |vpx_idct4x4_1_add_neon|

END

--- a/vpx_dsp/arm/idct4x4_1_add_neon.c

+++ b/vpx_dsp/arm/idct4x4_1_add_neon.c

@@ -13,7 +13,7 @@

 #include "vpx_dsp/inv_txfm.h"

 #include "vpx_ports/mem.h"

-void vp9_idct4x4_1_add_neon(

+void vpx_idct4x4_1_add_neon(

         int16_t *input,

         uint8_t *dest,

         int dest_stride) {

--- a/vpx_dsp/arm/idct4x4_add_neon.asm

+++ b/vpx_dsp/arm/idct4x4_add_neon.asm

@@ -8,7 +8,7 @@

 ;  be found in the AUTHORS file in the root of the source tree.

-    EXPORT  |vp9_idct4x4_16_add_neon|

+    EXPORT  |vpx_idct4x4_16_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -16,13 +16,13 @@

     AREA ||.text||, CODE, READONLY, ALIGN=2

     AREA     Block, CODE, READONLY ; name this block of code

-;void vp9_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

+;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

 ; r0  int16_t input

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_idct4x4_16_add_neon| PROC

+|vpx_idct4x4_16_add_neon| PROC

     ; The 2D transform is done with two passes which are actually pretty

     ; similar. We first transform the rows. This is done by transposing

@@ -185,6 +185,6 @@

     vst1.32 {d26[1]}, [r1], r2

     vst1.32 {d26[0]}, [r1]  ; no post-increment

     bx              lr

-    ENDP  ; |vp9_idct4x4_16_add_neon|

+    ENDP  ; |vpx_idct4x4_16_add_neon|

END

--- a/vpx_dsp/arm/idct4x4_add_neon.c

+++ b/vpx_dsp/arm/idct4x4_add_neon.c

@@ -10,7 +10,7 @@

 #include <arm_neon.h>

-void vp9_idct4x4_16_add_neon(

+void vpx_idct4x4_16_add_neon(

         int16_t *input,

         uint8_t *dest,

         int dest_stride) {

--- a/vpx_dsp/arm/idct8x8_1_add_neon.asm

+++ b/vpx_dsp/arm/idct8x8_1_add_neon.asm

@@ -8,7 +8,7 @@

-    EXPORT  |vp9_idct8x8_1_add_neon|

+    EXPORT  |vpx_idct8x8_1_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -15,7 +15,7 @@

     AREA ||.text||, CODE, READONLY, ALIGN=2

-;void vp9_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,

+;void vpx_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,

 ;                                  int dest_stride)

 ; r0  int16_t input

@@ -22,7 +22,7 @@

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_idct8x8_1_add_neon| PROC

+|vpx_idct8x8_1_add_neon| PROC

     ldrsh            r0, [r0]

     ; generate cospi_16_64 = 11585

@@ -83,6 +83,6 @@

     vst1.64          {d31}, [r12], r2

     bx               lr

-    ENDP             ; |vp9_idct8x8_1_add_neon|

+    ENDP             ; |vpx_idct8x8_1_add_neon|

END

--- a/vpx_dsp/arm/idct8x8_1_add_neon.c

+++ b/vpx_dsp/arm/idct8x8_1_add_neon.c

@@ -13,7 +13,7 @@

 #include "vpx_dsp/inv_txfm.h"

 #include "vpx_ports/mem.h"

-void vp9_idct8x8_1_add_neon(

+void vpx_idct8x8_1_add_neon(

         int16_t *input,

         uint8_t *dest,

         int dest_stride) {

--- a/vpx_dsp/arm/idct8x8_add_neon.asm

+++ b/vpx_dsp/arm/idct8x8_add_neon.asm

@@ -8,8 +8,8 @@

 ;  be found in the AUTHORS file in the root of the source tree.

-    EXPORT  |vp9_idct8x8_64_add_neon|

-    EXPORT  |vp9_idct8x8_12_add_neon|

+    EXPORT  |vpx_idct8x8_64_add_neon|

+    EXPORT  |vpx_idct8x8_12_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -198,13 +198,13 @@

     MEND

     AREA    Block, CODE, READONLY ; name this block of code

-;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

+;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

 ; r0  int16_t input

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_idct8x8_64_add_neon| PROC

+|vpx_idct8x8_64_add_neon| PROC

     push            {r4-r9}

     vpush           {d8-d15}

     vld1.s16        {q8,q9}, [r0]!

@@ -308,15 +308,15 @@

     vpop            {d8-d15}

     pop             {r4-r9}

     bx              lr

-    ENDP  ; |vp9_idct8x8_64_add_neon|

+    ENDP  ; |vpx_idct8x8_64_add_neon|

-;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

+;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

 ; r0  int16_t input

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_idct8x8_12_add_neon| PROC

+|vpx_idct8x8_12_add_neon| PROC

     push            {r4-r9}

     vpush           {d8-d15}

     vld1.s16        {q8,q9}, [r0]!

@@ -514,6 +514,6 @@

     vpop            {d8-d15}

     pop             {r4-r9}

     bx              lr

-    ENDP  ; |vp9_idct8x8_12_add_neon|

+    ENDP  ; |vpx_idct8x8_12_add_neon|

END

--- a/vpx_dsp/arm/idct8x8_add_neon.c

+++ b/vpx_dsp/arm/idct8x8_add_neon.c

@@ -238,7 +238,7 @@

     return;

-void vp9_idct8x8_64_add_neon(

+void vpx_idct8x8_64_add_neon(

         int16_t *input,

         uint8_t *dest,

         int dest_stride) {

@@ -351,7 +351,7 @@

     return;

-void vp9_idct8x8_12_add_neon(

+void vpx_idct8x8_12_add_neon(

         int16_t *input,

         uint8_t *dest,

         int dest_stride) {

--- /dev/null

+++ b/vpx_dsp/arm/save_reg_neon.asm

@@ -1,0 +1,36 @@

+;

+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

+;

+;  Use of this source code is governed by a BSD-style license

+;  that can be found in the LICENSE file in the root of the source

+;  tree. An additional intellectual property rights grant can be found

+;  in the file PATENTS.  All contributing project authors may

+;  be found in the AUTHORS file in the root of the source tree.

+;

+    EXPORT  |vpx_push_neon|

+    EXPORT  |vpx_pop_neon|

+    ARM

+    REQUIRE8

+    PRESERVE8

+    AREA ||.text||, CODE, READONLY, ALIGN=2

+|vpx_push_neon| PROC

+    vst1.i64            {d8, d9, d10, d11}, [r0]!

+    vst1.i64            {d12, d13, d14, d15}, [r0]!

+    bx              lr

+    ENDP

+|vpx_pop_neon| PROC

+    vld1.i64            {d8, d9, d10, d11}, [r0]!

+    vld1.i64            {d12, d13, d14, d15}, [r0]!

+    bx              lr

+    ENDP

+    END

--- a/vpx_dsp/fwd_txfm.c

+++ b/vpx_dsp/fwd_txfm.c

@@ -754,7 +754,7 @@

     for (j = 0; j < 32; ++j)

       // TODO(cd): see quality impact of only doing

       //           output[j * 32 + i] = (temp_out[j] + 1) >> 2;

-      //           PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c

+      //           PS: also change code in vpx_dsp/x86/vpx_dct_sse2.c

       output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

--- a/vpx_dsp/inv_txfm.c

+++ b/vpx_dsp/inv_txfm.c

@@ -13,7 +13,7 @@

 #include "vpx_dsp/inv_txfm.h"

-void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

+void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,

    0.5 shifts per pixel. */

   int i;

@@ -65,7 +65,7 @@

-void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {

+void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {

   int i;

   tran_high_t a1, e1;

   tran_low_t tmp[4];

@@ -111,7 +111,7 @@

   output[3] = WRAPLOW(step[0] - step[3], 8);

-void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

+void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

   tran_low_t out[4 * 4];

   tran_low_t *outptr = out;

   int i, j;

@@ -136,7 +136,7 @@

-void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,

+void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,

                          int dest_stride) {

   int i;

   tran_high_t a1;

@@ -198,7 +198,7 @@

   output[7] = WRAPLOW(step1[0] - step1[7], 8);

-void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

+void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

   tran_low_t out[8 * 8];

   tran_low_t *outptr = out;

   int i, j;

@@ -223,7 +223,7 @@

-void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

+void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

   int i, j;

   tran_high_t a1;

   tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);

@@ -350,7 +350,7 @@

   output[7] = WRAPLOW(-x1, 8);

-void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

+void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

   tran_low_t out[8 * 8] = { 0 };

   tran_low_t *outptr = out;

   int i, j;

@@ -541,7 +541,7 @@

   output[15] = WRAPLOW(step2[0] - step2[15], 8);

-void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,

+void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,

                              int stride) {

   tran_low_t out[16 * 16];

   tran_low_t *outptr = out;

@@ -739,7 +739,7 @@

   output[15] = WRAPLOW(-x1, 8);

-void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,

+void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,

                             int stride) {

   tran_low_t out[16 * 16] = { 0 };

   tran_low_t *outptr = out;

@@ -766,7 +766,7 @@

-void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

+void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

   int i, j;

   tran_high_t a1;

   tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);

@@ -1146,7 +1146,7 @@

   output[31] = WRAPLOW(step1[0] - step1[31], 8);

-void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,

+void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,

                               int stride) {

   tran_low_t out[32 * 32];

   tran_low_t *outptr = out;

@@ -1185,7 +1185,7 @@

-void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,

+void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,

                             int stride) {

   tran_low_t out[32 * 32] = {0};

   tran_low_t *outptr = out;

@@ -1212,7 +1212,7 @@

-void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

+void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {

   int i, j;

   tran_high_t a1;

@@ -1228,7 +1228,7 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,

                                  int stride, int bd) {

   /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,

      0.5 shifts per pixel. */

@@ -1282,7 +1282,7 @@

-void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,

+void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,

                                 int dest_stride, int bd) {

   int i;

   tran_high_t a1, e1;

@@ -1315,7 +1315,7 @@

-void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {

+void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {

   tran_low_t step[4];

   tran_high_t temp1, temp2;

   (void) bd;

@@ -1336,7 +1336,7 @@

   output[3] = WRAPLOW(step[0] - step[3], bd);

-void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,

                                  int stride, int bd) {

   tran_low_t out[4 * 4];

   tran_low_t *outptr = out;

@@ -1346,7 +1346,7 @@

   // Rows

   for (i = 0; i < 4; ++i) {

-    vp9_highbd_idct4_c(input, outptr, bd);

+    vpx_highbd_idct4_c(input, outptr, bd);

     input += 4;

     outptr += 4;

@@ -1355,7 +1355,7 @@

   for (i = 0; i < 4; ++i) {

     for (j = 0; j < 4; ++j)

       temp_in[j] = out[j * 4 + i];

-    vp9_highbd_idct4_c(temp_in, temp_out, bd);

+    vpx_highbd_idct4_c(temp_in, temp_out, bd);

     for (j = 0; j < 4; ++j) {

       dest[j * stride + i] = highbd_clip_pixel_add(

           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);

@@ -1363,7 +1363,7 @@

-void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,

                                 int dest_stride, int bd) {

   int i;

   tran_high_t a1;

@@ -1383,7 +1383,7 @@

-void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {

+void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {

   tran_low_t step1[8], step2[8];

   tran_high_t temp1, temp2;

   // stage 1

@@ -1401,7 +1401,7 @@

   step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);

   // stage 2 & stage 3 - even half

-  vp9_highbd_idct4_c(step1, step1, bd);

+  vpx_highbd_idct4_c(step1, step1, bd);

   // stage 2 - odd half

   step2[4] = WRAPLOW(step1[4] + step1[5], bd);

@@ -1428,7 +1428,7 @@

   output[7] = WRAPLOW(step1[0] - step1[7], bd);

-void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,

                                  int stride, int bd) {

   tran_low_t out[8 * 8];

   tran_low_t *outptr = out;

@@ -1438,7 +1438,7 @@

   // First transform rows.

   for (i = 0; i < 8; ++i) {

-    vp9_highbd_idct8_c(input, outptr, bd);

+    vpx_highbd_idct8_c(input, outptr, bd);

     input += 8;

     outptr += 8;

@@ -1447,7 +1447,7 @@

   for (i = 0; i < 8; ++i) {

     for (j = 0; j < 8; ++j)

       temp_in[j] = out[j * 8 + i];

-    vp9_highbd_idct8_c(temp_in, temp_out, bd);

+    vpx_highbd_idct8_c(temp_in, temp_out, bd);

     for (j = 0; j < 8; ++j) {

       dest[j * stride + i] = highbd_clip_pixel_add(

           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);

@@ -1455,7 +1455,7 @@

-void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,

                                 int stride, int bd) {

   int i, j;

   tran_high_t a1;

@@ -1471,7 +1471,7 @@

-void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {

+void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {

   tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;

   tran_low_t x0 = input[0];

@@ -1509,7 +1509,7 @@

   output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd);

-void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {

+void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {

   tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;

   tran_low_t x0 = input[7];

@@ -1586,7 +1586,7 @@

   output[7] = WRAPLOW(-x1, bd);

-void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,

                                  int stride, int bd) {

   tran_low_t out[8 * 8] = { 0 };

   tran_low_t *outptr = out;

@@ -1597,7 +1597,7 @@

   // First transform rows.

   // Only first 4 row has non-zero coefs.

   for (i = 0; i < 4; ++i) {

-    vp9_highbd_idct8_c(input, outptr, bd);

+    vpx_highbd_idct8_c(input, outptr, bd);

     input += 8;

     outptr += 8;

@@ -1605,7 +1605,7 @@

   for (i = 0; i < 8; ++i) {

     for (j = 0; j < 8; ++j)

       temp_in[j] = out[j * 8 + i];

-    vp9_highbd_idct8_c(temp_in, temp_out, bd);

+    vpx_highbd_idct8_c(temp_in, temp_out, bd);

     for (j = 0; j < 8; ++j) {

       dest[j * stride + i] = highbd_clip_pixel_add(

           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);

@@ -1613,7 +1613,7 @@

-void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {

+void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {

   tran_low_t step1[16], step2[16];

   tran_high_t temp1, temp2;

   (void) bd;

@@ -1779,7 +1779,7 @@

   output[15] = WRAPLOW(step2[0] - step2[15], bd);

-void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,

                                     int stride, int bd) {

   tran_low_t out[16 * 16];

   tran_low_t *outptr = out;

@@ -1789,7 +1789,7 @@

   // First transform rows.

   for (i = 0; i < 16; ++i) {

-    vp9_highbd_idct16_c(input, outptr, bd);

+    vpx_highbd_idct16_c(input, outptr, bd);

     input += 16;

     outptr += 16;

@@ -1798,7 +1798,7 @@

   for (i = 0; i < 16; ++i) {

     for (j = 0; j < 16; ++j)

       temp_in[j] = out[j * 16 + i];

-    vp9_highbd_idct16_c(temp_in, temp_out, bd);

+    vpx_highbd_idct16_c(temp_in, temp_out, bd);

     for (j = 0; j < 16; ++j) {

       dest[j * stride + i] = highbd_clip_pixel_add(

           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);

@@ -1806,7 +1806,7 @@

-void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {

+void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {

   tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;

   tran_high_t s9, s10, s11, s12, s13, s14, s15;

@@ -1976,7 +1976,7 @@

   output[15] = WRAPLOW(-x1, bd);

-void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,

                                    int stride, int bd) {

   tran_low_t out[16 * 16] = { 0 };

   tran_low_t *outptr = out;

@@ -1987,7 +1987,7 @@

   // First transform rows. Since all non-zero dct coefficients are in

   // upper-left 4x4 area, we only need to calculate first 4 rows here.

   for (i = 0; i < 4; ++i) {

-    vp9_highbd_idct16_c(input, outptr, bd);

+    vpx_highbd_idct16_c(input, outptr, bd);

     input += 16;

     outptr += 16;

@@ -1996,7 +1996,7 @@

   for (i = 0; i < 16; ++i) {

     for (j = 0; j < 16; ++j)

       temp_in[j] = out[j*16 + i];

-    vp9_highbd_idct16_c(temp_in, temp_out, bd);

+    vpx_highbd_idct16_c(temp_in, temp_out, bd);

     for (j = 0; j < 16; ++j) {

       dest[j * stride + i] = highbd_clip_pixel_add(

           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);

@@ -2004,7 +2004,7 @@

-void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,

                                   int stride, int bd) {

   int i, j;

   tran_high_t a1;

@@ -2021,7 +2021,8 @@

-void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) {

+static void highbd_idct32_c(const tran_low_t *input,

+                            tran_low_t *output, int bd) {

   tran_low_t step1[32], step2[32];

   tran_high_t temp1, temp2;

   (void) bd;

@@ -2389,7 +2390,7 @@

   output[31] = WRAPLOW(step1[0] - step1[31], bd);

-void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,

                                      int stride, int bd) {

   tran_low_t out[32 * 32];

   tran_low_t *outptr = out;

@@ -2429,7 +2430,7 @@

-void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,

                                    int stride, int bd) {

   tran_low_t out[32 * 32] = {0};

   tran_low_t *outptr = out;

@@ -2456,7 +2457,7 @@

-void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,

                                   int stride, int bd) {

   int i, j;

   int a1;

--- a/vpx_dsp/inv_txfm.h

+++ b/vpx_dsp/inv_txfm.h

@@ -97,14 +97,13 @@

 void iadst16_c(const tran_low_t *input, tran_low_t *output);

 #if CONFIG_VP9_HIGHBITDEPTH

-void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);

-void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);

-void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);

-void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd);

+void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);

+void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);

+void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);

-void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);

-void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);

-void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);

+void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);

+void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);

+void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);

 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,

                                              int bd) {

--- a/vpx_dsp/mips/idct16x16_msa.c

+++ b/vpx_dsp/mips/idct16x16_msa.c

@@ -10,7 +10,7 @@

 #include "vpx_dsp/mips/inv_txfm_msa.h"

-void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {

+void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {

   v8i16 loc0, loc1, loc2, loc3;

   v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14;

   v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15;

@@ -103,7 +103,7 @@

   ST_SH8(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, (output + 8), 16);

-void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

+void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

                                       int32_t dst_stride) {

   v8i16 loc0, loc1, loc2, loc3;

   v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14;

@@ -201,7 +201,7 @@

   VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);

-void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst,

                                int32_t dst_stride) {

   int32_t i;

   DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]);

@@ -210,18 +210,18 @@

   /* transform rows */

   for (i = 0; i < 2; ++i) {

     /* process 16 * 8 block */

-    vp9_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7)));

+    vpx_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7)));

   /* transform columns */

   for (i = 0; i < 2; ++i) {

     /* process 8 * 16 block */

-    vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),

+    vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),

                                      dst_stride);

-void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst,

                               int32_t dst_stride) {

   uint8_t i;

   DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]);

@@ -228,7 +228,7 @@

   int16_t *out = out_arr;

   /* process 16 * 8 block */

-  vp9_idct16_1d_rows_msa(input, out);

+  vpx_idct16_1d_rows_msa(input, out);

   /* short case just considers top 4 rows as valid output */

   out += 4 * 16;

@@ -255,12 +255,12 @@

   /* transform columns */

   for (i = 0; i < 2; ++i) {

     /* process 8 * 16 block */

-    vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),

+    vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),

                                      dst_stride);

-void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst,

                              int32_t dst_stride) {

   uint8_t i;

   int16_t out;

@@ -290,7 +290,7 @@

-void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {

+void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {

   v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;

   v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;

@@ -321,7 +321,7 @@

   ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16);

-void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

+void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

                                        int32_t dst_stride) {

   v8i16 v0, v2, v4, v6, k0, k1, k2, k3;

   v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;

--- a/vpx_dsp/mips/idct32x32_msa.c

+++ b/vpx_dsp/mips/idct32x32_msa.c

@@ -10,8 +10,8 @@

 #include "vpx_dsp/mips/inv_txfm_msa.h"

-static void vp9_idct32x8_row_transpose_store(const int16_t *input,

-                                             int16_t *tmp_buf) {

+static void idct32x8_row_transpose_store(const int16_t *input,

+                                         int16_t *tmp_buf) {

   v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;

   /* 1st & 2nd 8x8 */

@@ -38,8 +38,8 @@

   ST_SH4(m6, n6, m7, n7, (tmp_buf + 28 * 8), 8);

-static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf,

-                                                int16_t *tmp_eve_buf) {

+static void idct32x8_row_even_process_store(int16_t *tmp_buf,

+                                            int16_t *tmp_eve_buf) {

   v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;

   v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;

   v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7;

@@ -122,8 +122,8 @@

   ST_SH(loc3, (tmp_eve_buf + 7 * 8));

-static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf,

-                                               int16_t *tmp_odd_buf) {

+static void idct32x8_row_odd_process_store(int16_t *tmp_buf,

+                                           int16_t *tmp_odd_buf) {

   v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;

   v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;

@@ -240,10 +240,10 @@

   ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8);

-static void vp9_idct_butterfly_transpose_store(int16_t *tmp_buf,

-                                               int16_t *tmp_eve_buf,

-                                               int16_t *tmp_odd_buf,

-                                               int16_t *dst) {

+static void idct_butterfly_transpose_store(int16_t *tmp_buf,

+                                           int16_t *tmp_eve_buf,

+                                           int16_t *tmp_odd_buf,

+                                           int16_t *dst) {

   v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;

   v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;

@@ -341,20 +341,20 @@

   ST_SH4(m6, n6, m7, n7, (dst + 24 + 4 * 32), 32);

-static void vp9_idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) {

+static void idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) {

   DECLARE_ALIGNED(32, int16_t, tmp_buf[8 * 32]);

   DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]);

   DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]);

-  vp9_idct32x8_row_transpose_store(input, &tmp_buf[0]);

-  vp9_idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]);

-  vp9_idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]);

-  vp9_idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0],

-                                     &tmp_odd_buf[0], output);

+  idct32x8_row_transpose_store(input, &tmp_buf[0]);

+  idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]);

+  idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]);

+  idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0],

+                                 &tmp_odd_buf[0], output);

-static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf,

-                                                   int16_t *tmp_eve_buf) {

+static void idct8x32_column_even_process_store(int16_t *tmp_buf,

+                                               int16_t *tmp_eve_buf) {

   v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;

   v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;

   v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7;

@@ -433,8 +433,8 @@

   ST_SH2(loc2, loc0, (tmp_eve_buf + 8 * 8), 8);

-static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf,

-                                                  int16_t *tmp_odd_buf) {

+static void idct8x32_column_odd_process_store(int16_t *tmp_buf,

+                                              int16_t *tmp_odd_buf) {

   v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;

   v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;

@@ -540,10 +540,10 @@

   ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8);

-static void vp9_idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,

-                                                 int16_t *tmp_odd_buf,

-                                                 uint8_t *dst,

-                                                 int32_t dst_stride) {

+static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,

+                                             int16_t *tmp_odd_buf,

+                                             uint8_t *dst,

+                                             int32_t dst_stride) {

   v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;

   v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;

@@ -627,18 +627,18 @@

                       n1, n3, n5, n7);

-static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

-                                               int32_t dst_stride) {

+static void idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

+                                           int32_t dst_stride) {

   DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]);

   DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]);

-  vp9_idct8x32_column_even_process_store(input, &tmp_eve_buf[0]);

-  vp9_idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]);

-  vp9_idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0],

-                                       dst, dst_stride);

+  idct8x32_column_even_process_store(input, &tmp_eve_buf[0]);

+  idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]);

+  idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0],

+                                   dst, dst_stride);

-void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst,

                                 int32_t dst_stride) {

   int32_t i;

   DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]);

@@ -647,18 +647,18 @@

   /* transform rows */

   for (i = 0; i < 4; ++i) {

     /* process 32 * 8 block */

-    vp9_idct32x8_1d_rows_msa((input + (i << 8)), (out_ptr + (i << 8)));

+    idct32x8_1d_rows_msa((input + (i << 8)), (out_ptr + (i << 8)));

   /* transform columns */

   for (i = 0; i < 4; ++i) {

     /* process 8 * 32 block */

-    vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

-                                       dst_stride);

+    idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

+                                   dst_stride);

-void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst,

                               int32_t dst_stride) {

   int32_t i;

   DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]);

@@ -693,17 +693,17 @@

   out_ptr = out_arr;

   /* rows: only upper-left 8x8 has non-zero coeff */

-  vp9_idct32x8_1d_rows_msa(input, out_ptr);

+  idct32x8_1d_rows_msa(input, out_ptr);

   /* transform columns */

   for (i = 0; i < 4; ++i) {

     /* process 8 * 32 block */

-    vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

-                                       dst_stride);

+    idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),

+                                   dst_stride);

-void vp9_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst,

                              int32_t dst_stride) {

   int32_t i;

   int16_t out;

--- a/vpx_dsp/mips/idct4x4_msa.c

+++ b/vpx_dsp/mips/idct4x4_msa.c

@@ -10,7 +10,7 @@

 #include "vpx_dsp/mips/inv_txfm_msa.h"

-void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst,

                             int32_t dst_stride) {

   v8i16 in0, in1, in2, in3;

   v4i32 in0_r, in1_r, in2_r, in3_r, in4_r;

@@ -47,7 +47,7 @@

   ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride);

-void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst,

                            int32_t dst_stride) {

   int16_t a1, e1;

   v8i16 in1, in0 = { 0 };

@@ -67,7 +67,7 @@

   ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride);

-void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,

                             int32_t dst_stride) {

   v8i16 in0, in1, in2, in3;

@@ -84,7 +84,7 @@

   ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);

-void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst,

                            int32_t dst_stride) {

   int16_t out;

   v8i16 vec;

--- a/vpx_dsp/mips/idct8x8_msa.c

+++ b/vpx_dsp/mips/idct8x8_msa.c

@@ -10,7 +10,7 @@

 #include "vpx_dsp/mips/inv_txfm_msa.h"

-void vp9_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,

                             int32_t dst_stride) {

   v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

@@ -38,7 +38,7 @@

   VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);

-void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,

                             int32_t dst_stride) {

   v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

   v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3;

@@ -99,7 +99,7 @@

   VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);

-void vp9_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,

+void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,

                            int32_t dst_stride) {

   int16_t out;

   int32_t val;

--- a/vpx_dsp/mips/inv_txfm_dspr2.h

+++ b/vpx_dsp/mips/inv_txfm_dspr2.h

@@ -49,10 +49,10 @@

    );                                                                          \

   out;                                                                    })

-void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,

+void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,

                                    int dest_stride);

-void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output);

-void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,

+void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output);

+void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,

                                      int dest_stride);

 void iadst4_dspr2(const int16_t *input, int16_t *output);

 void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows);

--- a/vpx_dsp/mips/inv_txfm_msa.h

+++ b/vpx_dsp/mips/inv_txfm_msa.h

@@ -401,10 +401,10 @@

   MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15);               \

-void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

+void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

                                       int32_t dst_stride);

-void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output);

-void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

+void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output);

+void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,

                                        int32_t dst_stride);

-void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output);

+void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output);

 #endif  // VPX_DSP_MIPS_INV_TXFM_MSA_H_

--- a/vpx_dsp/mips/itrans16_dspr2.c

+++ b/vpx_dsp/mips/itrans16_dspr2.c

@@ -887,7 +887,7 @@

-void vp9_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,

                                  int dest_stride) {

   DECLARE_ALIGNED(32, int16_t,  out[16 * 16]);

   uint32_t pos = 45;

@@ -906,7 +906,7 @@

   idct16_cols_add_blk_dspr2(out, dest, dest_stride);

-void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,

                                 int dest_stride) {

   DECLARE_ALIGNED(32, int16_t,  out[16 * 16]);

   int16_t *outptr = out;

@@ -955,7 +955,7 @@

   idct16_cols_add_blk_dspr2(out, dest, dest_stride);

-void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,

                                int dest_stride) {

   uint32_t pos = 45;

   int32_t out;

--- a/vpx_dsp/mips/itrans32_cols_dspr2.c

+++ b/vpx_dsp/mips/itrans32_cols_dspr2.c

@@ -13,7 +13,7 @@

 #include "vpx_dsp/txfm_common.h"

 #if HAVE_DSPR2

-void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,

+void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,

                                    int dest_stride) {

   int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;

   int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;

--- a/vpx_dsp/mips/itrans32_dspr2.c

+++ b/vpx_dsp/mips/itrans32_dspr2.c

@@ -865,7 +865,7 @@

-void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,

                                   int dest_stride) {

   DECLARE_ALIGNED(32, int16_t,  out[32 * 32]);

   int16_t *outptr = out;

@@ -882,10 +882,10 @@

   idct32_rows_dspr2(input, outptr, 32);

   // Columns

-  vp9_idct32_cols_add_blk_dspr2(out, dest, dest_stride);

+  vpx_idct32_cols_add_blk_dspr2(out, dest, dest_stride);

-void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,

                                 int stride) {

   DECLARE_ALIGNED(32, int16_t,  out[32 * 32]);

   int16_t *outptr = out;

@@ -944,10 +944,10 @@

   // Columns

-  vp9_idct32_cols_add_blk_dspr2(out, dest, stride);

+  vpx_idct32_cols_add_blk_dspr2(out, dest, stride);

-void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,

                                int stride) {

   int       r, out;

   int32_t   a1, absa1;

--- a/vpx_dsp/mips/itrans4_dspr2.c

+++ b/vpx_dsp/mips/itrans4_dspr2.c

@@ -14,7 +14,7 @@

 #include "vpx_dsp/txfm_common.h"

 #if HAVE_DSPR2

-void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) {

+void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output) {

   int16_t   step_0, step_1, step_2, step_3;

   int       Temp0, Temp1, Temp2, Temp3;

   const int const_2_power_13 = 8192;

@@ -99,7 +99,7 @@

-void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,

+void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,

                                      int dest_stride) {

   int16_t   step_0, step_1, step_2, step_3;

   int       Temp0, Temp1, Temp2, Temp3;

@@ -221,7 +221,7 @@

-void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,

                               int dest_stride) {

   DECLARE_ALIGNED(32, int16_t, out[4 * 4]);

   int16_t *outptr = out;

@@ -235,13 +235,13 @@

);

   // Rows

-  vp9_idct4_rows_dspr2(input, outptr);

+  vpx_idct4_rows_dspr2(input, outptr);

   // Columns

-  vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);

+  vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);

-void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,

                              int dest_stride) {

   int       a1, absa1;

   int       r;

--- a/vpx_dsp/mips/itrans8_dspr2.c

+++ b/vpx_dsp/mips/itrans8_dspr2.c

@@ -442,7 +442,7 @@

-void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,

                               int dest_stride) {

   DECLARE_ALIGNED(32, int16_t, out[8 * 8]);

   int16_t *outptr = out;

@@ -462,7 +462,7 @@

   idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);

-void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,

                               int dest_stride) {

   DECLARE_ALIGNED(32, int16_t, out[8 * 8]);

   int16_t *outptr = out;

@@ -507,7 +507,7 @@

   idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);

-void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,

+void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,

                              int dest_stride) {

   uint32_t pos = 45;

   int32_t out;

--- a/vpx_dsp/vpx_dsp.mk

+++ b/vpx_dsp/vpx_dsp.mk

@@ -186,6 +186,7 @@

 endif  # CONFIG_USE_X86INC

 ifeq ($(HAVE_NEON_ASM),yes)

+DSP_SRCS-yes  += arm/save_reg_neon$(ASM)

 DSP_SRCS-yes  += arm/idct4x4_1_add_neon$(ASM)

 DSP_SRCS-yes  += arm/idct4x4_add_neon$(ASM)

 DSP_SRCS-yes  += arm/idct8x8_1_add_neon$(ASM)

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -598,183 +598,183 @@

 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

   # Note as optimized versions of these functions are added we need to add a check to ensure

   # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.

-  add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct4x4_1_add/;

+  add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct4x4_1_add/;

-  add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct4x4_16_add/;

+  add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct4x4_16_add/;

-  add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct8x8_1_add/;

+  add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct8x8_1_add/;

-  add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct8x8_64_add/;

+  add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct8x8_64_add/;

-  add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct8x8_12_add/;

+  add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct8x8_12_add/;

-  add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct16x16_1_add/;

+  add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct16x16_1_add/;

-  add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct16x16_256_add/;

+  add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct16x16_256_add/;

-  add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct16x16_10_add/;

+  add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct16x16_10_add/;

-  add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct32x32_1024_add/;

+  add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct32x32_1024_add/;

-  add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct32x32_34_add/;

+  add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct32x32_34_add/;

-  add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_idct32x32_1_add/;

+  add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_idct32x32_1_add/;

-  add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_iwht4x4_1_add/;

+  add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_iwht4x4_1_add/;

-  add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-  specialize qw/vp9_iwht4x4_16_add/;

+  add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+  specialize qw/vpx_iwht4x4_16_add/;

-  add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_idct4x4_1_add/;

+  add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_idct4x4_1_add/;

-  add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_idct8x8_1_add/;

+  add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_idct8x8_1_add/;

-  add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_idct16x16_1_add/;

+  add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_idct16x16_1_add/;

-  add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_idct32x32_1024_add/;

+  add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_idct32x32_1024_add/;

-  add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_idct32x32_34_add/;

+  add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_idct32x32_34_add/;

-  add_proto qw/void vp9_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_idct32x32_1_add/;

+  add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_idct32x32_1_add/;

-  add_proto qw/void vp9_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_iwht4x4_1_add/;

+  add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_iwht4x4_1_add/;

-  add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-  specialize qw/vp9_highbd_iwht4x4_16_add/;

+  add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+  specialize qw/vpx_highbd_iwht4x4_16_add/;

   # Force C versions if CONFIG_EMULATE_HARDWARE is 1

   if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {

-    add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct4x4_16_add/;

+    add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct4x4_16_add/;

-    add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct8x8_64_add/;

+    add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct8x8_64_add/;

-    add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct8x8_10_add/;

+    add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct8x8_10_add/;

-    add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct16x16_256_add/;

+    add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct16x16_256_add/;

-    add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct16x16_10_add/;

+    add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct16x16_10_add/;

   } else {

-    add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct4x4_16_add sse2/;

+    add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct4x4_16_add sse2/;

-    add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct8x8_64_add sse2/;

+    add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct8x8_64_add sse2/;

-    add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct8x8_10_add sse2/;

+    add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct8x8_10_add sse2/;

-    add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct16x16_256_add sse2/;

+    add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct16x16_256_add sse2/;

-    add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

-    specialize qw/vp9_highbd_idct16x16_10_add sse2/;

+    add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

+    specialize qw/vpx_highbd_idct16x16_10_add sse2/;

   }  # CONFIG_EMULATE_HARDWARE

 } else {

   # Force C versions if CONFIG_EMULATE_HARDWARE is 1

   if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {

-    add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct4x4_1_add/;

+    add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct4x4_1_add/;

-    add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct4x4_16_add/;

+    add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct4x4_16_add/;

-    add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct8x8_1_add/;

+    add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct8x8_1_add/;

-    add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct8x8_64_add/;

+    add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct8x8_64_add/;

-    add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct8x8_12_add/;

+    add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct8x8_12_add/;

-    add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct16x16_1_add/;

+    add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct16x16_1_add/;

-    add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct16x16_256_add/;

+    add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct16x16_256_add/;

-    add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct16x16_10_add/;

+    add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct16x16_10_add/;

-    add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct32x32_1024_add/;

+    add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct32x32_1024_add/;

-    add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct32x32_34_add/;

+    add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct32x32_34_add/;

-    add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct32x32_1_add/;

+    add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct32x32_1_add/;

-    add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_iwht4x4_1_add/;

+    add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_iwht4x4_1_add/;

-    add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_iwht4x4_16_add/;

+    add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_iwht4x4_16_add/;

   } else {

-    add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";

+    add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";

-    add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";

+    add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";

-    add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct16x16_256_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;

+    add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct32x32_34_add sse2 neon_asm dspr2 msa/;

     # Need to add 34 eob idct32x32 neon implementation.

-    $vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;

+    $vpx_idct32x32_34_add_neon_asm=vpx_idct32x32_1024_add_neon;

-    add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;

+    add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_iwht4x4_1_add msa/;

+    add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_iwht4x4_1_add msa/;

-    add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vp9_iwht4x4_16_add msa/, "$sse2_x86inc";

+    add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

+    specialize qw/vpx_iwht4x4_16_add msa/, "$sse2_x86inc";

   }  # CONFIG_EMULATE_HARDWARE

 }  # CONFIG_VP9_HIGHBITDEPTH

 }  # CONFIG_VP9

--- a/vpx_dsp/x86/inv_txfm_sse2.asm

+++ b/vpx_dsp/x86/inv_txfm_sse2.asm

@@ -7,6 +7,9 @@

 ;  in the file PATENTS.  All contributing project authors may

 ;  be found in the AUTHORS file in the root of the source tree.

+%define program_name vpx

 %include "third_party/x86inc/x86inc.asm"

 SECTION .text

--- a/vpx_dsp/x86/inv_txfm_sse2.c

+++ b/vpx_dsp/x86/inv_txfm_sse2.c

@@ -21,7 +21,7 @@

   *(int *)(dest) = _mm_cvtsi128_si32(d0); \

-void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

   const __m128i zero = _mm_setzero_si128();

   const __m128i eight = _mm_set1_epi16(8);

   const __m128i cst = _mm_setr_epi16(

@@ -151,7 +151,7 @@

-void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

   __m128i dc_value;

   const __m128i zero = _mm_setzero_si128();

   int a;

@@ -449,7 +449,7 @@

   out7 = _mm_subs_epi16(stp1_0, stp2_7); \

-void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

   const __m128i zero = _mm_setzero_si128();

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1 << 4);

@@ -480,7 +480,7 @@

   // 2-D

   for (i = 0; i < 2; i++) {

-    // 8x8 Transpose is copied from vp9_fdct8x8_sse2()

+    // 8x8 Transpose is copied from vpx_fdct8x8_sse2()

     TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7,

                   in0, in1, in2, in3, in4, in5, in6, in7);

@@ -518,7 +518,7 @@

   RECON_AND_STORE(dest + 7 * stride, in7);

-void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

   __m128i dc_value;

   const __m128i zero = _mm_setzero_si128();

   int a;

@@ -555,7 +555,7 @@

   __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;

   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;

-  // 8x8 Transpose is copied from vp9_fdct8x8_sse2()

+  // 8x8 Transpose is copied from vpx_fdct8x8_sse2()

   TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],

                 in0, in1, in2, in3, in4, in5, in6, in7);

@@ -792,7 +792,7 @@

   in[7] = _mm_sub_epi16(k__const_0, s1);

-void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

   const __m128i zero = _mm_setzero_si128();

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1 << 4);

@@ -1169,7 +1169,7 @@

                              stp2_10, stp2_13, stp2_11, stp2_12) \

-void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,

+void vpx_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,

                                 int stride) {

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1 << 5);

@@ -1294,7 +1294,7 @@

-void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

   __m128i dc_value;

   const __m128i zero = _mm_setzero_si128();

   int a, i;

@@ -2152,7 +2152,7 @@

   iadst16_8col(in1);

-void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,

+void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,

                                int stride) {

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1 << 5);

@@ -3029,7 +3029,7 @@

 // Only upper-left 8x8 has non-zero coeff

-void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,

+void vpx_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,

                                int stride) {

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1<<5);

@@ -3188,7 +3188,7 @@

-void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,

+void vpx_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,

                                  int stride) {

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1 << 5);

@@ -3464,7 +3464,7 @@

-void vp9_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {

   __m128i dc_value;

   const __m128i zero = _mm_setzero_si128();

   int a, i;

@@ -3498,7 +3498,7 @@

   return retval;

-void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,

                                     int stride, int bd) {

   tran_low_t out[4 * 4];

   tran_low_t *outptr = out;

@@ -3561,7 +3561,7 @@

   } else {

     // Run the un-optimised row transform

     for (i = 0; i < 4; ++i) {

-      vp9_highbd_idct4_c(input, outptr, bd);

+      vpx_highbd_idct4_c(input, outptr, bd);

       input += 4;

       outptr += 4;

@@ -3605,7 +3605,7 @@

     for (i = 0; i < 4; ++i) {

       for (j = 0; j < 4; ++j)

         temp_in[j] = out[j * 4 + i];

-      vp9_highbd_idct4_c(temp_in, temp_out, bd);

+      vpx_highbd_idct4_c(temp_in, temp_out, bd);

       for (j = 0; j < 4; ++j) {

         dest[j * stride + i] = highbd_clip_pixel_add(

             dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);

@@ -3614,7 +3614,7 @@

-void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,

                                     int stride, int bd) {

   tran_low_t out[8 * 8];

   tran_low_t *outptr = out;

@@ -3679,7 +3679,7 @@

   } else {

     // Run the un-optimised row transform

     for (i = 0; i < 8; ++i) {

-      vp9_highbd_idct8_c(input, outptr, bd);

+      vpx_highbd_idct8_c(input, outptr, bd);

       input += 8;

       outptr += 8;

@@ -3706,7 +3706,7 @@

     for (i = 0; i < 8; ++i) {

       for (j = 0; j < 8; ++j)

         temp_in[j] = out[j * 8 + i];

-      vp9_highbd_idct8_c(temp_in, temp_out, bd);

+      vpx_highbd_idct8_c(temp_in, temp_out, bd);

       for (j = 0; j < 8; ++j) {

         dest[j * stride + i] = highbd_clip_pixel_add(

             dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);

@@ -3715,7 +3715,7 @@

-void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,

                                     int stride, int bd) {

   tran_low_t out[8 * 8] = { 0 };

   tran_low_t *outptr = out;

@@ -3783,7 +3783,7 @@

   } else {

     // Run the un-optimised row transform

     for (i = 0; i < 4; ++i) {

-      vp9_highbd_idct8_c(input, outptr, bd);

+      vpx_highbd_idct8_c(input, outptr, bd);

       input += 8;

       outptr += 8;

@@ -3810,7 +3810,7 @@

     for (i = 0; i < 8; ++i) {

       for (j = 0; j < 8; ++j)

         temp_in[j] = out[j * 8 + i];

-      vp9_highbd_idct8_c(temp_in, temp_out, bd);

+      vpx_highbd_idct8_c(temp_in, temp_out, bd);

       for (j = 0; j < 8; ++j) {

         dest[j * stride + i] = highbd_clip_pixel_add(

             dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);

@@ -3819,7 +3819,7 @@

-void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,

                                        int stride, int bd) {

   tran_low_t out[16 * 16];

   tran_low_t *outptr = out;

@@ -3892,7 +3892,7 @@

   } else {

     // Run the un-optimised row transform

     for (i = 0; i < 16; ++i) {

-      vp9_highbd_idct16_c(input, outptr, bd);

+      vpx_highbd_idct16_c(input, outptr, bd);

       input += 16;

       outptr += 16;

@@ -3924,7 +3924,7 @@

     for (i = 0; i < 16; ++i) {

       for (j = 0; j < 16; ++j)

         temp_in[j] = out[j * 16 + i];

-      vp9_highbd_idct16_c(temp_in, temp_out, bd);

+      vpx_highbd_idct16_c(temp_in, temp_out, bd);

       for (j = 0; j < 16; ++j) {

         dest[j * stride + i] = highbd_clip_pixel_add(

             dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);

@@ -3933,7 +3933,7 @@

-void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,

+void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,

                                       int stride, int bd) {

   tran_low_t out[16 * 16] = { 0 };

   tran_low_t *outptr = out;

@@ -4011,7 +4011,7 @@

   } else {

     // Run the un-optimised row transform

     for (i = 0; i < 4; ++i) {

-      vp9_highbd_idct16_c(input, outptr, bd);

+      vpx_highbd_idct16_c(input, outptr, bd);

       input += 16;

       outptr += 16;

@@ -4043,7 +4043,7 @@

     for (i = 0; i < 16; ++i) {

       for (j = 0; j < 16; ++j)

         temp_in[j] = out[j * 16 + i];

-      vp9_highbd_idct16_c(temp_in, temp_out, bd);

+      vpx_highbd_idct16_c(temp_in, temp_out, bd);

       for (j = 0; j < 16; ++j) {

         dest[j * stride + i] = highbd_clip_pixel_add(

             dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);

--- a/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm

+++ b/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm

@@ -7,6 +7,9 @@

 ;  in the file PATENTS.  All contributing project authors may

 ;  be found in the AUTHORS file in the root of the source tree.

+%define program_name vpx

 %include "third_party/x86inc/x86inc.asm"

 ; This file provides SSSE3 version of the inverse transformation. Part