ref: 44925b4c17de8c67ad81d0bd43f788265911c6b9
parent: 479c669a6107933d24d5b159bdca9585b029f822
parent: 2992739b5d5ee0b87e8654b3caaeee3a7b468465
author: Jingning Han <[email protected]>
date: Fri Jul 17 21:33:13 EDT 2015
Merge "Rename loop filter function from vp9_ to vpx_"
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -60,49 +60,49 @@
void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
}
#else
void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
@@ -114,25 +114,25 @@
void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON_ASM
@@ -141,13 +141,13 @@
void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
@@ -534,46 +534,46 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 10, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 10, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 10, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 12, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 12, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 12, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 12, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
@@ -586,10 +586,10 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@@ -598,8 +598,8 @@
INSTANTIATE_TEST_CASE_P(
AVX2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
+ make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
2)));
#endif
@@ -608,42 +608,42 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 12)));
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_sse2,
- &vp9_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_sse2,
- &vp9_lpf_vertical_8_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_sse2,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_sse2,
+ &vpx_lpf_vertical_8_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@@ -657,36 +657,36 @@
#if HAVE_NEON_ASM
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
- make_tuple(&vp9_lpf_horizontal_16_neon,
- &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_neon,
- &vp9_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_horizontal_16_neon,
+ &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_neon,
+ &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&wrapper_vertical_16_neon,
&wrapper_vertical_16_c, 8, 1),
make_tuple(&wrapper_vertical_16_dual_neon,
&wrapper_vertical_16_dual_c, 8, 1),
#endif // HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_8_neon,
- &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_vertical_8_neon,
- &vp9_lpf_vertical_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_4_neon,
- &vp9_lpf_horizontal_4_c, 8, 1),
- make_tuple(&vp9_lpf_vertical_4_neon,
- &vp9_lpf_vertical_4_c, 8, 1)));
+ make_tuple(&vpx_lpf_horizontal_8_neon,
+ &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_vertical_8_neon,
+ &vpx_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_4_neon,
+ &vpx_lpf_horizontal_4_c, 8, 1),
+ make_tuple(&vpx_lpf_vertical_4_neon,
+ &vpx_lpf_vertical_4_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
#if HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_8_dual_neon,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_neon,
- &vp9_lpf_vertical_8_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_neon,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_neon,
+ &vpx_lpf_vertical_8_dual_c, 8),
#endif // HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_4_dual_neon,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_neon,
- &vp9_lpf_vertical_4_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_neon,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_neon,
+ &vpx_lpf_vertical_4_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
@@ -694,23 +694,23 @@
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_4_dual_msa,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_horizontal_8_dual_msa,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_msa,
- &vp9_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_msa,
- &vp9_lpf_vertical_8_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_msa,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_msa,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_msa,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_msa,
+ &vpx_lpf_vertical_8_dual_c, 8)));
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
} // namespace
--- a/test/test.mk
+++ b/test/test.mk
@@ -91,6 +91,7 @@
## shared library builds don't make these functions accessible.
##
ifeq ($(CONFIG_SHARED),)
+LIBVPX_TEST_SRCS-$(CONFIG_VP9) += lpf_8_test.cc
## VP8
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
@@ -142,7 +143,6 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
--- a/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
@@ -44,7 +44,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -148,8 +148,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -245,7 +245,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -257,7 +257,7 @@
filter_y, w, h);
break;
case 64:
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_bi_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
--- a/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
@@ -40,9 +40,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -135,9 +135,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -290,9 +290,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -539,11 +539,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -781,9 +781,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -807,8 +807,8 @@
filter_x, h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_bi_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,
--- a/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
@@ -41,8 +41,8 @@
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -132,8 +132,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@@ -272,8 +272,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@@ -504,9 +504,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@@ -747,8 +747,8 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
+ prefetch_load(src);
+ prefetch_load(src + 32);
switch (w) {
case 4:
@@ -769,7 +769,7 @@
(w/16));
break;
case 64:
- vp9_prefetch_load(src + 32);
+ prefetch_load(src + 32);
convolve_bi_horiz_64_transposed_dspr2(src, src_stride,
dst, dst_stride,
filter, h);
--- a/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
@@ -39,9 +39,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -122,9 +122,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -252,9 +252,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -459,11 +459,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -651,7 +651,7 @@
if (16 == x_step_q4) {
uint32_t pos = 38;
- vp9_prefetch_load((const uint8_t *)filter_x);
+ prefetch_load((const uint8_t *)filter_x);
  /* bit position for extract from acc */
__asm__ __volatile__ (
@@ -661,9 +661,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -687,8 +687,8 @@
filter_x, (int32_t)h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,
--- a/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
@@ -44,7 +44,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -141,7 +141,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -230,7 +230,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4 :
@@ -242,7 +242,7 @@
filter_y, w, h);
break;
case 64 :
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_bi_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
--- a/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
@@ -49,7 +49,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -210,8 +210,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -372,7 +372,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -384,7 +384,7 @@
filter_y, w, h);
break;
case 64:
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
@@ -452,17 +452,17 @@
uint32_t tp3, tp4, tn2;
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
/* 1 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -482,9 +482,9 @@
case 8:
/* 2 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -509,9 +509,9 @@
case 16:
/* 4 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -544,9 +544,9 @@
case 32:
/* 8 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -593,16 +593,16 @@
}
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_load(src + src_stride + 64);
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride + 64);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
--- a/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
@@ -43,9 +43,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -165,9 +165,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -357,9 +357,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -668,11 +668,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -985,9 +985,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -1011,8 +1011,8 @@
filter_x, h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,
--- a/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
@@ -60,8 +60,8 @@
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -176,8 +176,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@@ -355,8 +355,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@@ -645,9 +645,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@@ -993,8 +993,8 @@
src -= (src_stride * 3 + 3);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
+ prefetch_load(src);
+ prefetch_load(src + 32);
switch (w) {
case 4:
@@ -1015,7 +1015,7 @@
(w/16));
break;
case 64:
- vp9_prefetch_load(src + 32);
+ prefetch_load(src + 32);
convolve_horiz_64_transposed_dspr2(src, src_stride,
temp, intermediate_height,
filter_x, intermediate_height);
@@ -1078,9 +1078,9 @@
int x, y;
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -1089,9 +1089,9 @@
/* 1 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], (%[src]) \n\t"
@@ -1112,9 +1112,9 @@
/* 2 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1137,9 +1137,9 @@
/* 4 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1169,9 +1169,9 @@
/* 8 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1209,16 +1209,16 @@
uint32_t tp1, tp2, tp3, tp4;
uint32_t tp5, tp6, tp7, tp8;
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_load(src + src_stride + 64);
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride + 64);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
--- a/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
@@ -43,9 +43,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -154,9 +154,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -323,9 +323,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -593,11 +593,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -859,7 +859,7 @@
if (16 == x_step_q4) {
uint32_t pos = 38;
- vp9_prefetch_load((const uint8_t *)filter_x);
+ prefetch_load((const uint8_t *)filter_x);
src -= 3;
  /* bit position for extract from acc */
@@ -870,9 +870,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -896,8 +896,8 @@
filter_x, (int32_t)h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,
--- a/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
@@ -49,7 +49,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -203,8 +203,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -358,7 +358,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4 :
@@ -370,7 +370,7 @@
filter_y, w, h);
break;
case 64 :
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
--- a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
@@ -34,7 +34,7 @@
for (i = no_rows; i--; ) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
__asm__ __volatile__ (
"lh %[load1], 0(%[input]) \n\t"
@@ -421,14 +421,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 16; ++i) {
dest_pix = (dest + i);
@@ -1124,7 +1124,7 @@
for (i = 0; i < 16; ++i) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;
@@ -1144,7 +1144,7 @@
for (i = 0; i < 16; ++i) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;
--- a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
@@ -44,14 +44,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 32; ++i) {
dest_pix = dest + i;
--- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
@@ -96,8 +96,8 @@
}
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 32));
- vp9_prefetch_load((const uint8_t *)(input + 48));
+ prefetch_load((const uint8_t *)(input + 32));
+ prefetch_load((const uint8_t *)(input + 48));
__asm__ __volatile__ (
"lh %[load1], 2(%[input]) \n\t"
--- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
@@ -115,14 +115,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 4; ++i) {
dest_pix = (dest + i);
--- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -211,14 +211,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 8; ++i) {
dest_pix = (dest + i);
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -327,13 +327,13 @@
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
- vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else {
- vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr);
}
}
@@ -340,14 +340,14 @@
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
- vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
- vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
@@ -354,14 +354,14 @@
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
- vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
- vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
@@ -368,14 +368,14 @@
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
- vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1);
} else {
- vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
@@ -427,13 +427,13 @@
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else if (mask_16x16_0 & 1) {
- vp9_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else {
- vp9_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
}
}
@@ -440,14 +440,14 @@
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_8x8_0 & 1) {
- vp9_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
@@ -454,14 +454,14 @@
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_0 & 1) {
- vp9_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
@@ -468,14 +468,14 @@
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_int_0 & 1) {
- vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
@@ -513,11 +513,11 @@
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
count = 2;
} else {
- vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_8x8 & 1) {
@@ -525,28 +525,28 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4 & 1) {
@@ -554,31 +554,31 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4_int & 1) {
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
}
@@ -610,11 +610,11 @@
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2, bd);
count = 2;
} else {
- vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
} else if (mask_8x8 & 1) {
@@ -622,31 +622,31 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
- vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
- vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
- vp9_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
@@ -655,35 +655,35 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
- vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
- vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
- vp9_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
} else if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
@@ -1094,15 +1094,15 @@
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_8x8 & 1) {
- vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
} else if (mask_4x4 & 1) {
- vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
}
if (mask_4x4_int & 1)
- vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@@ -1128,18 +1128,18 @@
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
} else if (mask_8x8 & 1) {
- vp9_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
} else if (mask_4x4 & 1) {
- vp9_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
if (mask_4x4_int & 1)
- vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
s += 8;
lfl += 1;
--- a/vpx_dsp/arm/loopfilter_16_neon.asm
+++ b/vpx_dsp/arm/loopfilter_16_neon.asm
@@ -8,12 +8,12 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_4_dual_neon|
+ EXPORT |vpx_lpf_horizontal_4_dual_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
+;void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
; const uint8_t *blimit0,
; const uint8_t *limit0,
; const uint8_t *thresh0,
@@ -29,7 +29,7 @@
; sp+8 const uint8_t *limit1,
; sp+12 const uint8_t *thresh1,
-|vp9_lpf_horizontal_4_dual_neon| PROC
+|vpx_lpf_horizontal_4_dual_neon| PROC
push {lr}
ldr r12, [sp, #4] ; load thresh0
@@ -66,7 +66,7 @@
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
- bl vp9_loop_filter_neon_16
+ bl vpx_loop_filter_neon_16
vst1.u8 {q5}, [r2@64], r1 ; store op1
vst1.u8 {q6}, [r3@64], r1 ; store op0
@@ -76,9 +76,9 @@
vpop {d8-d15} ; restore neon registers
pop {pc}
- ENDP ; |vp9_lpf_horizontal_4_dual_neon|
+ ENDP ; |vpx_lpf_horizontal_4_dual_neon|
-; void vp9_loop_filter_neon_16();
+; void vpx_loop_filter_neon_16();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. This function uses
; registers d8-d15, so the calling function must save those registers.
@@ -101,7 +101,7 @@
; q6 op0
; q7 oq0
; q8 oq1
-|vp9_loop_filter_neon_16| PROC
+|vpx_loop_filter_neon_16| PROC
; filter_mask
vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2)
@@ -194,6 +194,6 @@
veor q8, q12, q10 ; *oq1 = u^0x80
bx lr
- ENDP ; |vp9_loop_filter_neon_16|
+ ENDP ; |vpx_loop_filter_neon_16|
END
--- a/vpx_dsp/arm/loopfilter_16_neon.c
+++ b/vpx_dsp/arm/loopfilter_16_neon.c
@@ -14,7 +14,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-static INLINE void vp9_loop_filter_neon_16(
+static INLINE void loop_filter_neon_16(
uint8x16_t qblimit, // blimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
@@ -124,7 +124,7 @@
return;
}
-void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -163,9 +163,9 @@
s += p;
q10u8 = vld1q_u8(s);
- vp9_loop_filter_neon_16(qblimit, qlimit, qthresh,
- q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8,
- &q5u8, &q6u8, &q7u8, &q8u8);
+ loop_filter_neon_16(qblimit, qlimit, qthresh,
+ q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8,
+ &q5u8, &q6u8, &q7u8, &q8u8);
s -= (p * 5);
vst1q_u8(s, q5u8);
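Two renames run side by side in this file: the exported entry point vp9_lpf_horizontal_4_dual_neon becomes vpx_lpf_horizontal_4_dual_neon, while the static INLINE helper simply drops its prefix (vp9_loop_filter_neon_16 -> loop_filter_neon_16), since file-local symbols need no library prefix. A minimal sketch of the convention (both function names here are hypothetical, not part of the patch):

    /* file-local helper: internal linkage, no library prefix */
    static void loop_filter_helper(void) {}

    /* exported DSP entry point: vpx_ prefix, declared via ./vpx_dsp_rtcd.h */
    void vpx_lpf_example_c(void) {
      loop_filter_helper();
    }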
--- a/vpx_dsp/arm/loopfilter_4_neon.asm
+++ b/vpx_dsp/arm/loopfilter_4_neon.asm
@@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_4_neon|
- EXPORT |vp9_lpf_vertical_4_neon|
+ EXPORT |vpx_lpf_horizontal_4_neon|
+ EXPORT |vpx_lpf_vertical_4_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_horizontal_4_neon(uint8_t *s,
+; void vpx_lpf_horizontal_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -32,7 +32,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_horizontal_4_neon| PROC
+|vpx_lpf_horizontal_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -41,7 +41,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
- beq end_vp9_lf_h_edge
+ beq end_vpx_lf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -62,7 +62,7 @@
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
- bl vp9_loop_filter_neon
+ bl vpx_loop_filter_neon
vst1.u8 {d4}, [r2@64], r1 ; store op1
vst1.u8 {d5}, [r3@64], r1 ; store op0
@@ -73,16 +73,16 @@
subs r12, r12, #1
bne count_lf_h_loop
-end_vp9_lf_h_edge
+end_vpx_lf_h_edge
pop {pc}
- ENDP ; |vp9_lpf_horizontal_4_neon|
+ ENDP ; |vpx_lpf_horizontal_4_neon|
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_vertical_4_neon(uint8_t *s,
+; void vpx_lpf_vertical_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -95,7 +95,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_vertical_4_neon| PROC
+|vpx_lpf_vertical_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -105,7 +105,7 @@
ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
- beq end_vp9_lf_v_edge
+ beq end_vpx_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@@ -135,7 +135,7 @@
vtrn.8 d7, d16
vtrn.8 d17, d18
- bl vp9_loop_filter_neon
+ bl vpx_loop_filter_neon
sub r0, r0, #2
@@ -154,11 +154,11 @@
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
-end_vp9_lf_v_edge
+end_vpx_lf_v_edge
pop {pc}
- ENDP ; |vp9_lpf_vertical_4_neon|
+ ENDP ; |vpx_lpf_vertical_4_neon|
-; void vp9_loop_filter_neon();
+; void vpx_loop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@@ -182,7 +182,7 @@
; d5 op0
; d6 oq0
; d7 oq1
-|vp9_loop_filter_neon| PROC
+|vpx_loop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@@ -272,6 +272,6 @@
veor d7, d20, d18 ; *oq1 = u^0x80
bx lr
- ENDP ; |vp9_loop_filter_neon|
+ ENDP ; |vpx_loop_filter_neon|
END
--- a/vpx_dsp/arm/loopfilter_4_neon.c
+++ b/vpx_dsp/arm/loopfilter_4_neon.c
@@ -12,7 +12,7 @@
#include "./vpx_dsp_rtcd.h"
-static INLINE void vp9_loop_filter_neon(
+static INLINE void loop_filter_neon(
uint8x8_t dblimit, // flimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@@ -110,7 +110,7 @@
return;
}
-void vp9_lpf_horizontal_4_neon(
+void vpx_lpf_horizontal_4_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -122,7 +122,7 @@
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
- if (count == 0) // end_vp9_lf_h_edge
+ if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -149,9 +149,9 @@
s += pitch;
d18u8 = vld1_u8(s);
- vp9_loop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d4u8, &d5u8, &d6u8, &d7u8);
+ loop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d4u8, &d5u8, &d6u8, &d7u8);
s -= (pitch * 5);
vst1_u8(s, d4u8);
@@ -165,7 +165,7 @@
return;
}
-void vp9_lpf_vertical_4_neon(
+void vpx_lpf_vertical_4_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -181,7 +181,7 @@
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
uint8x8x4_t d4Result;
- if (count == 0) // end_vp9_lf_h_edge
+ if (count == 0) // end_vpx_lf_v_edge
return;
dblimit = vld1_u8(blimit);
@@ -244,9 +244,9 @@
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
- vp9_loop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d4u8, &d5u8, &d6u8, &d7u8);
+ loop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d4u8, &d5u8, &d6u8, &d7u8);
d4Result.val[0] = d4u8;
d4Result.val[1] = d5u8;
--- a/vpx_dsp/arm/loopfilter_8_neon.asm
+++ b/vpx_dsp/arm/loopfilter_8_neon.asm
@@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_8_neon|
- EXPORT |vp9_lpf_vertical_8_neon|
+ EXPORT |vpx_lpf_horizontal_8_neon|
+ EXPORT |vpx_lpf_vertical_8_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p,
+; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
@@ -30,7 +30,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_horizontal_8_neon| PROC
+|vpx_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -39,7 +39,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
- beq end_vp9_mblf_h_edge
+ beq end_vpx_mblf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -60,7 +60,7 @@
sub r3, r3, r1, lsl #1
sub r2, r2, r1, lsl #2
- bl vp9_mbloop_filter_neon
+ bl vpx_mbloop_filter_neon
vst1.u8 {d0}, [r2@64], r1 ; store op2
vst1.u8 {d1}, [r3@64], r1 ; store op1
@@ -73,12 +73,12 @@
subs r12, r12, #1
bne count_mblf_h_loop
-end_vp9_mblf_h_edge
+end_vpx_mblf_h_edge
pop {r4-r5, pc}
- ENDP ; |vp9_lpf_horizontal_8_neon|
+ ENDP ; |vpx_lpf_horizontal_8_neon|
-; void vp9_lpf_vertical_8_neon(uint8_t *s,
+; void vpx_lpf_vertical_8_neon(uint8_t *s,
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -91,7 +91,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_vertical_8_neon| PROC
+|vpx_lpf_vertical_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -101,7 +101,7 @@
ldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
- beq end_vp9_mblf_v_edge
+ beq end_vpx_mblf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@@ -134,7 +134,7 @@
sub r2, r0, #3
add r3, r0, #1
- bl vp9_mbloop_filter_neon
+ bl vpx_mbloop_filter_neon
;store op2, op1, op0, oq0
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
@@ -161,11 +161,11 @@
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_mblf_v_loop
-end_vp9_mblf_v_edge
+end_vpx_mblf_v_edge
pop {r4-r5, pc}
- ENDP ; |vp9_lpf_vertical_8_neon|
+ ENDP ; |vpx_lpf_vertical_8_neon|
-; void vp9_mbloop_filter_neon();
+; void vpx_mbloop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@@ -191,7 +191,7 @@
; d3 oq0
; d4 oq1
; d5 oq2
-|vp9_mbloop_filter_neon| PROC
+|vpx_mbloop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@@ -446,6 +446,6 @@
bx lr
- ENDP ; |vp9_mbloop_filter_neon|
+ ENDP ; |vpx_mbloop_filter_neon|
END
--- a/vpx_dsp/arm/loopfilter_8_neon.c
+++ b/vpx_dsp/arm/loopfilter_8_neon.c
@@ -12,7 +12,7 @@
#include "./vpx_dsp_rtcd.h"
-static INLINE void vp9_mbloop_filter_neon(
+static INLINE void mbloop_filter_neon(
uint8x8_t dblimit, // mblimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@@ -263,7 +263,7 @@
return;
}
-void vp9_lpf_horizontal_8_neon(
+void vpx_lpf_horizontal_8_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -276,7 +276,7 @@
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
uint8x8_t d16u8, d17u8, d18u8;
- if (count == 0) // end_vp9_mblf_h_edge
+ if (count == 0) // end_vpx_mblf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -303,9 +303,9 @@
s += pitch;
d18u8 = vld1_u8(s);
- vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
+ mbloop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
s -= (pitch * 6);
vst1_u8(s, d0u8);
@@ -323,7 +323,7 @@
return;
}
-void vp9_lpf_vertical_8_neon(
+void vpx_lpf_vertical_8_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -403,9 +403,9 @@
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
- vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
+ mbloop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
d4Result.val[0] = d0u8;
d4Result.val[1] = d1u8;
--- a/vpx_dsp/arm/loopfilter_mb_neon.asm
+++ b/vpx_dsp/arm/loopfilter_mb_neon.asm
@@ -8,13 +8,13 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_16_neon|
- EXPORT |vp9_lpf_vertical_16_neon|
+ EXPORT |vpx_lpf_horizontal_16_neon|
+ EXPORT |vpx_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
+; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh
@@ -24,7 +24,7 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_lpf_horizontal_16_neon| PROC
+|vpx_lpf_horizontal_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -54,7 +54,7 @@
vld1.u8 {d14}, [r8@64], r1 ; q6
vld1.u8 {d15}, [r8@64], r1 ; q7
- bl vp9_wide_mbfilter_neon
+ bl vpx_wide_mbfilter_neon
tst r7, #1
beq h_mbfilter
@@ -115,9 +115,9 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_lpf_horizontal_16_neon|
+ ENDP ; |vpx_lpf_horizontal_16_neon|
-; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
+; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh)
@@ -126,7 +126,7 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_lpf_vertical_16_neon| PROC
+|vpx_lpf_vertical_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -176,7 +176,7 @@
vtrn.8 d12, d13
vtrn.8 d14, d15
- bl vp9_wide_mbfilter_neon
+ bl vpx_wide_mbfilter_neon
tst r7, #1
beq v_mbfilter
@@ -279,9 +279,9 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_lpf_vertical_16_neon|
+ ENDP ; |vpx_lpf_vertical_16_neon|
-; void vp9_wide_mbfilter_neon();
+; void vpx_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store.
;
@@ -305,7 +305,7 @@
; d13 q5
; d14 q6
; d15 q7
-|vp9_wide_mbfilter_neon| PROC
+|vpx_wide_mbfilter_neon| PROC
mov r7, #0
; filter_mask
@@ -601,6 +601,6 @@
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
bx lr
- ENDP ; |vp9_wide_mbfilter_neon|
+ ENDP ; |vpx_wide_mbfilter_neon|
END
--- a/vpx_dsp/arm/loopfilter_neon.c
+++ b/vpx_dsp/arm/loopfilter_neon.c
@@ -14,7 +14,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -21,12 +21,12 @@
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
#if HAVE_NEON_ASM
-void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -33,11 +33,11 @@
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -44,15 +44,15 @@
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
- vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
- vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
}
#endif // HAVE_NEON_ASM
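The dual wrappers above all share one fan-out shape: a 16-pixel edge is filtered by running the 8-pixel kernel twice, stepping 8 columns (s + 8) for horizontal edges or 8 rows (s + 8 * p) for vertical ones. A self-contained sketch of the vertical case, mirroring the code above (the wrapper name is hypothetical):

    #include <stdint.h>
    #include "./vpx_dsp_rtcd.h"  /* declares the renamed NEON kernels */

    /* sketch: one 16-tall vertical edge as two 8-tall single-edge calls */
    static void lpf_vertical_8_dual_sketch(uint8_t *s, int p,
                                           const uint8_t *blimit0,
                                           const uint8_t *limit0,
                                           const uint8_t *thresh0,
                                           const uint8_t *blimit1,
                                           const uint8_t *limit1,
                                           const uint8_t *thresh1) {
      vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);          /* rows 0..7 */
      vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);  /* rows 8..15 */
    }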
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -115,7 +115,7 @@
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
-void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count) {
int i;
@@ -132,15 +132,15 @@
}
}
-void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
+void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -157,12 +157,12 @@
}
}
-void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -187,7 +187,7 @@
}
}
-void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -207,15 +207,15 @@
}
}
-void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
+void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -232,12 +232,12 @@
}
}
-void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -292,7 +292,7 @@
}
}
-void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -341,12 +341,12 @@
}
}
-void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
}
-void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
}
@@ -446,7 +446,7 @@
*op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift);
}
-void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
+void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count, int bd) {
int i;
@@ -469,7 +469,7 @@
}
}
-void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -477,11 +477,11 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
-void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -498,7 +498,7 @@
}
}
-void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
+void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -506,8 +506,8 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@@ -532,7 +532,7 @@
}
}
-void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -554,7 +554,7 @@
}
}
-void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -562,11 +562,11 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
-void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -586,7 +586,7 @@
}
}
-void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
+void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -594,8 +594,8 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@@ -662,7 +662,7 @@
}
}
-void vp9_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -727,13 +727,13 @@
}
}
-void vp9_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
}
-void vp9_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
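The CONFIG_VP9_HIGHBITDEPTH variants renamed above follow one convention: pixel buffers widen from uint8_t to uint16_t, the blimit/limit/thresh tables stay 8-bit, and the signed-domain bias grows with bit depth, as the "0x80 << shift" term near the top of this file shows. A hedged scalar sketch of the clamp behind signed_char_clamp_high, assuming shift == bd - 8:

    #include <stdint.h>

    /* sketch: clamp to the signed range of the active bit depth;
       lim is 128, 512, or 2048 for bd = 8, 10, or 12 */
    static int16_t clamp_high_sketch(int v, int bd) {
      const int lim = 0x80 << (bd - 8);
      if (v < -lim) return (int16_t)(-lim);
      if (v > lim - 1) return (int16_t)(lim - 1);
      return (int16_t)v;
    }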
--- a/vpx_dsp/mips/common_dspr2.h
+++ b/vpx_dsp/mips/common_dspr2.h
@@ -21,7 +21,7 @@
#if HAVE_DSPR2
#define CROP_WIDTH 512
-static INLINE void vp9_prefetch_load(const unsigned char *src) {
+static INLINE void prefetch_load(const unsigned char *src) {
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
@@ -30,7 +30,7 @@
}
/* prefetch data for store */
-static INLINE void vp9_prefetch_store(unsigned char *dst) {
+static INLINE void prefetch_store(unsigned char *dst) {
__asm__ __volatile__ (
"pref 1, 0(%[dst]) \n\t"
:
@@ -38,7 +38,7 @@
);
}
-static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) {
+static INLINE void prefetch_load_streamed(const unsigned char *src) {
__asm__ __volatile__ (
"pref 4, 0(%[src]) \n\t"
:
@@ -47,7 +47,7 @@
}
/* prefetch data for store */
-static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) {
+static INLINE void prefetch_store_streamed(unsigned char *dst) {
__asm__ __volatile__ (
"pref 5, 0(%[dst]) \n\t"
:
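The four renamed helpers differ only in the hint given to the MIPS pref instruction: 0 prefetches for load, 1 for store, and 4/5 are the streamed variants for data that will not be reused soon. The dspr2 loop filters below warm the store path one row ahead, as in this usage sketch (the loop body is elided):

    #include "vpx_dsp/mips/common_dspr2.h"  /* declares prefetch_store() */

    /* sketch: prefetch the next row for writing before filtering this one */
    static void filter_rows_sketch(unsigned char *s, int pitch, int rows) {
      int i;
      for (i = 0; i < rows; ++i) {
        prefetch_store(s + pitch);  /* pref hint 1: row will be stored to */
        /* ... filter the row at s ... */
        s += pitch;
      }
    }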
--- a/vpx_dsp/mips/loopfilter_16_msa.c
+++ b/vpx_dsp/mips/loopfilter_16_msa.c
@@ -11,7 +11,7 @@
#include "vpx_ports/mem.h"
#include "vpx_dsp/mips/loopfilter_msa.h"
-int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
+int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
uint8_t *filter48,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -79,7 +79,7 @@
}
}
-void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
+void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
@@ -405,7 +405,7 @@
}
}
-void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -415,15 +415,15 @@
(void)count;
- early_exit = vp9_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
+ early_exit = vpx_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- vp9_hz_lpf_t16_16w(src, pitch, filter48);
+ vpx_hz_lpf_t16_16w(src, pitch, filter48);
}
}
-void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -643,7 +643,7 @@
}
}
} else {
- vp9_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
+ vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
thresh_ptr, count);
}
}
@@ -744,7 +744,7 @@
ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
}
-int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
+int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch_org,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -812,7 +812,7 @@
}
}
-int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+int32_t vpx_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16i8 zero = { 0 };
v16u8 filter8, flat, flat2;
@@ -1032,7 +1032,7 @@
}
}
-void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@@ -1042,12 +1042,12 @@
transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
- early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
+ early_exit = vpx_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
+ early_exit = vpx_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {
@@ -1056,7 +1056,7 @@
}
}
-int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
+int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -1134,7 +1134,7 @@
}
}
-int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+int32_t vpx_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
@@ -1455,7 +1455,7 @@
}
}
-void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@@ -1465,12 +1465,12 @@
transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
- early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
+ early_exit = vpx_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
+ early_exit = vpx_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {
--- a/vpx_dsp/mips/loopfilter_4_msa.c
+++ b/vpx_dsp/mips/loopfilter_4_msa.c
@@ -10,7 +10,7 @@
#include "vpx_dsp/mips/loopfilter_msa.h"
-void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -39,7 +39,7 @@
SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
}
-void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
@@ -71,7 +71,7 @@
ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
}
-void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -102,7 +102,7 @@
ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
}
-void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
--- a/vpx_dsp/mips/loopfilter_8_msa.c
+++ b/vpx_dsp/mips/loopfilter_8_msa.c
@@ -10,7 +10,7 @@
#include "vpx_dsp/mips/loopfilter_msa.h"
-void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -83,7 +83,7 @@
}
}
-void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -158,7 +158,7 @@
}
}
-void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -237,7 +237,7 @@
}
}
-void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,
--- a/vpx_dsp/mips/loopfilter_filters_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c
@@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -49,7 +49,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
/* loop filter designed to work using chars so that we can make maximum use
of 8 bit simd instructions. */
@@ -87,14 +87,14 @@
: [sm1] "r" (sm1), [s0] "r" (s0), [s5] "r" (s5), [s6] "r" (s6)
);
- vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
- pm1, p0, p3, p4, p5, p6,
- thresh_vec, &hev, &mask);
+ filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
+ pm1, p0, p3, p4, p5, p6,
+ thresh_vec, &hev, &mask);
/* if mask == 0, filtering is not needed */
if (mask) {
/* filtering */
- vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
+ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
__asm__ __volatile__ (
"sw %[p1], (%[s1]) \n\t"
@@ -113,7 +113,7 @@
}
}
-void vp9_lpf_vertical_4_dspr2(unsigned char *s,
+void vpx_lpf_vertical_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -143,7 +143,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -216,14 +216,14 @@
* mask will be zero and filtering is not needed
*/
if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
- vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
- p0, p3, p4, p5, p6, thresh_vec,
- &hev, &mask);
+ filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
+ p0, p3, p4, p5, p6, thresh_vec,
+ &hev, &mask);
/* if mask == 0, filtering is not needed */
if (mask) {
/* filtering */
- vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
+ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
/* unpack processed 4x4 neighborhood
* don't use transpose on output data
@@ -306,7 +306,7 @@
}
}
-void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -313,11 +313,11 @@
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -324,11 +324,11 @@
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -335,11 +335,11 @@
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -346,16 +346,16 @@
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
+ vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
1);
}
-void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
- vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
- vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
}
#endif // #if HAVE_DSPR2
--- a/vpx_dsp/mips/loopfilter_filters_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.h
@@ -24,10 +24,10 @@
#if HAVE_DSPR2
/* inputs & outputs are quad-byte vectors */
-static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
- uint32_t *ps1, uint32_t *ps0,
- uint32_t *qs0, uint32_t *qs1) {
- int32_t vp9_filter_l, vp9_filter_r;
+static INLINE void filter_dspr2(uint32_t mask, uint32_t hev,
+ uint32_t *ps1, uint32_t *ps0,
+ uint32_t *qs0, uint32_t *qs1) {
+ int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@@ -73,34 +73,34 @@
hev_r = hev_r & HWM;
__asm__ __volatile__ (
- /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
- /* vp9_filter &= hev; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
+ /* vpx_filter &= hev; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
- /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
- /* vp9_filter &= mask; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
+ /* vpx_filter &= mask; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
- : [vp9_filter_l] "=&r" (vp9_filter_l),
- [vp9_filter_r] "=&r" (vp9_filter_r),
+ : [vpx_filter_l] "=&r" (vpx_filter_l),
+ [vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@@ -113,13 +113,13 @@
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
- /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
+ /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
+ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
+ "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
- /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
+ /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
+ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@@ -142,23 +142,23 @@
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
- [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
+ [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
- /* (vp9_filter += 1) >>= 1 */
+ /* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
- /* vp9_filter &= ~hev; */
+ /* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
- /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
+ /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
- /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
+ /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@@ -196,12 +196,12 @@
*qs1 = vqs1 ^ N128;
}
-static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
- uint32_t ps1, uint32_t ps0,
- uint32_t qs0, uint32_t qs1,
- uint32_t *p1_f0, uint32_t *p0_f0,
- uint32_t *q0_f0, uint32_t *q1_f0) {
- int32_t vp9_filter_l, vp9_filter_r;
+static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev,
+ uint32_t ps1, uint32_t ps0,
+ uint32_t qs0, uint32_t qs1,
+ uint32_t *p1_f0, uint32_t *p0_f0,
+ uint32_t *q0_f0, uint32_t *q1_f0) {
+ int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@@ -247,34 +247,34 @@
hev_r = hev_r & HWM;
__asm__ __volatile__ (
- /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
- /* vp9_filter &= hev; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
+ /* vpx_filter &= hev; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
- /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
- /* vp9_filter &= mask; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
+ /* vpx_filter &= mask; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
- : [vp9_filter_l] "=&r" (vp9_filter_l),
- [vp9_filter_r] "=&r" (vp9_filter_r),
+ : [vpx_filter_l] "=&r" (vpx_filter_l),
+ [vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@@ -286,13 +286,13 @@
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
- /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
+ /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
+ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
+ "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
- /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
+ /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
+ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@@ -315,23 +315,23 @@
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
- [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
+ [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
- /* (vp9_filter += 1) >>= 1 */
+ /* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
- /* vp9_filter &= ~hev; */
+ /* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
- /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
+ /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
- /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
+ /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@@ -369,10 +369,10 @@
*q1_f0 = vqs1 ^ N128;
}
-static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
- uint32_t *op1, uint32_t *op0,
- uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3) {
+static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
+ uint32_t *op1, uint32_t *op0,
+ uint32_t *oq0, uint32_t *oq1,
+ uint32_t *oq2, uint32_t *oq3) {
/* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
@@ -446,14 +446,14 @@
*oq2 = res_oq2;
}
-static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2,
- uint32_t p1, uint32_t p0,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t *op2_f1,
- uint32_t *op1_f1, uint32_t *op0_f1,
- uint32_t *oq0_f1, uint32_t *oq1_f1,
- uint32_t *oq2_f1) {
+static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2,
+ uint32_t p1, uint32_t p0,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t *op2_f1,
+ uint32_t *op1_f1, uint32_t *op0_f1,
+ uint32_t *oq0_f1, uint32_t *oq1_f1,
+ uint32_t *oq2_f1) {
/* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
uint32_t res_op2, res_op1, res_op0;
uint32_t res_oq0, res_oq1, res_oq2;
@@ -524,14 +524,14 @@
*oq2_f1 = res_oq2;
}
-static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
- uint32_t *op5, uint32_t *op4,
- uint32_t *op3, uint32_t *op2,
- uint32_t *op1, uint32_t *op0,
- uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3,
- uint32_t *oq4, uint32_t *oq5,
- uint32_t *oq6, uint32_t *oq7) {
+static INLINE void wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
+ uint32_t *op5, uint32_t *op4,
+ uint32_t *op3, uint32_t *op2,
+ uint32_t *op1, uint32_t *op0,
+ uint32_t *oq0, uint32_t *oq1,
+ uint32_t *oq2, uint32_t *oq3,
+ uint32_t *oq4, uint32_t *oq5,
+ uint32_t *oq6, uint32_t *oq7) {
const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
--- a/vpx_dsp/mips/loopfilter_masks_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_masks_dspr2.h
@@ -24,13 +24,13 @@
#if HAVE_DSPR2
/* processing 4 pixels at the same time
* compute hev and mask in the same function */
-static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
- uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t thresh, uint32_t *hev,
- uint32_t *mask) {
+static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
+ uint32_t p1, uint32_t p0,
+ uint32_t p3, uint32_t p2,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t thresh, uint32_t *hev,
+ uint32_t *mask) {
uint32_t c, r, r3, r_k;
uint32_t s1, s2, s3;
uint32_t ones = 0xFFFFFFFF;
@@ -129,16 +129,16 @@
*mask = s2;
}
-static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit,
- uint32_t flimit,
- uint32_t thresh,
- uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t *hev,
- uint32_t *mask,
- uint32_t *flat) {
+static INLINE void filter_hev_mask_flatmask4_dspr2(uint32_t limit,
+ uint32_t flimit,
+ uint32_t thresh,
+ uint32_t p1, uint32_t p0,
+ uint32_t p3, uint32_t p2,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t *hev,
+ uint32_t *mask,
+ uint32_t *flat) {
uint32_t c, r, r3, r_k, r_flat;
uint32_t s1, s2, s3;
uint32_t ones = 0xFFFFFFFF;
@@ -279,12 +279,12 @@
*flat = flat1;
}
-static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
- uint32_t p2, uint32_t p1,
- uint32_t p0, uint32_t q0,
- uint32_t q1, uint32_t q2,
- uint32_t q3, uint32_t q4,
- uint32_t *flat2) {
+static INLINE void flatmask5(uint32_t p4, uint32_t p3,
+ uint32_t p2, uint32_t p1,
+ uint32_t p0, uint32_t q0,
+ uint32_t q1, uint32_t q2,
+ uint32_t q3, uint32_t q4,
+ uint32_t *flat2) {
uint32_t c, r, r_k, r_flat;
uint32_t ones = 0xFFFFFFFF;
uint32_t flat_thresh = 0x01010101;
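flatmask5 evaluates the wide-filter flatness test on four packed pixels at once; with flat_thresh = 0x01010101, each byte lane asks whether an outer tap sits within 1 of p0 or q0 (callers pass p7..p4 and q4..q7, as the files below show). A scalar sketch of the per-pixel decision this packed code is assumed to implement:

    #include <stdlib.h>  /* abs() */

    /* sketch: one lane of the flatmask5 test at threshold 1; a nonzero
       result feeds the flat2 gate on the 16-wide filter path */
    static int flat_mask5_sketch(int p4, int p3, int p2, int p1, int p0,
                                 int q0, int q1, int q2, int q3, int q4) {
      return abs(p4 - p0) <= 1 && abs(p3 - p0) <= 1 &&
             abs(p2 - p0) <= 1 && abs(p1 - p0) <= 1 &&
             abs(q1 - q0) <= 1 && abs(q2 - q0) <= 1 &&
             abs(q3 - q0) <= 1 && abs(q4 - q0) <= 1;
    }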
--- a/vpx_dsp/mips/loopfilter_mb_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_dspr2.c
@@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -52,7 +52,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
for (i = 0; i < 2; i++) {
sp3 = s - (pitch << 2);
@@ -80,13 +80,13 @@
[sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0)
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
"sw %[p1_f0], (%[sp1]) \n\t"
@@ -103,13 +103,13 @@
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@@ -129,18 +129,18 @@
);
} else if ((flat != 0) && (mask != 0)) {
/* filtering */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -318,7 +318,7 @@
}
}
-void vp9_lpf_vertical_8_dspr2(unsigned char *s,
+void vpx_lpf_vertical_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -350,7 +350,7 @@
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -450,39 +450,39 @@
:
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat != 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
--- a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
@@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -57,7 +57,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
for (i = 0; i < (2 * count); i++) {
sp7 = s - (pitch << 3);
@@ -109,17 +109,17 @@
[sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7)
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
- vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
+ flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
"sw %[p1_f0], (%[sp1]) \n\t"
@@ -138,17 +138,17 @@
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
COMBINE_LEFT_RIGHT_0TO2()
COMBINE_LEFT_RIGHT_3TO6()
@@ -188,13 +188,13 @@
/* f1 */
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@@ -214,18 +214,18 @@
);
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0+f1 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -398,36 +398,36 @@
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 + f2 */
/* f0 function */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* f1 function */
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
- q0_l, q1_l, q2_l, q3_l,
- &p2_l_f1, &p1_l_f1, &p0_l_f1,
- &q0_l_f1, &q1_l_f1, &q2_l_f1);
+ mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
+ q0_l, q1_l, q2_l, q3_l,
+ &p2_l_f1, &p1_l_f1, &p0_l_f1,
+ &q0_l_f1, &q1_l_f1, &q2_l_f1);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
- q0_r, q1_r, q2_r, q3_r,
- &p2_r_f1, &p1_r_f1, &p0_r_f1,
- &q0_r_f1, &q1_r_f1, &q2_r_f1);
+ mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
+ q0_r, q1_r, q2_r, q3_r,
+ &p2_r_f1, &p1_r_f1, &p0_r_f1,
+ &q0_r_f1, &q1_r_f1, &q2_r_f1);
/* f2 function */
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
if (mask & flat & flat2 & 0x000000FF) {
__asm__ __volatile__ (
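
The dspr2 hunks above only rename helpers; the selection logic they feed is unchanged. Per packed group of pixels, mask gates all filtering, flat upgrades the narrow f0 filter to the 8-tap f1, and flat2 further upgrades to the 16-wide f2. A minimal scalar sketch of that decision tree (enum and function names are illustrative, not libvpx API):

    typedef enum { FILTER_NONE, FILTER_F0, FILTER_F1, FILTER_F2 } FilterKind;

    /* Mirrors the f0/f1/f2 branch ladder in the dspr2 code above. */
    static FilterKind select_filter(int mask_bit, int flat_bit, int flat2_bit) {
      if (!mask_bit) return FILTER_NONE;  /* differences exceed limits    */
      if (!flat_bit) return FILTER_F0;    /* narrow 4-tap filter only     */
      if (!flat2_bit) return FILTER_F1;   /* 8-tap filter, flat segment   */
      return FILTER_F2;                   /* 16-wide filter, very flat    */
    }
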
--- a/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
@@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_vertical_16_dspr2(uint8_t *s,
+void vpx_lpf_vertical_16_dspr2(uint8_t *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -54,7 +54,7 @@
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -247,17 +247,17 @@
:
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
- vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
+ flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
} else if ((flat2 == 0xFFFFFFFF) && (flat == 0xFFFFFFFF) &&
(mask == 0xFFFFFFFF)) {
@@ -264,44 +264,44 @@
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
STORE_F2()
} else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
/* f1 */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -465,32 +465,32 @@
}
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0+f1+f2 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
PACK_LEFT_0TO3()
- vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
- q0_l, q1_l, q2_l, q3_l,
- &p2_l_f1, &p1_l_f1, &p0_l_f1,
- &q0_l_f1, &q1_l_f1, &q2_l_f1);
+ mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
+ q0_l, q1_l, q2_l, q3_l,
+ &p2_l_f1, &p1_l_f1, &p0_l_f1,
+ &q0_l_f1, &q1_l_f1, &q2_l_f1);
PACK_RIGHT_0TO3()
- vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
- q0_r, q1_r, q2_r, q3_r,
- &p2_r_f1, &p1_r_f1, &p0_r_f1,
- &q0_r_f1, &q1_r_f1, &q2_r_f1);
+ mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
+ q0_r, q1_r, q2_r, q3_r,
+ &p2_r_f1, &p1_r_f1, &p0_r_f1,
+ &q0_r_f1, &q1_r_f1, &q2_r_f1);
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
if (mask & flat & flat2 & 0x000000FF) {
__asm__ __volatile__ (
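
The vertical path dispatches on whole-register mask values: when mask, flat, and flat2 are uniform across all packed lanes (0 or 0xFFFFFFFF), a fast path runs a single filter variant with no per-lane blending, and only mixed masks fall through to the blend branches. The branch ladder above condenses to the following (names illustrative):

    #include <stdint.h>

    typedef enum {
      PATH_NONE, PATH_F0, PATH_F1_ALL, PATH_F2_ALL, PATH_F0_F1, PATH_F0_F1_F2
    } FilterPath;

    static FilterPath select_path(uint32_t mask, uint32_t flat, uint32_t flat2) {
      const uint32_t all = 0xFFFFFFFFu;               /* every lane agrees */
      if (mask == 0) return PATH_NONE;
      if (flat == 0) return PATH_F0;
      if (mask == all && flat == all && flat2 == all) return PATH_F2_ALL;
      if (mask == all && flat == all && flat2 == 0) return PATH_F1_ALL;
      if (flat2 == 0) return PATH_F0_F1;
      return PATH_F0_F1_F2;
    }
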
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -38,77 +38,77 @@
#
# Loopfilter
#
-add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16 sse2 neon_asm msa/;
-$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
+add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_vertical_16 sse2 neon_asm msa/;
+$vpx_lpf_vertical_16_neon_asm=vpx_lpf_vertical_16_neon;
-add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm msa/;
-$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
+add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm msa/;
+$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
-add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_8 sse2 neon msa/;
+add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_vertical_8 sse2 neon msa/;
-add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm msa/;
-$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
+add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm msa/;
+$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
-add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_4 mmx neon msa/;
+add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_vertical_4 mmx neon msa/;
-add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_4_dual sse2 neon msa/;
+add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_vertical_4_dual sse2 neon msa/;
-add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
-$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
+add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
+$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
-add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_8 sse2 neon msa/;
+add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_8 sse2 neon msa/;
-add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm msa/;
-$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
+add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm msa/;
+$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
-add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_4 mmx neon msa/;
+add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_4 mmx neon msa/;
-add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_4_dual sse2 neon msa/;
+add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_horizontal_4_dual sse2 neon msa/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vp9_highbd_lpf_vertical_16 sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_16 sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_vertical_8 sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_vertical_4 sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_16 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_8 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_4 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/;
} # CONFIG_VP9_HIGHBITDEPTH
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
--- a/vpx_dsp/x86/highbd_loopfilter_sse2.c
+++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c
@@ -508,7 +508,7 @@
}
// TODO(yunqingwang): remove count and call these 2 functions (8 or 16) directly.
-void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -519,7 +519,7 @@
highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
}
-void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -687,7 +687,7 @@
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = signed_char_clamp_bd_sse2(filt, bd);
filt = _mm_and_si128(filt, mask);
@@ -756,7 +756,7 @@
_mm_store_si128((__m128i *)(s + 2 * p), q2);
}
-void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -764,12 +764,12 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
+ vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
1, bd);
}
-void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -891,7 +891,7 @@
filt = _mm_adds_epi16(filt, work_a);
filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd);
@@ -936,7 +936,7 @@
_mm_storeu_si128((__m128i *)(s + 1 * p), q1);
}
-void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -944,8 +944,8 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
+ vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
bd);
}
@@ -1054,7 +1054,7 @@
highbd_transpose(src1, in_p, dest1, out_p, 1);
}
-void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1071,7 +1071,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
+ vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@@ -1081,7 +1081,7 @@
highbd_transpose(src, 8, dst, p, 1);
}
-void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -1097,7 +1097,7 @@
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
+ vpx_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1108,7 +1108,7 @@
highbd_transpose(src, 16, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1125,7 +1125,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
+ vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@@ -1135,7 +1135,7 @@
highbd_transpose(src, 8, dst, p, 1);
}
-void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -1151,7 +1151,7 @@
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
+ vpx_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1163,7 +1163,7 @@
highbd_transpose(src, 16, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1192,7 +1192,7 @@
highbd_transpose(src, 8, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
+void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
int p,
const uint8_t *blimit,
const uint8_t *limit,
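
None of the vertical SSE2 filters above filter columns directly: each transposes 8 (or 16) columns into an aligned temporary, runs the matching horizontal kernel, and transposes back, so only the horizontal arithmetic has to be vectorized. A schematic of the round trip (the scalar transpose and function-pointer type are illustrative; the real code uses SSE2 unpack shuffles):

    #include <stdint.h>

    typedef void (*row_filter_fn)(uint16_t *s, int pitch);

    static void transpose_8x8(const uint16_t *src, int src_pitch,
                              uint16_t *dst, int dst_pitch) {
      for (int r = 0; r < 8; ++r)
        for (int c = 0; c < 8; ++c)
          dst[c * dst_pitch + r] = src[r * src_pitch + c];
    }

    static void vertical_via_transpose(uint16_t *s, int pitch, row_filter_fn f) {
      uint16_t t_dst[8 * 8];
      transpose_8x8(s - 4, pitch, t_dst, 8);  /* columns become rows      */
      f(t_dst + 4 * 8, 8);                    /* vertical edge is now the */
                                              /* horizontal edge at row 4 */
      transpose_8x8(t_dst, 8, s - 4, pitch);  /* write results back       */
    }
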
--- a/vpx_dsp/x86/loopfilter_avx2.c
+++ b/vpx_dsp/x86/loopfilter_avx2.c
@@ -103,7 +103,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ /* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -515,7 +515,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ /* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -976,7 +976,7 @@
}
}
-void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
const unsigned char *_thresh, int count) {
if (count == 1)
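
The renamed "(vpx_filter + 3 * (qs0 - ps0)) & mask" comments in these SIMD files all describe the same core 4-tap filter step. A scalar sketch mirroring the reference C filter's structure (the SIMD versions run the same math on values biased by 0x80 so they can use signed saturating adds):

    #include <stdint.h>

    static int8_t signed_char_clamp(int t) {
      return (int8_t)(t < -128 ? -128 : (t > 127 ? 127 : t));
    }

    /* mask and hev are per-pixel 0 or -1 (all bits set), as in the SIMD code. */
    static void filter4_sketch(int8_t mask, int8_t hev, int8_t *ps1, int8_t *ps0,
                               int8_t *qs0, int8_t *qs1) {
      int8_t filt = signed_char_clamp(*ps1 - *qs1) & hev;
      filt = signed_char_clamp(filt + 3 * (*qs0 - *ps0)) & mask;
      const int8_t filter1 = signed_char_clamp(filt + 4) >> 3;
      const int8_t filter2 = signed_char_clamp(filt + 3) >> 3;
      *qs0 = signed_char_clamp(*qs0 - filter1);
      *ps0 = signed_char_clamp(*ps0 + filter2);
      filt = (int8_t)(((filter1 + 1) >> 1) & ~hev);  /* outer taps skip hev */
      *qs1 = signed_char_clamp(*qs1 - filt);
      *ps1 = signed_char_clamp(*ps1 + filt);
    }
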
--- a/vpx_dsp/x86/loopfilter_mmx.asm
+++ b/vpx_dsp/x86/loopfilter_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
-;void vp9_lpf_horizontal_4_mmx
+;void vpx_lpf_horizontal_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -21,8 +21,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_lpf_horizontal_4_mmx) PRIVATE
-sym(vp9_lpf_horizontal_4_mmx):
+global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
+sym(vpx_lpf_horizontal_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -224,7 +224,7 @@
ret
-;void vp9_lpf_vertical_4_mmx
+;void vpx_lpf_vertical_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -233,8 +233,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_lpf_vertical_4_mmx) PRIVATE
-sym(vp9_lpf_vertical_4_mmx):
+global sym(vpx_lpf_vertical_4_mmx) PRIVATE
+sym(vpx_lpf_vertical_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
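
In the assembly files the rename lands in the exported symbol itself: sym() applies the platform's name decoration, so global sym(vpx_lpf_horizontal_4_mmx) is what the linker resolves against the C-side prototype coming out of the RTCD definitions above. For reference, the declarations these two symbols must agree with (derived from the vpx_lpf_horizontal_4 and vpx_lpf_vertical_4 add_proto entries; the specialized names share the base prototype):

    #include <stdint.h>

    void vpx_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit,
                                  const uint8_t *limit, const uint8_t *thresh,
                                  int count);
    void vpx_lpf_vertical_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int count);
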
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -100,7 +100,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -495,7 +495,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
filter2 = _mm_adds_epi8(filt, t3);
@@ -717,7 +717,7 @@
}
// TODO(yunqingwang): remove count and call these 2 functions (8 or 16) directly.
-void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@@ -727,7 +727,7 @@
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
}
-void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@@ -874,7 +874,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -943,7 +943,7 @@
}
}
-void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
+void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -1115,7 +1115,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -1190,7 +1190,7 @@
}
}
-void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0,
@@ -1286,7 +1286,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -1464,7 +1464,7 @@
} while (++idx8x8 < num_8x8_to_transpose);
}
-void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@@ -1478,7 +1478,7 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1489,7 +1489,7 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
@@ -1505,7 +1505,7 @@
transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
+ vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1514,7 +1514,7 @@
transpose(src, 8, dst, p, 1);
}
-void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@@ -1528,7 +1528,7 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1540,7 +1540,7 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh) {
@@ -1568,7 +1568,7 @@
transpose(src, 8, dst, p, 2);
}
-void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) {
DECLARE_ALIGNED(16, unsigned char, t_dst[256]);