shithub: libvpx

--- a/vp9/common/vp9_rtcd_defs.pl

+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -422,6 +422,10 @@

 add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

 specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";

+add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

+specialize qw/vp9_get_sse_sum_16x16 sse2/;

+$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;

 add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

 specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";

@@ -430,6 +434,10 @@

 add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

 specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";

+add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

+specialize qw/vp9_get_sse_sum_8x8 sse2/;

+$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;

 add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

 specialize qw/vp9_variance8x4/, "$sse2_x86inc";

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -76,18 +76,6 @@

   unsigned int var;

 } diff;

-static void get_sse_sum_8x8(const uint8_t *src, int src_stride,

-                            const uint8_t *ref, int ref_stride,

-                            unsigned int *sse, int *sum) {

-  variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum);

-}

-static void get_sse_sum_16x16(const uint8_t *src, int src_stride,

-                              const uint8_t *ref, int ref_stride,

-                              unsigned int *sse, int *sum) {

-  variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum);

-}

 static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,

                                               const struct buf_2d *ref,

                                               BLOCK_SIZE bs) {

@@ -490,8 +478,8 @@

         unsigned int sse = 0;

         int sum = 0;

         if (x_idx < pixels_wide && y_idx < pixels_high)

-          get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,

-                          d + y_idx * dp + x_idx, dp, &sse, &sum);

+          vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,

+                              d + y_idx * dp + x_idx, dp, &sse, &sum);

         fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);

@@ -1226,9 +1214,9 @@

         int b_offset = b_mi_row * MI_SIZE * src_stride +

                        b_mi_col * MI_SIZE;

-        get_sse_sum_16x16(src + b_offset, src_stride,

-                          pre_src + b_offset, pre_stride,

-                          &d16[j].sse, &d16[j].sum);

+        vp9_get_sse_sum_16x16(src + b_offset, src_stride,

+                              pre_src + b_offset, pre_stride,

+                              &d16[j].sse, &d16[j].sum);

         d16[j].var = d16[j].sse -

             (((uint32_t)d16[j].sum * d16[j].sum) >> 8);

--- a/vp9/encoder/vp9_variance.c

+++ b/vp9/encoder/vp9_variance.c

@@ -156,6 +156,19 @@

   return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \

+void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,

+                             const uint8_t *ref_ptr, int ref_stride,

+                             unsigned int *sse, int *sum) {

+  variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);

+}

+void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,

+                           const uint8_t *ref_ptr, int ref_stride,

+                           unsigned int *sse, int *sum) {

+  variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);

+}

 unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,

                             const uint8_t *ref, int ref_stride,

                             unsigned int *sse) {