shithub: libvpx

Download patch

ref: 348bdc0195afe33644fb15e3e007b04a2e2e8609
parent: 15afee19385524c8ea3975f6fd11fd6282c0aa15
author: Marco <[email protected]>
date: Thu Apr 13 13:19:06 EDT 2017

vp9: Add phase to get averaging filter for 1:2 downsampling.

The scaling filter with zero shift will give sub-sampling for
2x downsampling. Allow for a phase shift to get an averaging filter.

Usage is for source scaling in 1 pass SVC mode for 1:2 downscale.
Reduces aliasing in downsampled image.

Keep the phase to 0/off for now.

Change-Id: Ic547ea0748d151b675f877527e656407fcf4d51e

--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -225,7 +225,7 @@
 #
 # frame based scale
 #
-add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst";
+add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, int phase_scaler";
 specialize qw/vp9_scale_and_extend_frame ssse3/;
 
 }
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2390,7 +2390,8 @@
 
 #if CONFIG_VP9_HIGHBITDEPTH
 static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
-                                   YV12_BUFFER_CONFIG *dst, int bd) {
+                                   YV12_BUFFER_CONFIG *dst, int bd,
+                                   int phase_scaler) {
   const int src_w = src->y_crop_width;
   const int src_h = src->y_crop_height;
   const int dst_w = dst->y_crop_width;
@@ -2408,9 +2409,9 @@
     const int src_stride = src_strides[i];
     const int dst_stride = dst_strides[i];
     for (y = 0; y < dst_h; y += 16) {
-      const int y_q4 = y * (16 / factor) * src_h / dst_h;
+      const int y_q4 = y * (16 / factor) * src_h / dst_h + phase_scaler;
       for (x = 0; x < dst_w; x += 16) {
-        const int x_q4 = x * (16 / factor) * src_w / dst_w;
+        const int x_q4 = x * (16 / factor) * src_w / dst_w + phase_scaler;
         const uint8_t *src_ptr = srcs[i] +
                                  (y / factor) * src_h / dst_h * src_stride +
                                  (x / factor) * src_w / dst_w;
@@ -2712,7 +2713,7 @@
                                        cm->byte_alignment, NULL, NULL, NULL))
             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                                "Failed to allocate frame buffer");
-          scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
+          scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth, 0);
           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
           alloc_frame_mvs(cm, new_fb);
         }
@@ -2735,7 +2736,7 @@
                                        cm->byte_alignment, NULL, NULL, NULL))
             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                                "Failed to allocate frame buffer");
-          vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf);
+          vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, 0);
           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
           alloc_frame_mvs(cm, new_fb);
         }
@@ -3122,6 +3123,8 @@
                                        uint8_t *dest) {
   VP9_COMMON *const cm = &cpi->common;
   int q = 0, bottom_index = 0, top_index = 0;  // Dummy variables.
+  const int phase_scaler = is_one_pass_cbr_svc(cpi) ? cpi->svc.phase_scaler : 0;
+
   // Flag to check if its valid to compute the source sad (used for
   // scene detection and for superblock content state in CBR mode).
   // The flag may get reset below based on SVC or resizing state.
@@ -3140,8 +3143,9 @@
     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
     // result will be saved in scaled_temp and might be used later.
-    cpi->Source = vp9_svc_twostage_scale(
-        cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp);
+    cpi->Source =
+        vp9_svc_twostage_scale(cm, cpi->un_scaled_source, &cpi->scaled_source,
+                               &cpi->svc.scaled_temp, phase_scaler);
     cpi->svc.scaled_one_half = 1;
   } else if (is_one_pass_cbr_svc(cpi) &&
              cpi->un_scaled_source->y_width == cm->width << 1 &&
@@ -3152,8 +3156,9 @@
     cpi->Source = &cpi->svc.scaled_temp;
     cpi->svc.scaled_one_half = 0;
   } else {
-    cpi->Source = vp9_scale_if_required(
-        cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0));
+    cpi->Source =
+        vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
+                              (cpi->oxcf.pass == 0), phase_scaler);
   }
   // Unfiltered raw source used in metrics calculation if the source
   // has been filtered.
@@ -3160,9 +3165,9 @@
   if (is_psnr_calc_enabled(cpi)) {
 #ifdef ENABLE_KF_DENOISE
     if (is_spatial_denoise_enabled(cpi)) {
-      cpi->raw_source_frame =
-          vp9_scale_if_required(cm, &cpi->raw_unscaled_source,
-                                &cpi->raw_scaled_source, (cpi->oxcf.pass == 0));
+      cpi->raw_source_frame = vp9_scale_if_required(
+          cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
+          (cpi->oxcf.pass == 0), phase_scaler);
     } else {
       cpi->raw_source_frame = cpi->Source;
     }
@@ -3194,9 +3199,9 @@
        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
        cpi->compute_source_sad_onepass))
-    cpi->Last_Source =
-        vp9_scale_if_required(cm, cpi->unscaled_last_source,
-                              &cpi->scaled_last_source, (cpi->oxcf.pass == 0));
+    cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
+                                             &cpi->scaled_last_source,
+                                             (cpi->oxcf.pass == 0), 0);
 
   if (cpi->Last_Source == NULL ||
       cpi->Last_Source->y_width != cpi->Source->y_width ||
@@ -3378,8 +3383,9 @@
                                        &frame_over_shoot_limit);
     }
 
-    cpi->Source = vp9_scale_if_required(
-        cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0));
+    cpi->Source =
+        vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
+                              (cpi->oxcf.pass == 0), 0);
 
     // Unfiltered raw source used in metrics calculation if the source
     // has been filtered.
@@ -3388,7 +3394,7 @@
       if (is_spatial_denoise_enabled(cpi)) {
         cpi->raw_source_frame = vp9_scale_if_required(
             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
-            (cpi->oxcf.pass == 0));
+            (cpi->oxcf.pass == 0), 0);
       } else {
         cpi->raw_source_frame = cpi->Source;
       }
@@ -3400,7 +3406,7 @@
     if (cpi->unscaled_last_source != NULL)
       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
                                                &cpi->scaled_last_source,
-                                               (cpi->oxcf.pass == 0));
+                                               (cpi->oxcf.pass == 0), 0);
 
     if (frame_is_intra_only(cm) == 0) {
       if (loop_count > 0) {
@@ -3681,20 +3687,23 @@
 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(VP9_COMMON *cm,
                                            YV12_BUFFER_CONFIG *unscaled,
                                            YV12_BUFFER_CONFIG *scaled,
-                                           YV12_BUFFER_CONFIG *scaled_temp) {
+                                           YV12_BUFFER_CONFIG *scaled_temp,
+                                           int phase_scaler) {
   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
       cm->mi_rows * MI_SIZE != unscaled->y_height) {
 #if CONFIG_VP9_HIGHBITDEPTH
     if (cm->bit_depth == VPX_BITS_8) {
-      vp9_scale_and_extend_frame(unscaled, scaled_temp);
-      vp9_scale_and_extend_frame(scaled_temp, scaled);
+      vp9_scale_and_extend_frame(unscaled, scaled_temp, phase_scaler);
+      vp9_scale_and_extend_frame(scaled_temp, scaled, phase_scaler);
     } else {
-      scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth);
-      scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth);
+      scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
+                             phase_scaler);
+      scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
+                             phase_scaler);
     }
 #else
-    vp9_scale_and_extend_frame(unscaled, scaled_temp);
-    vp9_scale_and_extend_frame(scaled_temp, scaled);
+    vp9_scale_and_extend_frame(unscaled, scaled_temp, phase_scaler);
+    vp9_scale_and_extend_frame(scaled_temp, scaled, phase_scaler);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     return scaled;
   } else {
@@ -3705,7 +3714,8 @@
 YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
                                           YV12_BUFFER_CONFIG *unscaled,
                                           YV12_BUFFER_CONFIG *scaled,
-                                          int use_normative_scaler) {
+                                          int use_normative_scaler,
+                                          int phase_scaler) {
   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
       cm->mi_rows * MI_SIZE != unscaled->y_height) {
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -3712,15 +3722,16 @@
     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
         unscaled->y_height <= (scaled->y_height << 1))
       if (cm->bit_depth == VPX_BITS_8)
-        vp9_scale_and_extend_frame(unscaled, scaled);
+        vp9_scale_and_extend_frame(unscaled, scaled, phase_scaler);
       else
-        scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth);
+        scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
+                               phase_scaler);
     else
       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
 #else
     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
         unscaled->y_height <= (scaled->y_height << 1))
-      vp9_scale_and_extend_frame(unscaled, scaled);
+      vp9_scale_and_extend_frame(unscaled, scaled, phase_scaler);
     else
       scale_and_extend_frame_nonnormative(unscaled, scaled);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -843,12 +843,14 @@
 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(VP9_COMMON *cm,
                                            YV12_BUFFER_CONFIG *unscaled,
                                            YV12_BUFFER_CONFIG *scaled,
-                                           YV12_BUFFER_CONFIG *scaled_temp);
+                                           YV12_BUFFER_CONFIG *scaled_temp,
+                                           int phase_scaler);
 
 YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
                                           YV12_BUFFER_CONFIG *unscaled,
                                           YV12_BUFFER_CONFIG *scaled,
-                                          int use_normative_scaler);
+                                          int use_normative_scaler,
+                                          int phase_scaler);
 
 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
 
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1400,7 +1400,7 @@
                  (cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE);
 
     cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
-                                        &cpi->scaled_source, 0);
+                                        &cpi->scaled_source, 0, 0);
   }
 
   vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
--- a/vp9/encoder/vp9_frame_scale.c
+++ b/vp9/encoder/vp9_frame_scale.c
@@ -16,7 +16,7 @@
 #include "vpx_scale/yv12config.h"
 
 void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src,
-                                  YV12_BUFFER_CONFIG *dst) {
+                                  YV12_BUFFER_CONFIG *dst, int phase_scaler) {
   const int src_w = src->y_crop_width;
   const int src_h = src->y_crop_height;
   const int dst_w = dst->y_crop_width;
@@ -34,9 +34,9 @@
     const int src_stride = src_strides[i];
     const int dst_stride = dst_strides[i];
     for (y = 0; y < dst_h; y += 16) {
-      const int y_q4 = y * (16 / factor) * src_h / dst_h;
+      const int y_q4 = y * (16 / factor) * src_h / dst_h + phase_scaler;
       for (x = 0; x < dst_w; x += 16) {
-        const int x_q4 = x * (16 / factor) * src_w / dst_w;
+        const int x_q4 = x * (16 / factor) * src_w / dst_w + phase_scaler;
         const uint8_t *src_ptr = srcs[i] +
                                  (y / factor) * src_h / dst_h * src_stride +
                                  (x / factor) * src_w / dst_w;
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -36,6 +36,7 @@
   svc->scaled_temp_is_alloc = 0;
   svc->scaled_one_half = 0;
   svc->current_superframe = 0;
+  svc->phase_scaler = 0;
   for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
   for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
     cpi->svc.ext_frame_flags[sl] = 0;
@@ -654,6 +655,7 @@
   // of base motion vectors if spatial scale factors for any layers are not 2,
   // keep the case of 3 spatial layers with scale factor of 4x4 for base layer.
   // TODO(marpan): Fix this to allow for use_base_mv for scale factors != 2.
+  // Same condition applies to use of non-zero phase_scaler.
   if (cpi->svc.number_spatial_layers > 1) {
     int sl;
     for (sl = 0; sl < cpi->svc.number_spatial_layers - 1; ++sl) {
@@ -663,6 +665,7 @@
           !(lc->scaling_factor_num == lc->scaling_factor_den >> 2 && sl == 0 &&
             cpi->svc.number_spatial_layers == 3)) {
         cpi->svc.use_base_mv = 0;
+        cpi->svc.phase_scaler = 0;
         break;
       }
     }
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -88,6 +88,11 @@
   int force_zero_mode_spatial_ref;
   int current_superframe;
   int use_base_mv;
+  // phase_scaler used to control the downscaling filter for source scaling.
+  // phase_scaler = 0 will do sub-sampling (no weighted average),
+  // phase_scaler = 8 will center the target pixel and use the averaging filter,
+  // for eightap regular: {-1, 6, -19, 78, 78, -19, 6, -1 }.
+  int phase_scaler;
 } SVC;
 
 struct VP9_COMP;
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -745,7 +745,7 @@
                                "Failed to reallocate alt_ref_buffer");
           }
           frames[frame] = vp9_scale_if_required(
-              cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0);
+              cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0, 0);
           ++frame_used;
         }
       }
--- a/vp9/encoder/x86/vp9_frame_scale_ssse3.c
+++ b/vp9/encoder/x86/vp9_frame_scale_ssse3.c
@@ -16,7 +16,8 @@
 #include "vpx_scale/yv12config.h"
 
 extern void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src,
-                                         YV12_BUFFER_CONFIG *dst);
+                                         YV12_BUFFER_CONFIG *dst,
+                                         int phase_scaler);
 
 static void downsample_2_to_1_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride, int w,
@@ -168,7 +169,8 @@
 }
 
 void vp9_scale_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
-                                      YV12_BUFFER_CONFIG *dst) {
+                                      YV12_BUFFER_CONFIG *dst,
+                                      int phase_scaler) {
   const int src_w = src->y_crop_width;
   const int src_h = src->y_crop_height;
   const int dst_w = dst->y_crop_width;
@@ -176,7 +178,7 @@
   const int dst_uv_w = dst_w / 2;
   const int dst_uv_h = dst_h / 2;
 
-  if (dst_w * 2 == src_w && dst_h * 2 == src_h) {
+  if (dst_w * 2 == src_w && dst_h * 2 == src_h && phase_scaler == 0) {
     downsample_2_to_1_ssse3(src->y_buffer, src->y_stride, dst->y_buffer,
                             dst->y_stride, dst_w, dst_h);
     downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride, dst->u_buffer,
@@ -184,7 +186,7 @@
     downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride, dst->v_buffer,
                             dst->uv_stride, dst_uv_w, dst_uv_h);
     vpx_extend_frame_borders(dst);
-  } else if (dst_w == src_w * 2 && dst_h == src_h * 2) {
+  } else if (dst_w == src_w * 2 && dst_h == src_h * 2 && phase_scaler == 0) {
     // The upsample() supports widths up to 1920 * 2.  If greater, fall back
     // to vp9_scale_and_extend_frame_c().
     if (dst_w / 2 <= 1920) {
@@ -196,9 +198,9 @@
                             dst->uv_stride, dst_uv_w, dst_uv_h);
       vpx_extend_frame_borders(dst);
     } else {
-      vp9_scale_and_extend_frame_c(src, dst);
+      vp9_scale_and_extend_frame_c(src, dst, phase_scaler);
     }
   } else {
-    vp9_scale_and_extend_frame_c(src, dst);
+    vp9_scale_and_extend_frame_c(src, dst, phase_scaler);
   }
 }