shithub: libvpx

--- a/vp8/common/rtcd_defs.pl

+++ b/vp8/common/rtcd_defs.pl

@@ -255,19 +255,6 @@

-# Structured Similarity (SSIM)

-#

-if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {

-    $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2";

-    add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";

-    specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64";

-    add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";

-    specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64";

-}

-#

 # Forward DCT

 add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -74,26 +74,7 @@

 #if CONFIG_INTERNAL_STATS

 #include "math.h"

-extern double vp8_calc_ssim

-(

-    YV12_BUFFER_CONFIG *source,

-    YV12_BUFFER_CONFIG *dest,

-    int lumamask,

-    double *weight

-);

-extern double vp8_calc_ssimg

-(

-    YV12_BUFFER_CONFIG *source,

-    YV12_BUFFER_CONFIG *dest,

-    double *ssim_y,

-    double *ssim_u,

-    double *ssim_v

-);

+#include "vpx_dsp/ssim.h"

 #endif

@@ -5741,8 +5722,8 @@

                     cpi->total_sq_error2 += sq_error2;

                     cpi->totalp  += frame_psnr2;

-                    frame_ssim2 = vp8_calc_ssim(cpi->Source,

-                      &cm->post_proc_buffer, 1, &weight);

+                    frame_ssim2 = vpx_calc_ssim(cpi->Source,

+                      &cm->post_proc_buffer, &weight);

                     cpi->summed_quality += frame_ssim2 * weight;

                     cpi->summed_weights += weight;

@@ -5772,7 +5753,7 @@

             if (cpi->b_calculate_ssimg)

                 double y, u, v, frame_all;

-                frame_all =  vp8_calc_ssimg(cpi->Source, cm->frame_to_show,

+                frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show,

                     &y, &u, &v);

                 if (cpi->oxcf.number_of_layers > 1)

--- a/vp8/encoder/ssim.c

+++ /dev/null

@@ -1,233 +1,0 @@

-/*

- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include "onyx_int.h"

-void vp8_ssim_parms_16x16_c

-(

-    unsigned char *s,

-    int sp,

-    unsigned char *r,

-    int rp,

-    unsigned long *sum_s,

-    unsigned long *sum_r,

-    unsigned long *sum_sq_s,

-    unsigned long *sum_sq_r,

-    unsigned long *sum_sxr

-)

-{

-    int i,j;

-    for(i=0;i<16;i++,s+=sp,r+=rp)

-     {

-         for(j=0;j<16;j++)

-         {

-             *sum_s += s[j];

-             *sum_r += r[j];

-             *sum_sq_s += s[j] * s[j];

-             *sum_sq_r += r[j] * r[j];

-             *sum_sxr += s[j] * r[j];

-         }

-     }

-}

-void vp8_ssim_parms_8x8_c

-(

-    unsigned char *s,

-    int sp,

-    unsigned char *r,

-    int rp,

-    unsigned long *sum_s,

-    unsigned long *sum_r,

-    unsigned long *sum_sq_s,

-    unsigned long *sum_sq_r,

-    unsigned long *sum_sxr

-)

-{

-    int i,j;

-    for(i=0;i<8;i++,s+=sp,r+=rp)

-     {

-         for(j=0;j<8;j++)

-         {

-             *sum_s += s[j];

-             *sum_r += r[j];

-             *sum_sq_s += s[j] * s[j];

-             *sum_sq_r += r[j] * r[j];

-             *sum_sxr += s[j] * r[j];

-         }

-     }

-}

-const static int64_t cc1 =  26634; // (64^2*(.01*255)^2

-const static int64_t cc2 = 239708; // (64^2*(.03*255)^2

-static double similarity

-(

-    unsigned long sum_s,

-    unsigned long sum_r,

-    unsigned long sum_sq_s,

-    unsigned long sum_sq_r,

-    unsigned long sum_sxr,

-    int count

-)

-{

-    int64_t ssim_n, ssim_d;

-    int64_t c1, c2;

-    //scale the constants by number of pixels

-    c1 = (cc1*count*count)>>12;

-    c2 = (cc2*count*count)>>12;

-    ssim_n = (2*sum_s*sum_r+ c1)*((int64_t) 2*count*sum_sxr-

-          (int64_t) 2*sum_s*sum_r+c2);

-    ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*

-        ((int64_t)count*sum_sq_s-(int64_t)sum_s*sum_s +

-        (int64_t)count*sum_sq_r-(int64_t) sum_r*sum_r +c2) ;

-    return ssim_n * 1.0 / ssim_d;

-}

-static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp)

-{

-    unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;

-    vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);

-    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);

-}

-static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp)

-{

-    unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;

-    vp8_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);

-    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);

-}

-// TODO: (jbb) tried to scale this function such that we may be able to use it

-// for distortion metric in mode selection code ( provided we do a reconstruction)

-long dssim(unsigned char *s,int sp, unsigned char *r,int rp)

-{

-    unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;

-    int64_t ssim3;

-    int64_t ssim_n1,ssim_n2;

-    int64_t ssim_d1,ssim_d2;

-    int64_t ssim_t1,ssim_t2;

-    int64_t c1, c2;

-    // normalize by 256/64

-    c1 = cc1*16;

-    c2 = cc2*16;

-    vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);

-    ssim_n1 = (2*sum_s*sum_r+ c1);

-    ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2);

-    ssim_d1 =((int64_t)sum_s*sum_s +(int64_t)sum_r*sum_r+c1);

-    ssim_d2 = (256 * (int64_t) sum_sq_s-(int64_t) sum_s*sum_s +

-                    (int64_t) 256*sum_sq_r-(int64_t) sum_r*sum_r +c2) ;

-    ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;

-    ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;

-    ssim3 = 256 *ssim_t1 * ssim_t2;

-    if(ssim3 <0 )

-        ssim3=0;

-    return (long)( ssim3  );

-}

-// We are using a 8x8 moving window with starting location of each 8x8 window

-// on the 4x4 pixel grid. Such arrangement allows the windows to overlap

-// block boundaries to penalize blocking artifacts.

-double vp8_ssim2

-(

-    unsigned char *img1,

-    unsigned char *img2,

-    int stride_img1,

-    int stride_img2,

-    int width,

-    int height

-)

-{

-    int i,j;

-    int samples =0;

-    double ssim_total=0;

-    // sample point start with each 4x4 location

-    for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)

-    {

-        for(j=0; j < width-8; j+=4 )

-        {

-            double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2);

-            ssim_total += v;

-            samples++;

-        }

-    }

-    ssim_total /= samples;

-    return ssim_total;

-}

-double vp8_calc_ssim

-(

-    YV12_BUFFER_CONFIG *source,

-    YV12_BUFFER_CONFIG *dest,

-    int lumamask,

-    double *weight

-)

-{

-    double a, b, c;

-    double ssimv;

-    a = vp8_ssim2(source->y_buffer, dest->y_buffer,

-                 source->y_stride, dest->y_stride, source->y_width,

-                 source->y_height);

-    b = vp8_ssim2(source->u_buffer, dest->u_buffer,

-                 source->uv_stride, dest->uv_stride, source->uv_width,

-                 source->uv_height);

-    c = vp8_ssim2(source->v_buffer, dest->v_buffer,

-                 source->uv_stride, dest->uv_stride, source->uv_width,

-                 source->uv_height);

-    ssimv = a * .8 + .1 * (b + c);

-    *weight = 1;

-    return ssimv;

-}

-double vp8_calc_ssimg

-(

-    YV12_BUFFER_CONFIG *source,

-    YV12_BUFFER_CONFIG *dest,

-    double *ssim_y,

-    double *ssim_u,

-    double *ssim_v

-)

-{

-    double ssim_all = 0;

-    double a, b, c;

-    a = vp8_ssim2(source->y_buffer, dest->y_buffer,

-                 source->y_stride, dest->y_stride, source->y_width,

-                 source->y_height);

-    b = vp8_ssim2(source->u_buffer, dest->u_buffer,

-                 source->uv_stride, dest->uv_stride, source->uv_width,

-                 source->uv_height);

-    c = vp8_ssim2(source->v_buffer, dest->v_buffer,

-                 source->uv_stride, dest->uv_stride, source->uv_width,

-                 source->uv_height);

-    *ssim_y = a;

-    *ssim_u = b;

-    *ssim_v = c;

-    ssim_all = (a * 4 + b + c) /6;

-    return ssim_all;

-}

--- a/vp8/encoder/x86/ssim_opt_x86_64.asm

+++ /dev/null

@@ -1,216 +1,0 @@

-;

-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

-;

-;  Use of this source code is governed by a BSD-style license

-;  that can be found in the LICENSE file in the root of the source

-;  tree. An additional intellectual property rights grant can be found

-;  in the file PATENTS.  All contributing project authors may

-;  be found in the AUTHORS file in the root of the source tree.

-;

-%include "vpx_ports/x86_abi_support.asm"

-; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr

-%macro TABULATE_SSIM 0

-        paddusw         xmm15, xmm3  ; sum_s

-        paddusw         xmm14, xmm4  ; sum_r

-        movdqa          xmm1, xmm3

-        pmaddwd         xmm1, xmm1

-        paddd           xmm13, xmm1 ; sum_sq_s

-        movdqa          xmm2, xmm4

-        pmaddwd         xmm2, xmm2

-        paddd           xmm12, xmm2 ; sum_sq_r

-        pmaddwd         xmm3, xmm4

-        paddd           xmm11, xmm3  ; sum_sxr

-%endmacro

-; Sum across the register %1 starting with q words

-%macro SUM_ACROSS_Q 1

-        movdqa          xmm2,%1

-        punpckldq       %1,xmm0

-        punpckhdq       xmm2,xmm0

-        paddq           %1,xmm2

-        movdqa          xmm2,%1

-        punpcklqdq      %1,xmm0

-        punpckhqdq      xmm2,xmm0

-        paddq           %1,xmm2

-%endmacro

-; Sum across the register %1 starting with q words

-%macro SUM_ACROSS_W 1

-        movdqa          xmm1, %1

-        punpcklwd       %1,xmm0

-        punpckhwd       xmm1,xmm0

-        paddd           %1, xmm1

-        SUM_ACROSS_Q    %1

-%endmacro

-;void ssim_parms_sse2(

-;    unsigned char *s,

-;    int sp,

-;    unsigned char *r,

-;    int rp

-;    unsigned long *sum_s,

-;    unsigned long *sum_r,

-;    unsigned long *sum_sq_s,

-;    unsigned long *sum_sq_r,

-;    unsigned long *sum_sxr);

-;

-; TODO: Use parm passing through structure, probably don't need the pxors

-; ( calling app will initialize to 0 ) could easily fit everything in sse2

-; without too much hastle, and can probably do better estimates with psadw

-; or pavgb At this point this is just meant to be first pass for calculating

-; all the parms needed for 16x16 ssim so we can play with dssim as distortion

-; in mode selection code.

-global sym(vp8_ssim_parms_16x16_sse2) PRIVATE

-sym(vp8_ssim_parms_16x16_sse2):

-    push        rbp

-    mov         rbp, rsp

-    SHADOW_ARGS_TO_STACK 9

-    SAVE_XMM 15

-    push        rsi

-    push        rdi

-    ; end prolog

-    mov             rsi,        arg(0) ;s

-    mov             rcx,        arg(1) ;sp

-    mov             rdi,        arg(2) ;r

-    mov             rax,        arg(3) ;rp

-    pxor            xmm0, xmm0

-    pxor            xmm15,xmm15  ;sum_s

-    pxor            xmm14,xmm14  ;sum_r

-    pxor            xmm13,xmm13  ;sum_sq_s

-    pxor            xmm12,xmm12  ;sum_sq_r

-    pxor            xmm11,xmm11  ;sum_sxr

-    mov             rdx, 16      ;row counter

-.NextRow:

-    ;grab source and reference pixels

-    movdqu          xmm5, [rsi]

-    movdqu          xmm6, [rdi]

-    movdqa          xmm3, xmm5

-    movdqa          xmm4, xmm6

-    punpckhbw       xmm3, xmm0 ; high_s

-    punpckhbw       xmm4, xmm0 ; high_r

-    TABULATE_SSIM

-    movdqa          xmm3, xmm5

-    movdqa          xmm4, xmm6

-    punpcklbw       xmm3, xmm0 ; low_s

-    punpcklbw       xmm4, xmm0 ; low_r

-    TABULATE_SSIM

-    add             rsi, rcx   ; next s row

-    add             rdi, rax   ; next r row

-    dec             rdx        ; counter

-    jnz .NextRow

-    SUM_ACROSS_W    xmm15

-    SUM_ACROSS_W    xmm14

-    SUM_ACROSS_Q    xmm13

-    SUM_ACROSS_Q    xmm12

-    SUM_ACROSS_Q    xmm11

-    mov             rdi,arg(4)

-    movd            [rdi], xmm15;

-    mov             rdi,arg(5)

-    movd            [rdi], xmm14;

-    mov             rdi,arg(6)

-    movd            [rdi], xmm13;

-    mov             rdi,arg(7)

-    movd            [rdi], xmm12;

-    mov             rdi,arg(8)

-    movd            [rdi], xmm11;

-    ; begin epilog

-    pop         rdi

-    pop         rsi

-    RESTORE_XMM

-    UNSHADOW_ARGS

-    pop         rbp

-    ret

-;void ssim_parms_sse2(

-;    unsigned char *s,

-;    int sp,

-;    unsigned char *r,

-;    int rp

-;    unsigned long *sum_s,

-;    unsigned long *sum_r,

-;    unsigned long *sum_sq_s,

-;    unsigned long *sum_sq_r,

-;    unsigned long *sum_sxr);

-;

-; TODO: Use parm passing through structure, probably don't need the pxors

-; ( calling app will initialize to 0 ) could easily fit everything in sse2

-; without too much hastle, and can probably do better estimates with psadw

-; or pavgb At this point this is just meant to be first pass for calculating

-; all the parms needed for 16x16 ssim so we can play with dssim as distortion

-; in mode selection code.

-global sym(vp8_ssim_parms_8x8_sse2) PRIVATE

-sym(vp8_ssim_parms_8x8_sse2):

-    push        rbp

-    mov         rbp, rsp

-    SHADOW_ARGS_TO_STACK 9

-    SAVE_XMM 15

-    push        rsi

-    push        rdi

-    ; end prolog

-    mov             rsi,        arg(0) ;s

-    mov             rcx,        arg(1) ;sp

-    mov             rdi,        arg(2) ;r

-    mov             rax,        arg(3) ;rp

-    pxor            xmm0, xmm0

-    pxor            xmm15,xmm15  ;sum_s

-    pxor            xmm14,xmm14  ;sum_r

-    pxor            xmm13,xmm13  ;sum_sq_s

-    pxor            xmm12,xmm12  ;sum_sq_r

-    pxor            xmm11,xmm11  ;sum_sxr

-    mov             rdx, 8      ;row counter

-.NextRow:

-    ;grab source and reference pixels

-    movq            xmm3, [rsi]

-    movq            xmm4, [rdi]

-    punpcklbw       xmm3, xmm0 ; low_s

-    punpcklbw       xmm4, xmm0 ; low_r

-    TABULATE_SSIM

-    add             rsi, rcx   ; next s row

-    add             rdi, rax   ; next r row

-    dec             rdx        ; counter

-    jnz .NextRow

-    SUM_ACROSS_W    xmm15

-    SUM_ACROSS_W    xmm14

-    SUM_ACROSS_Q    xmm13

-    SUM_ACROSS_Q    xmm12

-    SUM_ACROSS_Q    xmm11

-    mov             rdi,arg(4)

-    movd            [rdi], xmm15;

-    mov             rdi,arg(5)

-    movd            [rdi], xmm14;

-    mov             rdi,arg(6)

-    movd            [rdi], xmm13;

-    mov             rdi,arg(7)

-    movd            [rdi], xmm12;

-    mov             rdi,arg(8)

-    movd            [rdi], xmm11;

-    ; begin epilog

-    pop         rdi

-    pop         rsi

-    RESTORE_XMM

-    UNSHADOW_ARGS

-    pop         rbp

-    ret

--- a/vp8/vp8cx.mk

+++ b/vp8/vp8cx.mk

@@ -65,7 +65,6 @@

 VP8_CX_SRCS-yes += encoder/rdopt.c

 VP8_CX_SRCS-yes += encoder/segmentation.c

 VP8_CX_SRCS-yes += encoder/segmentation.h

-VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/ssim.c

 VP8_CX_SRCS-yes += encoder/tokenize.c

 VP8_CX_SRCS-yes += encoder/dct_value_cost.h

 VP8_CX_SRCS-yes += encoder/dct_value_tokens.h

@@ -97,7 +96,6 @@

 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c

 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm

 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm

-VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt_x86_64.asm

 ifeq ($(CONFIG_REALTIME_ONLY),yes)

 VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm