shithub: libvpx

--- a/vpx_dsp/psnrhvs.c

+++ b/vpx_dsp/psnrhvs.c

@@ -23,7 +23,8 @@

 #endif

 #include <string.h>

-void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) {

+static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,

+                           int xstride) {

   (void) xstride;

   vpx_fdct8x8(x, y, ystride);

@@ -31,56 +32,57 @@

 /* Normalized inverse quantization matrix for 8x8 DCT at the point of

  * transparency. This is not the JPEG based matrix from the paper,

  this one gives a slightly higher MOS agreement.*/

-float csf_y[8][8] = {{1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411,

-    1.00227514334, 0.678296995242, 0.466224900598, 0.3265091542}, {2.2901594831,

-    1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963, 0.868920337363,

-    0.61280991668, 0.436405793551}, {2.08509755623, 2.04793073064,

-    1.34329019223, 1.09205635862, 0.875748795257, 0.670882927016,

-    0.501731932449, 0.372504254596}, {1.48366094411, 1.68731108984,

-    1.09205635862, 0.772819797575, 0.605636379554, 0.48309405692,

-    0.380429446972, 0.295774038565}, {1.00227514334, 1.2305666963,

-    0.875748795257, 0.605636379554, 0.448996256676, 0.352889268808,

-    0.283006984131, 0.226951348204}, {0.678296995242, 0.868920337363,

-    0.670882927016, 0.48309405692, 0.352889268808, 0.27032073436,

-    0.215017739696, 0.17408067321}, {0.466224900598, 0.61280991668,

-    0.501731932449, 0.380429446972, 0.283006984131, 0.215017739696,

-    0.168869545842, 0.136153931001}, {0.3265091542, 0.436405793551,

-    0.372504254596, 0.295774038565, 0.226951348204, 0.17408067321,

-    0.136153931001, 0.109083846276}};

-float csf_cb420[8][8] = {

+static const float csf_y[8][8] = {

+    {1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,

+     0.678296995242, 0.466224900598, 0.3265091542},

+    {2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,

+     0.868920337363, 0.61280991668, 0.436405793551},

+    {2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257,

+     0.670882927016, 0.501731932449, 0.372504254596},

+    {1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575,

+     0.605636379554, 0.48309405692, 0.380429446972, 0.295774038565},

+    {1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554,

+     0.448996256676, 0.352889268808, 0.283006984131, 0.226951348204},

+    {0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692,

+     0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321},

+    {0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972,

+     0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001},

+    {0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,

+     0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276}};

+static const float csf_cb420[8][8] = {

     {1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,

-        0.898018824055, 0.74725392039, 0.615105596242}, {2.46074210438,

-        1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,

-        1.17428548929, 0.996404342439, 0.830890433625}, {1.18284184739,

-        1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362,

-        0.960060382087, 0.849823426169, 0.731221236837}, {1.14982565193,

-        1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099,

-        0.751437590932, 0.685398513368, 0.608694761374}, {1.05017074788,

-        1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187,

-        0.605503172737, 0.55002013668, 0.495804539034}, {0.898018824055,

-        1.17428548929, 0.960060382087, 0.751437590932, 0.605503172737,

-        0.514674450957, 0.454353482512, 0.407050308965}, {0.74725392039,

-        0.996404342439, 0.849823426169, 0.685398513368, 0.55002013668,

-        0.454353482512, 0.389234902883, 0.342353999733}, {0.615105596242,

-        0.830890433625, 0.731221236837, 0.608694761374, 0.495804539034,

-        0.407050308965, 0.342353999733, 0.295530605237}};

-float csf_cr420[8][8] = {

+     0.898018824055, 0.74725392039, 0.615105596242},

+    {2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,

+     1.17428548929, 0.996404342439, 0.830890433625},

+    {1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362,

+     0.960060382087, 0.849823426169, 0.731221236837},

+    {1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629,

+     0.801821139099, 0.751437590932, 0.685398513368, 0.608694761374},

+    {1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099,

+     0.676555426187, 0.605503172737, 0.55002013668, 0.495804539034},

+    {0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932,

+     0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965},

+    {0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368,

+     0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733},

+    {0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,

+     0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237}};

+static const float csf_cr420[8][8] = {

     {2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,

-        0.867069376285, 0.721500455585, 0.593906509971}, {2.62502345193,

-        1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,

-        1.13381474809, 0.962064122248, 0.802254508198}, {1.26180942886,

-        1.17180569821, 0.944981930573, 0.990876405848, 0.995903384143,

-        0.926972725286, 0.820534991409, 0.706020324706}, {1.11019789803,

-        1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195,

-        0.725539939514, 0.661776842059, 0.587716619023}, {1.01397751469,

-        1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286,

-        0.584635025748, 0.531064164893, 0.478717061273}, {0.867069376285,

-        1.13381474809, 0.926972725286, 0.725539939514, 0.584635025748,

-        0.496936637883, 0.438694579826, 0.393021669543}, {0.721500455585,

-        0.962064122248, 0.820534991409, 0.661776842059, 0.531064164893,

-        0.438694579826, 0.375820256136, 0.330555063063}, {0.593906509971,

-        0.802254508198, 0.706020324706, 0.587716619023, 0.478717061273,

-        0.393021669543, 0.330555063063, 0.285345396658}};

+     0.867069376285, 0.721500455585, 0.593906509971},

+    {2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,

+     1.13381474809, 0.962064122248, 0.802254508198},

+    {1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848,

+     0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706},

+    {1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195,

+     0.725539939514, 0.661776842059, 0.587716619023},

+    {1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195,

+     0.653238524286, 0.584635025748, 0.531064164893, 0.478717061273},

+    {0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514,

+     0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543},

+    {0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059,

+     0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063},

+    {0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,

+     0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658}};

 static double convert_score_db(double _score, double _weight) {

   return 10 * (log10(255 * 255) - log10(_weight * _score));

@@ -89,7 +91,7 @@

 static double calc_psnrhvs(const unsigned char *_src, int _systride,

                            const unsigned char *_dst, int _dystride,

                            double _par, int _w, int _h, int _step,

-                           float _csf[8][8]) {

+                           const float _csf[8][8]) {

   float ret;

   int16_t dct_s[8 * 8], dct_d[8 * 8];

   tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8];

@@ -200,11 +202,12 @@

   ret /= pixels;

   return ret;

-double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,

-                   double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs) {

+double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,

+                   const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs,

+                   double *u_psnrhvs, double *v_psnrhvs) {

   double psnrhvs;

-  double par = 1.0;

-  int step = 7;

+  const double par = 1.0;

+  const int step = 7;

   vpx_clear_system_state();

   *y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer,

                             dest->y_stride, par, source->y_crop_width,

--- a/vpx_dsp/ssim.c

+++ b/vpx_dsp/ssim.c

@@ -13,10 +13,10 @@

 #include "vpx_dsp/ssim.h"

 #include "vpx_ports/mem.h"

-void vpx_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,

-                            int rp, unsigned long *sum_s, unsigned long *sum_r,

-                            unsigned long *sum_sq_s, unsigned long *sum_sq_r,

-                            unsigned long *sum_sxr) {

+void vpx_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r,

+                            int rp, uint32_t *sum_s, uint32_t *sum_r,

+                            uint32_t *sum_sq_s, uint32_t *sum_sq_r,

+                            uint32_t *sum_sxr) {

   int i, j;

   for (i = 0; i < 16; i++, s += sp, r += rp) {

     for (j = 0; j < 16; j++) {

@@ -28,10 +28,10 @@

-void vpx_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp,

-                          unsigned long *sum_s, unsigned long *sum_r,

-                          unsigned long *sum_sq_s, unsigned long *sum_sq_r,

-                          unsigned long *sum_sxr) {

+void vpx_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp,

+                          uint32_t *sum_s, uint32_t *sum_r,

+                          uint32_t *sum_sq_s, uint32_t *sum_sq_r,

+                          uint32_t *sum_sxr) {

   int i, j;

   for (i = 0; i < 8; i++, s += sp, r += rp) {

     for (j = 0; j < 8; j++) {

@@ -45,7 +45,8 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-void vpx_highbd_ssim_parms_8x8_c(uint16_t *s, int sp, uint16_t *r, int rp,

+void vpx_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp,

+                                 const uint16_t *r, int rp,

                                  uint32_t *sum_s, uint32_t *sum_r,

                                  uint32_t *sum_sq_s, uint32_t *sum_sq_r,

                                  uint32_t *sum_sxr) {

@@ -65,9 +66,9 @@

 static const int64_t cc1 =  26634;  // (64^2*(.01*255)^2

 static const int64_t cc2 = 239708;  // (64^2*(.03*255)^2

-static double similarity(unsigned long sum_s, unsigned long sum_r,

-                         unsigned long sum_sq_s, unsigned long sum_sq_r,

-                         unsigned long sum_sxr, int count) {

+static double similarity(uint32_t sum_s, uint32_t sum_r,

+                         uint32_t sum_sq_s, uint32_t sum_sq_r,

+                         uint32_t sum_sxr, int count) {

   int64_t ssim_n, ssim_d;

   int64_t c1, c2;

@@ -85,8 +86,8 @@

   return ssim_n * 1.0 / ssim_d;

-static double ssim_8x8(uint8_t *s, int sp, uint8_t *r, int rp) {

-  unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;

+static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) {

+  uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;

   vpx_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,

                      &sum_sxr);

   return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);

@@ -93,8 +94,8 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-static double highbd_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,

-                              unsigned int bd) {

+static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,

+                              int rp, unsigned int bd) {

   uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;

   const int oshift = bd - 8;

   vpx_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,

@@ -111,8 +112,9 @@

 // We are using a 8x8 moving window with starting location of each 8x8 window

 // on the 4x4 pixel grid. Such arrangement allows the windows to overlap

 // block boundaries to penalize blocking artifacts.

-double vpx_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,

-                 int stride_img2, int width, int height) {

+static double vpx_ssim2(const uint8_t *img1, const uint8_t *img2,

+                        int stride_img1, int stride_img2, int width,

+                        int height) {

   int i, j;

   int samples = 0;

   double ssim_total = 0;

@@ -131,9 +133,9 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-double vpx_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,

-                        int stride_img2, int width, int height,

-                        unsigned int bd) {

+static double vpx_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,

+                               int stride_img1, int stride_img2, int width,

+                               int height, unsigned int bd) {

   int i, j;

   int samples = 0;

   double ssim_total = 0;

@@ -154,7 +156,8 @@

 #endif  // CONFIG_VP9_HIGHBITDEPTH

-double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,

+double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source,

+                     const YV12_BUFFER_CONFIG *dest,

                      double *weight) {

   double a, b, c;

   double ssimv;

@@ -178,7 +181,8 @@

   return ssimv;

-double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,

+double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,

+                      const YV12_BUFFER_CONFIG *dest,

                       double *ssim_y, double *ssim_u, double *ssim_v) {

   double ssim_all = 0;

   double a, b, c;

@@ -231,7 +235,7 @@

 // Replace c1 with n*n * c1 for the final step that leads to this code:

 // The final step scales by 12 bits so we don't lose precision in the constants.

-double ssimv_similarity(Ssimv *sv, int64_t n) {

+static double ssimv_similarity(const Ssimv *sv, int64_t n) {

   // Scale the constants by number of pixels.

   const int64_t c1 = (cc1 * n * n) >> 12;

   const int64_t c2 = (cc2 * n * n) >> 12;

@@ -262,7 +266,7 @@

//

 // 255 * 255 - (sum_s - sum_r) / count * (sum_s - sum_r) / count

//

-double ssimv_similarity2(Ssimv *sv, int64_t n) {

+static double ssimv_similarity2(const Ssimv *sv, int64_t n) {

   // Scale the constants by number of pixels.

   const int64_t c1 = (cc1 * n * n) >> 12;

   const int64_t c2 = (cc2 * n * n) >> 12;

@@ -278,8 +282,8 @@

   return l * v;

-void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch,

-                 Ssimv *sv) {

+static void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2,

+                        int img2_pitch, Ssimv *sv) {

   vpx_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,

                      &sv->sum_s, &sv->sum_r, &sv->sum_sq_s, &sv->sum_sq_r,

                      &sv->sum_sxr);

@@ -448,8 +452,8 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,

-                            YV12_BUFFER_CONFIG *dest,

+double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,

+                            const YV12_BUFFER_CONFIG *dest,

                             double *weight, unsigned int bd) {

   double a, b, c;

   double ssimv;

@@ -473,8 +477,8 @@

   return ssimv;

-double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,

-                             YV12_BUFFER_CONFIG *dest, double *ssim_y,

+double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,

+                             const YV12_BUFFER_CONFIG *dest, double *ssim_y,

                              double *ssim_u, double *ssim_v, unsigned int bd) {

   double ssim_all = 0;

   double a, b, c;

--- a/vpx_dsp/ssim.h

+++ b/vpx_dsp/ssim.h

@@ -8,8 +8,8 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#ifndef VPX_ENCODER_VP9_SSIM_H_

-#define VPX_ENCODER_VP9_SSIM_H_

+#ifndef VPX_DSP_SSIM_H_

+#define VPX_DSP_SSIM_H_

 #ifdef __cplusplus

 extern "C" {

@@ -29,19 +29,19 @@

 // metrics used for calculating ssim, ssim2, dssim, and ssimc

 typedef struct {

   // source sum ( over 8x8 region )

-  uint64_t sum_s;

+  uint32_t sum_s;

   // reference sum (over 8x8 region )

-  uint64_t sum_r;

+  uint32_t sum_r;

   // source sum squared ( over 8x8 region )

-  uint64_t sum_sq_s;

+  uint32_t sum_sq_s;

   // reference sum squared (over 8x8 region )

-  uint64_t sum_sq_r;

+  uint32_t sum_sq_r;

   // sum of source times reference (over 8x8 region)

-  uint64_t sum_sxr;

+  uint32_t sum_sxr;

   // calculated ssim score between source and reference

   double ssim;

@@ -72,26 +72,29 @@

                       int img2_pitch, int width, int height, Ssimv *sv2,

                       Metrics *m, int do_inconsistency);

-double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,

+double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source,

+                     const YV12_BUFFER_CONFIG *dest,

                      double *weight);

-double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,

+double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,

+                      const YV12_BUFFER_CONFIG *dest,

                       double *ssim_y, double *ssim_u, double *ssim_v);

 double vpx_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,

                          double *ssim_y, double *ssim_u, double *ssim_v);

-double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,

+double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,

+                   const YV12_BUFFER_CONFIG *dest,

                    double *ssim_y, double *ssim_u, double *ssim_v);

 #if CONFIG_VP9_HIGHBITDEPTH

-double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,

-                            YV12_BUFFER_CONFIG *dest,

+double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,

+                            const YV12_BUFFER_CONFIG *dest,

                             double *weight,

                             unsigned int bd);

-double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,

-                             YV12_BUFFER_CONFIG *dest,

+double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,

+                             const YV12_BUFFER_CONFIG *dest,

                              double *ssim_y,

                              double *ssim_u,

                              double *ssim_v,

@@ -102,4 +105,4 @@

 }  // extern "C"

 #endif

-#endif  // VPX_ENCODER_VP9_SSIM_H_

+#endif  // VPX_DSP_SSIM_H_

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -994,10 +994,10 @@

 # Structured Similarity (SSIM)

 if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {

-    add_proto qw/void vpx_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";

+    add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";

     specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";

-    add_proto qw/void vpx_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";

+    add_proto qw/void vpx_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";

     specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";

--- a/vpx_dsp/x86/ssim_opt_x86_64.asm

+++ b/vpx_dsp/x86/ssim_opt_x86_64.asm

@@ -49,11 +49,11 @@

 ;    int sp,

 ;    unsigned char *r,

 ;    int rp

-;    unsigned long *sum_s,

-;    unsigned long *sum_r,

-;    unsigned long *sum_sq_s,

-;    unsigned long *sum_sq_r,

-;    unsigned long *sum_sxr);

+;    uint32_t *sum_s,

+;    uint32_t *sum_r,

+;    uint32_t *sum_sq_s,

+;    uint32_t *sum_sq_r,

+;    uint32_t *sum_sxr);

 ; TODO: Use parm passing through structure, probably don't need the pxors

 ; ( calling app will initialize to 0 ) could easily fit everything in sse2

@@ -139,11 +139,11 @@

 ;    int sp,

 ;    unsigned char *r,

 ;    int rp

-;    unsigned long *sum_s,

-;    unsigned long *sum_r,

-;    unsigned long *sum_sq_s,

-;    unsigned long *sum_sq_r,

-;    unsigned long *sum_sxr);

+;    uint32_t *sum_s,

+;    uint32_t *sum_r,

+;    uint32_t *sum_sq_s,

+;    uint32_t *sum_sq_r,

+;    uint32_t *sum_sxr);

 ; TODO: Use parm passing through structure, probably don't need the pxors

 ; ( calling app will initialize to 0 ) could easily fit everything in sse2