ref: 2808dd12c29b83226563fcf443b72f4be3dcb73d
parent: d705e17f5ef070fc75fa10b81a7e199ff18da01b
parent: 26f4f2dc8edd95c0fb9192d832ca0b601782804b
author: Aℓex Converse <[email protected]>
date: Mon Aug 10 12:41:54 EDT 2015
Merge changes I2aa2a545,I63932eda,Ie3694ecd
* changes:
  ssim: Add missing statics and consts
  psnrhvs: Add missing consts and static consts.
  ssim: Replace unsigned long with uint32_t.
--- a/vpx_dsp/psnrhvs.c
+++ b/vpx_dsp/psnrhvs.c
@@ -23,7 +23,8 @@
#endif
#include <string.h>
-void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) {
+static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
+ int xstride) {
(void) xstride;
vpx_fdct8x8(x, y, ystride);
}
@@ -31,56 +32,57 @@
/* Normalized inverse quantization matrix for 8x8 DCT at the point of
* transparency. This is not the JPEG based matrix from the paper,
this one gives a slightly higher MOS agreement.*/
-float csf_y[8][8] = {{1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411,
- 1.00227514334, 0.678296995242, 0.466224900598, 0.3265091542}, {2.2901594831,
- 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963, 0.868920337363,
- 0.61280991668, 0.436405793551}, {2.08509755623, 2.04793073064,
- 1.34329019223, 1.09205635862, 0.875748795257, 0.670882927016,
- 0.501731932449, 0.372504254596}, {1.48366094411, 1.68731108984,
- 1.09205635862, 0.772819797575, 0.605636379554, 0.48309405692,
- 0.380429446972, 0.295774038565}, {1.00227514334, 1.2305666963,
- 0.875748795257, 0.605636379554, 0.448996256676, 0.352889268808,
- 0.283006984131, 0.226951348204}, {0.678296995242, 0.868920337363,
- 0.670882927016, 0.48309405692, 0.352889268808, 0.27032073436,
- 0.215017739696, 0.17408067321}, {0.466224900598, 0.61280991668,
- 0.501731932449, 0.380429446972, 0.283006984131, 0.215017739696,
- 0.168869545842, 0.136153931001}, {0.3265091542, 0.436405793551,
- 0.372504254596, 0.295774038565, 0.226951348204, 0.17408067321,
- 0.136153931001, 0.109083846276}};
-float csf_cb420[8][8] = {
+static const float csf_y[8][8] = {
+ {1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,
+ 0.678296995242, 0.466224900598, 0.3265091542},
+ {2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,
+ 0.868920337363, 0.61280991668, 0.436405793551},
+ {2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257,
+ 0.670882927016, 0.501731932449, 0.372504254596},
+ {1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575,
+ 0.605636379554, 0.48309405692, 0.380429446972, 0.295774038565},
+ {1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554,
+ 0.448996256676, 0.352889268808, 0.283006984131, 0.226951348204},
+ {0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692,
+ 0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321},
+ {0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972,
+ 0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001},
+ {0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,
+ 0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276}};
+static const float csf_cb420[8][8] = {
{1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,
- 0.898018824055, 0.74725392039, 0.615105596242}, {2.46074210438,
- 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,
- 1.17428548929, 0.996404342439, 0.830890433625}, {1.18284184739,
- 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362,
- 0.960060382087, 0.849823426169, 0.731221236837}, {1.14982565193,
- 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099,
- 0.751437590932, 0.685398513368, 0.608694761374}, {1.05017074788,
- 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187,
- 0.605503172737, 0.55002013668, 0.495804539034}, {0.898018824055,
- 1.17428548929, 0.960060382087, 0.751437590932, 0.605503172737,
- 0.514674450957, 0.454353482512, 0.407050308965}, {0.74725392039,
- 0.996404342439, 0.849823426169, 0.685398513368, 0.55002013668,
- 0.454353482512, 0.389234902883, 0.342353999733}, {0.615105596242,
- 0.830890433625, 0.731221236837, 0.608694761374, 0.495804539034,
- 0.407050308965, 0.342353999733, 0.295530605237}};
-float csf_cr420[8][8] = {
+ 0.898018824055, 0.74725392039, 0.615105596242},
+ {2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,
+ 1.17428548929, 0.996404342439, 0.830890433625},
+ {1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362,
+ 0.960060382087, 0.849823426169, 0.731221236837},
+ {1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629,
+ 0.801821139099, 0.751437590932, 0.685398513368, 0.608694761374},
+ {1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099,
+ 0.676555426187, 0.605503172737, 0.55002013668, 0.495804539034},
+ {0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932,
+ 0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965},
+ {0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368,
+ 0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733},
+ {0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,
+ 0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237}};
+static const float csf_cr420[8][8] = {
{2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,
- 0.867069376285, 0.721500455585, 0.593906509971}, {2.62502345193,
- 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,
- 1.13381474809, 0.962064122248, 0.802254508198}, {1.26180942886,
- 1.17180569821, 0.944981930573, 0.990876405848, 0.995903384143,
- 0.926972725286, 0.820534991409, 0.706020324706}, {1.11019789803,
- 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195,
- 0.725539939514, 0.661776842059, 0.587716619023}, {1.01397751469,
- 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286,
- 0.584635025748, 0.531064164893, 0.478717061273}, {0.867069376285,
- 1.13381474809, 0.926972725286, 0.725539939514, 0.584635025748,
- 0.496936637883, 0.438694579826, 0.393021669543}, {0.721500455585,
- 0.962064122248, 0.820534991409, 0.661776842059, 0.531064164893,
- 0.438694579826, 0.375820256136, 0.330555063063}, {0.593906509971,
- 0.802254508198, 0.706020324706, 0.587716619023, 0.478717061273,
- 0.393021669543, 0.330555063063, 0.285345396658}};
+ 0.867069376285, 0.721500455585, 0.593906509971},
+ {2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,
+ 1.13381474809, 0.962064122248, 0.802254508198},
+ {1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848,
+ 0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706},
+ {1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195,
+ 0.725539939514, 0.661776842059, 0.587716619023},
+ {1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195,
+ 0.653238524286, 0.584635025748, 0.531064164893, 0.478717061273},
+ {0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514,
+ 0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543},
+ {0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059,
+ 0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063},
+ {0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,
+ 0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658}};
static double convert_score_db(double _score, double _weight) {
return 10 * (log10(255 * 255) - log10(_weight * _score));
@@ -89,7 +91,7 @@
static double calc_psnrhvs(const unsigned char *_src, int _systride,
const unsigned char *_dst, int _dystride,
double _par, int _w, int _h, int _step,
- float _csf[8][8]) {
+ const float _csf[8][8]) {
float ret;
int16_t dct_s[8 * 8], dct_d[8 * 8];
tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8];
@@ -200,11 +202,12 @@
ret /= pixels;
return ret;
}
-double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
- double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs) {
+double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs,
+ double *u_psnrhvs, double *v_psnrhvs) {
double psnrhvs;
- double par = 1.0;
- int step = 7;
+ const double par = 1.0;
+ const int step = 7;
vpx_clear_system_state();
*y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer,
dest->y_stride, par, source->y_crop_width,
--- a/vpx_dsp/ssim.c
+++ b/vpx_dsp/ssim.c
@@ -13,10 +13,10 @@
#include "vpx_dsp/ssim.h"
#include "vpx_ports/mem.h"
-void vpx_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
- int rp, unsigned long *sum_s, unsigned long *sum_r,
- unsigned long *sum_sq_s, unsigned long *sum_sq_r,
- unsigned long *sum_sxr) {
+void vpx_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r,
+ int rp, uint32_t *sum_s, uint32_t *sum_r,
+ uint32_t *sum_sq_s, uint32_t *sum_sq_r,
+ uint32_t *sum_sxr) {
int i, j;
for (i = 0; i < 16; i++, s += sp, r += rp) {
for (j = 0; j < 16; j++) {
@@ -28,10 +28,10 @@
}
}
}
-void vpx_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp,
- unsigned long *sum_s, unsigned long *sum_r,
- unsigned long *sum_sq_s, unsigned long *sum_sq_r,
- unsigned long *sum_sxr) {
+void vpx_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
+ uint32_t *sum_s, uint32_t *sum_r,
+ uint32_t *sum_sq_s, uint32_t *sum_sq_r,
+ uint32_t *sum_sxr) {
int i, j;
for (i = 0; i < 8; i++, s += sp, r += rp) {
for (j = 0; j < 8; j++) {
@@ -45,7 +45,8 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-void vpx_highbd_ssim_parms_8x8_c(uint16_t *s, int sp, uint16_t *r, int rp,
+void vpx_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp,
+ const uint16_t *r, int rp,
uint32_t *sum_s, uint32_t *sum_r,
uint32_t *sum_sq_s, uint32_t *sum_sq_r,
uint32_t *sum_sxr) {
@@ -65,9 +66,9 @@
static const int64_t cc1 = 26634; // (64^2*(.01*255)^2
static const int64_t cc2 = 239708; // (64^2*(.03*255)^2
-static double similarity(unsigned long sum_s, unsigned long sum_r,
- unsigned long sum_sq_s, unsigned long sum_sq_r,
- unsigned long sum_sxr, int count) {
+static double similarity(uint32_t sum_s, uint32_t sum_r,
+ uint32_t sum_sq_s, uint32_t sum_sq_r,
+ uint32_t sum_sxr, int count) {
int64_t ssim_n, ssim_d;
int64_t c1, c2;
@@ -85,8 +86,8 @@
return ssim_n * 1.0 / ssim_d;
}
-static double ssim_8x8(uint8_t *s, int sp, uint8_t *r, int rp) {
- unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
+static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) {
+ uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
vpx_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
&sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
@@ -93,8 +94,8 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-static double highbd_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,
- unsigned int bd) {
+static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,
+ int rp, unsigned int bd) {
uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
const int oshift = bd - 8;
vpx_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
@@ -111,8 +112,9 @@
// We are using a 8x8 moving window with starting location of each 8x8 window
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
-double vpx_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
- int stride_img2, int width, int height) {
+static double vpx_ssim2(const uint8_t *img1, const uint8_t *img2,
+ int stride_img1, int stride_img2, int width,
+ int height) {
int i, j;
int samples = 0;
double ssim_total = 0;
@@ -131,9 +133,9 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-double vpx_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
- int stride_img2, int width, int height,
- unsigned int bd) {
+static double vpx_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
+ int stride_img1, int stride_img2, int width,
+ int height, unsigned int bd) {
int i, j;
int samples = 0;
double ssim_total = 0;
@@ -154,7 +156,8 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *weight) {
double a, b, c;
double ssimv;
@@ -178,7 +181,8 @@
return ssimv;
}
-double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v) {
double ssim_all = 0;
double a, b, c;
@@ -231,7 +235,7 @@
// Replace c1 with n*n * c1 for the final step that leads to this code:
// The final step scales by 12 bits so we don't lose precision in the constants.
-double ssimv_similarity(Ssimv *sv, int64_t n) {
+static double ssimv_similarity(const Ssimv *sv, int64_t n) {
// Scale the constants by number of pixels.
const int64_t c1 = (cc1 * n * n) >> 12;
const int64_t c2 = (cc2 * n * n) >> 12;
@@ -262,7 +266,7 @@
//
// 255 * 255 - (sum_s - sum_r) / count * (sum_s - sum_r) / count
//
-double ssimv_similarity2(Ssimv *sv, int64_t n) {
+static double ssimv_similarity2(const Ssimv *sv, int64_t n) {
// Scale the constants by number of pixels.
const int64_t c1 = (cc1 * n * n) >> 12;
const int64_t c2 = (cc2 * n * n) >> 12;
@@ -278,8 +282,8 @@
return l * v;
}
-void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch,
- Ssimv *sv) {
+static void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2,
+ int img2_pitch, Ssimv *sv) {
vpx_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,
&sv->sum_s, &sv->sum_r, &sv->sum_sq_s, &sv->sum_sq_r,
&sv->sum_sxr);
@@ -448,8 +452,8 @@
#if CONFIG_VP9_HIGHBITDEPTH
-double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
+double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *weight, unsigned int bd) {
double a, b, c;
double ssimv;
@@ -473,8 +477,8 @@
return ssimv;
}
-double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest, double *ssim_y,
+double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest, double *ssim_y,
double *ssim_u, double *ssim_v, unsigned int bd) {
double ssim_all = 0;
double a, b, c;
--- a/vpx_dsp/ssim.h
+++ b/vpx_dsp/ssim.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VPX_ENCODER_VP9_SSIM_H_
-#define VPX_ENCODER_VP9_SSIM_H_
+#ifndef VPX_DSP_SSIM_H_
+#define VPX_DSP_SSIM_H_
#ifdef __cplusplus
extern "C" {
@@ -29,19 +29,19 @@
// metrics used for calculating ssim, ssim2, dssim, and ssimc
typedef struct {
// source sum ( over 8x8 region )
- uint64_t sum_s;
+ uint32_t sum_s;
// reference sum (over 8x8 region )
- uint64_t sum_r;
+ uint32_t sum_r;
// source sum squared ( over 8x8 region )
- uint64_t sum_sq_s;
+ uint32_t sum_sq_s;
// reference sum squared (over 8x8 region )
- uint64_t sum_sq_r;
+ uint32_t sum_sq_r;
// sum of source times reference (over 8x8 region)
- uint64_t sum_sxr;
+ uint32_t sum_sxr;
// calculated ssim score between source and reference
double ssim;
@@ -72,26 +72,29 @@
int img2_pitch, int width, int height, Ssimv *sv2,
Metrics *m, int do_inconsistency);
-double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *weight);
-double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v);
double vpx_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v);
-double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v);
#if CONFIG_VP9_HIGHBITDEPTH
-double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
+double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *weight,
unsigned int bd);
-double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
+double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
double *ssim_y,
double *ssim_u,
double *ssim_v,
@@ -102,4 +105,4 @@
} // extern "C"
#endif
-#endif // VPX_ENCODER_VP9_SSIM_H_
+#endif // VPX_DSP_SSIM_H_
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -994,10 +994,10 @@
# Structured Similarity (SSIM)
#
if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void vpx_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
- add_proto qw/void vpx_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ add_proto qw/void vpx_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";
}
--- a/vpx_dsp/x86/ssim_opt_x86_64.asm
+++ b/vpx_dsp/x86/ssim_opt_x86_64.asm
@@ -49,11 +49,11 @@
; int sp,
; unsigned char *r,
; int rp
-; unsigned long *sum_s,
-; unsigned long *sum_r,
-; unsigned long *sum_sq_s,
-; unsigned long *sum_sq_r,
-; unsigned long *sum_sxr);
+; uint32_t *sum_s,
+; uint32_t *sum_r,
+; uint32_t *sum_sq_s,
+; uint32_t *sum_sq_r,
+; uint32_t *sum_sxr);
;
; TODO: Use parm passing through structure, probably don't need the pxors
; ( calling app will initialize to 0 ) could easily fit everything in sse2
@@ -139,11 +139,11 @@
; int sp,
; unsigned char *r,
; int rp
-; unsigned long *sum_s,
-; unsigned long *sum_r,
-; unsigned long *sum_sq_s,
-; unsigned long *sum_sq_r,
-; unsigned long *sum_sxr);
+; uint32_t *sum_s,
+; uint32_t *sum_r,
+; uint32_t *sum_sq_s,
+; uint32_t *sum_sq_r,
+; uint32_t *sum_sxr);
;
; TODO: Use parm passing through structure, probably don't need the pxors
; ( calling app will initialize to 0 ) could easily fit everything in sse2