ref: 0ef1f216713415d96b8a8fb3ec139d084c0266f1
parent: 6dd8086d4d1bed7dcd5948651d24649809f90e1f
author: Gregory Maxwell <[email protected]>
date: Tue Nov 22 18:06:36 EST 2011
Opusenc: Downmixing, multistream --save-range, multistream set-ctl.
--- a/src/audio-in.c
+++ b/src/audio-in.c
@@ -915,3 +915,95 @@
free(rs);
}
+
+/*State for the downmixing reader: it wraps another sample reader and mixes
+ * that reader's in_channels channels down to out_channels channels.*/
+typedef struct {
+ audio_read_func real_reader; /*Wrapped reader that supplies the unmixed samples.*/
+ void *real_readdata; /*Opaque state passed to real_reader on every call.*/
+ float *bufs; /*Scratch buffer real_reader fills with interleaved input samples.*/
+ float *matrix; /*Mixing gains, indexed as matrix[in_channels*out+in].*/
+ int in_channels; /*Number of channels delivered by real_reader.*/
+ int out_channels; /*Number of channels written by read_downmix.*/
+} downmix;
+
+/*Reader callback that pulls frames from the wrapped reader and mixes them.
+ * data is the downmix state installed by setup_downmix, buffer receives the
+ * interleaved mixed output and samples is the number of frames requested.
+ * Returns the frame count reported by the wrapped reader.*/
+static long read_downmix(void *data, float *buffer, int samples)
+{
+  downmix *mix = data;
+  long got = mix->real_reader(mix->real_readdata, mix->bufs, samples);
+  const int n_in = mix->in_channels;
+  const int n_out = mix->out_channels;
+  int frame;
+
+  for(frame=0;frame<got;frame++){
+    /*One interleaved input frame and the output frame it produces.*/
+    const float *src = mix->bufs+frame*n_in;
+    float *dst = buffer+frame*n_out;
+    int o;
+    for(o=0;o<n_out;o++){
+      /*Row of gains feeding output channel o.*/
+      const float *gain = mix->matrix+n_in*o;
+      int c;
+      dst[o]=0;
+      for(c=0;c<n_in;c++)dst[o]+=src[c]*gain[c];
+    }
+  }
+  return got;
+}
+
+/*Wraps opt's sample reader in a downmixer that produces out_channels channels.
+ * Only mono and stereo outputs are supported, the input must have more
+ * channels than the output, and stereo output is limited to inputs of at most
+ * 8 channels.
+ * On success opt->read_samples, opt->readdata and opt->channels are redirected
+ * to the downmixer and out_channels is returned; undo with clear_downmix().
+ * On failure a message is printed, opt is left untouched and 0 is returned.*/
+int setup_downmix(oe_enc_opt *opt, int out_channels) {
+  /*Indexed as [input channels-2][input channel][output channel]: the gain of
+    each input channel into output channels 0 and 1.*/
+  static const float stupid_matrix[7][8][2]={
+    /*2*/ {{1,0},{0,1}},
+    /*3*/ {{1,0},{0.7071f,0.7071f},{0,1}},
+    /*4*/ {{1,0},{0,1},{0.866f,0.5f},{0.5f,0.866f}},
+    /*5*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f}},
+    /*6*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.7071f,0.7071f}},
+    /*7*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.6123f,0.6123f},{0.7071f,0.7071f}},
+    /*8*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.866f,0.5f},{0.5f,0.866f},{0.7071f,0.7071f}},
+  };
+  float sum;
+  downmix *d;
+  int i,j;
+
+  /*A non-positive output count would otherwise slip through and reach malloc.*/
+  if(out_channels<1 || opt->channels<=out_channels || out_channels>2 || (out_channels==2&&opt->channels>8)) {
+    fprintf(stderr, "Downmix must actually downmix and only knows mono/stereo out.\n");
+    if(opt->channels>8)fprintf(stderr, "Downmix also only knows how to mix >8ch to mono.\n");
+    return 0;
+  }
+
+  /*Allocate only after validation so the rejection path above cannot leak.*/
+  d = calloc(1, sizeof(downmix));
+  if(d!=NULL){
+    d->bufs = malloc(sizeof(float)*opt->channels*4096);
+    d->matrix = malloc(sizeof(float)*opt->channels*out_channels);
+  }
+  if(d==NULL || d->bufs==NULL || d->matrix==NULL){
+    fprintf(stderr, "Error: couldn't allocate memory for the downmixer.\n");
+    if(d!=NULL){
+      free(d->bufs);
+      free(d->matrix);
+    }
+    free(d);
+    return 0;
+  }
+  d->real_reader = opt->read_samples;
+  d->real_readdata = opt->readdata;
+  d->in_channels=opt->channels;
+  d->out_channels=out_channels;
+
+  if(out_channels==1&&d->in_channels>8){
+    /*No table entry beyond 8 channels: weight every input equally.*/
+    for(i=0;i<d->in_channels;i++)d->matrix[i]=1.0f/d->in_channels;
+  }else if(out_channels==2){
+    for(j=0;j<d->out_channels;j++)
+      for(i=0;i<d->in_channels;i++)d->matrix[d->in_channels*j+i]=
+        stupid_matrix[opt->channels-2][i][j];
+  }else{
+    /*Mono from at most 8 channels: add up both stereo gains of each input.*/
+    for(i=0;i<d->in_channels;i++)d->matrix[i]=
+      (stupid_matrix[opt->channels-2][i][0])+
+      (stupid_matrix[opt->channels-2][i][1]);
+  }
+  /*Rescale so that all gains together add up to out_channels.*/
+  sum=0;
+  for(i=0;i<d->in_channels*d->out_channels;i++)sum+=d->matrix[i];
+  sum=(float)out_channels/sum;
+  for(i=0;i<d->in_channels*d->out_channels;i++)d->matrix[i]*=sum;
+  opt->read_samples = read_downmix;
+  opt->readdata = d;
+
+  opt->channels = out_channels;
+  return out_channels;
+}
+
+/*Removes the downmixer installed by setup_downmix: restores the wrapped
+ * reader, its state and the input channel count in opt, then frees the
+ * downmix state found in opt->readdata.*/
+void clear_downmix(oe_enc_opt *opt) {
+  downmix *mix = opt->readdata;
+
+  /*Copy everything back out of the state before it is freed.*/
+  opt->channels = mix->in_channels; /* other things in cleanup rely on this */
+  opt->readdata = mix->real_readdata;
+  opt->read_samples = mix->real_reader;
+
+  free(mix->matrix);
+  free(mix->bufs);
+  free(mix);
+}
--- a/src/opusenc.c
+++ b/src/opusenc.c
@@ -130,6 +130,7 @@
printf("\nDiagnostic options:\n");
printf(" --save-range file Saves check values for every frame to a file\n");
printf(" --set-ctl-int x=y Pass the encoder control x with value y (advanced)\n");
+ printf(" Preface with s: to direct the ctl to multistream s\n");
printf(" This may be used multiple times\n");
printf(" --uncoupled Use one mono stream per channel\n");
printf("\nMetadata options:\n");
@@ -145,6 +146,180 @@
printf(" --raw-endianness n 1 for bigendian, 0 for little (defaults to 0)\n");
}
+/*This is some non-exported code copied wholesale from libopus.
+ *Normal programs shouldn't need these functions, but we use them here
+ *to parse deep inside multichannel packets in order to get diagnostic
+ *data for save-range. If you're thinking about copying it and you aren't
+ *making an opus stream diagnostic tool, you're probably doing something
+ *wrong.*/
+/*Decodes one frame-length field from the start of data.
+ * len is the number of bytes available and the decoded length is stored in
+ * *size. Returns the number of bytes the field occupies (1 or 2), or -1 with
+ * *size set to -1 when data is too short to hold the field.*/
+static int parse_size(const unsigned char *data, opus_int32 len, short *size)
+{
+  /*A first byte of 252 or more means a second byte follows, weighted by 4.*/
+  int two_bytes = len>=1 && data[0]>=252;
+
+  if (len<1 || (two_bytes && len<2))
+  {
+    *size = -1;
+    return -1;
+  }
+  if (!two_bytes)
+  {
+    *size = data[0];
+    return 1;
+  }
+  *size = 4*data[1] + data[0];
+  return 2;
+}
+
+/*Splits one Opus packet into its frames.
+ * data, len: the packet bytes and how many of them are available.
+ * self_delimited: non-zero when the last frame carries its own explicit
+ *  length; the save-range code in this file passes non-zero for every stream
+ *  of a multistream packet except the last one.
+ * out_toc: if non-NULL, receives the first (TOC) byte of the packet.
+ * frames: if non-NULL, receives a pointer to the start of each frame.
+ * size: required; receives the length in bytes of each frame.
+ * payload_offset: if non-NULL, receives the distance from the start of the
+ *  packet to the final position of the parse pointer. That pointer is only
+ *  stepped over the frame data when frames is non-NULL.
+ * Returns the number of frames, OPUS_BAD_ARG when size is NULL, or
+ * OPUS_INVALID_PACKET when the sizes in the packet are inconsistent.*/
+static int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+ int self_delimited, unsigned char *out_toc,
+ const unsigned char *frames[48], short size[48], int *payload_offset)
+{
+ int i, bytes;
+ int count;
+ int cbr;
+ unsigned char ch, toc;
+ int framesize;
+ int last_size;
+ const unsigned char *data0 = data;
+
+ if (size==NULL)
+ return OPUS_BAD_ARG;
+
+ /* Samples per frame at 48 kHz; bounds the frame count in case 3 below. */
+ framesize = opus_packet_get_samples_per_frame(data, 48000);
+
+ cbr = 0;
+ /* NOTE(review): the TOC byte is read before len is examined, so len>=1 is
+ presumably a precondition -- confirm against callers. */
+ toc = *data++;
+ len--;
+ last_size = len;
+ /* The low two bits of the TOC byte select the frame-count layout. */
+ switch (toc&0x3)
+ {
+ /* One frame */
+ case 0:
+ count=1;
+ break;
+ /* Two CBR frames */
+ case 1:
+ count=2;
+ cbr = 1;
+ if (!self_delimited)
+ {
+ if (len&0x1)
+ return OPUS_INVALID_PACKET;
+ size[0] = last_size = len/2;
+ }
+ break;
+ /* Two VBR frames */
+ case 2:
+ count = 2;
+ bytes = parse_size(data, len, size);
+ len -= bytes;
+ if (size[0]<0 || size[0] > len)
+ return OPUS_INVALID_PACKET;
+ data += bytes;
+ last_size = len-size[0];
+ break;
+ /* Multiple CBR/VBR frames (from 0 to 120 ms) */
+ case 3:
+ if (len<1)
+ return OPUS_INVALID_PACKET;
+ /* Number of frames encoded in bits 0 to 5 */
+ ch = *data++;
+ count = ch&0x3F;
+ if (count <= 0 || framesize*count > 5760)
+ return OPUS_INVALID_PACKET;
+ len--;
+ /* Padding flag is bit 6 */
+ if (ch&0x40)
+ {
+ int padding=0;
+ int p;
+ /* A length byte of 255 adds 254 bytes of padding and is followed by
+ another length byte; any other value ends the chain. */
+ do {
+ if (len<=0)
+ return OPUS_INVALID_PACKET;
+ p = *data++;
+ len--;
+ padding += p==255 ? 254: p;
+ } while (p==255);
+ len -= padding;
+ }
+ if (len<0)
+ return OPUS_INVALID_PACKET;
+ /* VBR flag is bit 7 */
+ cbr = !(ch&0x80);
+ if (!cbr)
+ {
+ /* VBR case */
+ last_size = len;
+ for (i=0;i<count-1;i++)
+ {
+ bytes = parse_size(data, len, size+i);
+ len -= bytes;
+ if (size[i]<0 || size[i] > len)
+ return OPUS_INVALID_PACKET;
+ data += bytes;
+ last_size -= bytes+size[i];
+ }
+ if (last_size<0)
+ return OPUS_INVALID_PACKET;
+ } else if (!self_delimited)
+ {
+ /* CBR case */
+ last_size = len/count;
+ if (last_size*count!=len)
+ return OPUS_INVALID_PACKET;
+ for (i=0;i<count-1;i++)
+ size[i] = last_size;
+ }
+ break;
+ }
+ /* Self-delimited framing has an extra size for the last frame. */
+ if (self_delimited)
+ {
+ bytes = parse_size(data, len, size+count-1);
+ len -= bytes;
+ if (size[count-1]<0 || size[count-1] > len)
+ return OPUS_INVALID_PACKET;
+ data += bytes;
+ /* For CBR packets, apply the size to all the frames. */
+ if (cbr)
+ {
+ if (size[count-1]*count > len)
+ return OPUS_INVALID_PACKET;
+ for (i=0;i<count-1;i++)
+ size[i] = size[count-1];
+ } else if(size[count-1] > last_size)
+ return OPUS_INVALID_PACKET;
+ } else
+ {
+ /* Because it's not encoded explicitly, it's possible the size of the
+ last packet (or all the packets, for the CBR case) is larger than
+ 1275. Reject them here.*/
+ if (last_size > 1275)
+ return OPUS_INVALID_PACKET;
+ size[count-1] = last_size;
+ }
+
+ /* Record where each frame starts; this also moves data past the frames. */
+ if (frames)
+ {
+ for (i=0;i<count;i++)
+ {
+ frames[i] = data;
+ data += size[i];
+ }
+ }
+
+ if (out_toc)
+ *out_toc = toc;
+
+ if (payload_offset)
+ *payload_offset = data-data0;
+
+ return count;
+}
+
+
static inline void print_time(double seconds)
{
long long hours, minutes;
@@ -368,23 +543,30 @@
exit(1);
}
}else if(strcmp(long_options[option_index].name,"set-ctl-int")==0){
- int len=strlen(optarg);
- char *spos;
+ int len=strlen(optarg),target;
+ char *spos,*tpos;
spos=strchr(optarg,'=');
if(len<3||spos==NULL||(spos-optarg)<1||(spos-optarg)>=len){
fprintf(stderr, "Invalid set-ctl-int: %s\n", optarg);
- fprintf(stderr, "Syntax is --set-ctl-int intX=intY\n");
+ fprintf(stderr, "Syntax is --set-ctl-int intX=intY or\n");
+ fprintf(stderr, "Syntax is --set-ctl-int intS:intX=intY\n");
exit(1);
}
- if((atoi(optarg)&1)!=0){
+ tpos=strchr(optarg,':');
+ if(tpos==NULL){
+ target=-1;
+ tpos=optarg-1;
+ }else target=atoi(optarg);
+ if((atoi(tpos+1)&1)!=0){
fprintf(stderr, "Invalid set-ctl-int: %s\n", optarg);
fprintf(stderr, "libopus set CTL values are even.\n");
exit(1);
}
- if(opt_ctls==0)opt_ctls_ctlval=malloc(sizeof(int)*2);
- else opt_ctls_ctlval=realloc(opt_ctls_ctlval,sizeof(int)*(opt_ctls+1)*2);
- opt_ctls_ctlval[opt_ctls<<1]=atoi(optarg);
- opt_ctls_ctlval[(opt_ctls<<1)+1]=atoi(spos+1);
+ if(opt_ctls==0)opt_ctls_ctlval=malloc(sizeof(int)*3);
+ else opt_ctls_ctlval=realloc(opt_ctls_ctlval,sizeof(int)*(opt_ctls+1)*3);
+ opt_ctls_ctlval[opt_ctls*3]=target;
+ opt_ctls_ctlval[opt_ctls*3+1]=atoi(tpos+1);
+ opt_ctls_ctlval[opt_ctls*3+2]=atoi(spos+1);
opt_ctls++;
}else if(strcmp(long_options[option_index].name,"save-range")==0){
frange=fopen(optarg,"w");
@@ -465,6 +647,14 @@
exit(1);
}
+ if(downmix==0&&inopt.channels>2&&bitrate>0&&bitrate<(32000*inopt.channels)){
+ if(!quiet)fprintf(stderr,"Notice: Surround bitrate less than 32kbit/sec/channel, downmixing.\n");
+ downmix=inopt.channels>8?1:2;
+ }
+
+ if(downmix&&downmix<inopt.channels)downmix=setup_downmix(&inopt,downmix);
+ else downmix=0;
+
rate=inopt.rate;
chan=inopt.channels;
inopt.skip=0;
@@ -479,7 +669,6 @@
frame_size=frame_size/(48000/coding_rate);
-
/*Scale the resampler complexity, but only for 48000 output because
the near-cutoff behavior matters a lot more at lower rates.*/
if(rate!=coding_rate)setup_resample(&inopt,coding_rate==48000?complexity/2:5,coding_rate);
@@ -496,9 +685,9 @@
/*3*/ {1, 0, 0,2,1},
/*4*/ {2, 0, 0,1,2,3},
/*5*/ {2, 0, 0,4,1,2,3},
- /*6*/ {2,1<<5, 0,4,1,2,3,5},
- /*7*/ {2,1<<6, 0,4,1,2,3,5,6},
- /*6*/ {3,1<<7, 0,6,1,2,3,4,5,7}
+ /*6*/ {2,1<<3, 0,4,1,2,3,5},
+ /*7*/ {2,1<<4, 0,4,1,2,3,5,6},
+ /*6*/ {3,1<<4, 0,6,1,2,3,4,5,7}
};
for(i=0;i<header.channels;i++)mapping[i]=opusenc_streams[header.channels-1][i+2];
force_narrow=opusenc_streams[header.channels-1][1];
@@ -579,10 +768,25 @@
exit(1);
}
+ /*This should be the last set of CTLs, except the lookahead get, so it can override the defaults.*/
for(i=0;i<opt_ctls;i++){
- ret=opus_multistream_encoder_ctl(st,opt_ctls_ctlval[i<<1],opt_ctls_ctlval[(i<<1)+1]);
- if(ret!=OPUS_OK){
- fprintf(stderr,"opus_multistream_encoder_ctl(st,%d,%d) returned: %s\n",opt_ctls_ctlval[i<<1],opt_ctls_ctlval[(i<<1)+1],opus_strerror(ret));
+ /*Each entry is a (target stream, ctl request, value) triple; a target of -1 addresses the multistream encoder itself.*/
+ int target=opt_ctls_ctlval[i*3];
+ if(target==-1){
+ ret=opus_multistream_encoder_ctl(st,opt_ctls_ctlval[i*3+1],opt_ctls_ctlval[i*3+2]);
+ if(ret!=OPUS_OK){
+ fprintf(stderr,"opus_multistream_encoder_ctl(st,%d,%d) returned: %s\n",opt_ctls_ctlval[i*3+1],opt_ctls_ctlval[i*3+2],opus_strerror(ret));
+ exit(1);
+ }
+ }else if(target<header.nb_streams){
+ OpusEncoder *oe;
+ /*Look the encoder up by the requested stream, not by the position of this entry in the ctl list.*/
+ ret=opus_multistream_encoder_ctl(st,OPUS_MULTISTREAM_GET_ENCODER_STATE(target,&oe));
+ if(ret!=OPUS_OK){
+ /*Without this check a failed lookup would leave oe uninitialized for the call below.*/
+ fprintf(stderr,"opus_multistream_encoder_ctl(st,OPUS_MULTISTREAM_GET_ENCODER_STATE(%d)) returned: %s\n",target,opus_strerror(ret));
+ exit(1);
+ }
+ ret=opus_encoder_ctl(oe, opt_ctls_ctlval[i*3+1],opt_ctls_ctlval[i*3+2]);
+ if(ret!=OPUS_OK){
+ fprintf(stderr,"opus_encoder_ctl(st[%d],%d,%d) returned: %s\n",target,opt_ctls_ctlval[i*3+1],opt_ctls_ctlval[i*3+2],opus_strerror(ret));
+ exit(1);
+ }
+ }else{
+ fprintf(stderr,"--set-ctl-int target stream %d is higher than the maximum stream number %d.\n",target,header.nb_streams-1);
exit(1);
}
}
@@ -721,19 +925,32 @@
min_bytes=IMIN(nbBytes,min_bytes);
if(frange!=NULL){
+ const unsigned char *subpkt;
+ int parse_size;
static const char *bw_strings[5]={"NB","MB","WB","SWB","FB"};
static const char *mode_strings[3]={"LP","HYB","MDCT"};
- int streams=1; /*FIXME, mode data should be per stream too*/
OpusEncoder *oe;
opus_uint32 rng;
- fprintf(frange,"%d %d ",frame_size*(48000/coding_rate),nbBytes);
- fprintf(frange,"%s %s %c %d ",mode_strings[((((packet[0]>>3)+48)&92)+4)>>5],
- bw_strings[opus_packet_get_bandwidth(packet)-OPUS_BANDWIDTH_NARROWBAND],
- packet[0]&4?'S':'M',opus_packet_get_samples_per_frame(packet,48000));
- for(i=0;i<streams;i++){
+ fprintf(frange,"%d, %d, ",frame_size*(48000/coding_rate),nbBytes);
+ subpkt=packet;
+ parse_size=nbBytes;
+ for(i=0;i<header.nb_streams;i++){
+ int payload_offset;
+ const unsigned char *frames[48];
+ unsigned char toc;
+ short size[48];
+ payload_offset=0;
+ opus_packet_parse_impl(subpkt,parse_size,i+1!=header.nb_streams,
+ &toc,frames,size,&payload_offset);
+ fprintf(frange,"[%d, %s, %s, %c, %d",payload_offset,
+ mode_strings[((((subpkt[0]>>3)+48)&92)+4)>>5],
+ bw_strings[opus_packet_get_bandwidth(subpkt)-OPUS_BANDWIDTH_NARROWBAND],
+ subpkt[0]&4?'S':'M',opus_packet_get_samples_per_frame(subpkt,48000));
ret=opus_multistream_encoder_ctl(st,OPUS_MULTISTREAM_GET_ENCODER_STATE(i,&oe));
ret=opus_encoder_ctl(oe,OPUS_GET_FINAL_RANGE(&rng));
- fprintf(frange,"%llu%c",(unsigned long long)rng,i+1==streams?'\n':' ');
+ fprintf(frange,", %llu]%s",(unsigned long long)rng,i+1==header.nb_streams?"\n":", ");
+ parse_size-=payload_offset;
+ subpkt+=payload_offset;
}
}
@@ -865,6 +1082,7 @@
if(rate!=coding_rate)clear_resample(&inopt);
clear_padder(&inopt);
+ if(downmix)clear_downmix(&inopt);
in_format->close_func(inopt.readdata);
if(fin)fclose(fin);
if(fout)fclose(fout);
--- a/src/opusenc.h
+++ b/src/opusenc.h
@@ -37,6 +37,8 @@
void clear_scaler(oe_enc_opt *opt);
void setup_padder(oe_enc_opt *opt);
void clear_padder(oe_enc_opt *opt);
+int setup_downmix(oe_enc_opt *opt, int out_channels);
+void clear_downmix(oe_enc_opt *opt);
typedef struct
{