ref: 7b92abb9f17b4ec813344cad2aa931d17926c7b2
parent: a2be3388fb5905efa4652adf859ea3b7e1ceaea6
author: Mark Harris <[email protected]>
date: Sat Nov 11 12:55:47 EST 2023
opusenc: Allow downmixing ambix/discrete channels. Also fix informational display of input and output channels so that it is correct when downmixing, and includes format information.
--- a/man/opusenc.1
+++ b/man/opusenc.1
@@ -101,10 +101,15 @@
Set expected packet loss in percent (default: 0).
.TP
.B --downmix-mono
-Downmix to mono.
+Downmix stereo, surround, ambisonics, or discrete audio channels to mono.
+Audio that is already mono is unchanged.
+Ambisonic downmixes include a downmix of any non-diegetic channels.
+Independent discrete channels are downmixed by weighting each channel equally.
.TP
.B --downmix-stereo
-Downmix multichannel speaker configurations to stereo.
+Downmix surround or ambisonics to stereo. Mono and stereo audio is unchanged.
+Ambisonic downmixes include any non-diegetic channels.
+Independent discrete channels are downmixed to mono.
.TP
.B --no-phase-inv
Disable use of phase inversion for intensity stereo.
@@ -312,13 +317,16 @@
The length will always be ignored when it is implausible (very small or very
large), but some stdin usage may still need this option to avoid truncation.
.TP
-.B --channels <ambix, discrete>
+.BR --channels " " ambix | discrete
Override the format of the input channels.
-The "ambix" option indicates that the input is ambisonics using ACN channel
+.IP
+"ambix" indicates that the input is ambisonics using ACN channel
ordering with SN3D normalization. All channels in a full ambisonics order must
be included. A pair of non-diegetic stereo channels can be optionally placed
-after the ambisonics channels. The option "discrete" forces uncoupled
-channels.
+after the ambisonics channels.
+.IP
+"discrete" indicates that the input channels are independent discrete channels
+with no assigned meaning or speaker position.
.SS "Diagnostic options"
.TP
.BI --serial " N"
--- a/src/audio-in.c
+++ b/src/audio-in.c
@@ -112,10 +112,10 @@
/* Define the supported formats here */
input_format formats[] = {
- {wav_id, 12, wav_open, wav_close, "wav", N_("WAV file reader")},
- {aiff_id, 12, aiff_open, wav_close, "aiff", N_("AIFF/AIFC file reader")},
- {flac_id, 0x10000, flac_open, flac_close, "flac", N_("FLAC file reader")},
- {oggflac_id, 33, flac_open, flac_close, "ogg", N_("Ogg FLAC file reader")},
+ {wav_id, 12, wav_open, wav_close, "WAV", N_("WAV file reader")},
+ {aiff_id, 12, aiff_open, wav_close, "AIFF", N_("AIFF/AIFC file reader")},
+ {flac_id, 0x10000, flac_open, flac_close, "FLAC", N_("FLAC file reader")},
+ {oggflac_id, 33, flac_open, flac_close, "Ogg FLAC", N_("Ogg FLAC file reader")},
{NULL, 0, NULL, NULL, NULL, NULL}
};
@@ -923,18 +923,17 @@
long in_samples = d->real_reader(d->real_readdata, d->bufs, samples);
int i,j,k,in_ch,out_ch;
- in_ch=d->in_channels;
- out_ch=d->out_channels;
+ in_ch = d->in_channels;
+ out_ch = d->out_channels;
- for (i=0;i<in_samples;i++) {
- for (j=0;j<out_ch;j++) {
- float *samp;
- samp=&buffer[i*out_ch+j];
- *samp=0;
- for (k=0;k<in_ch;k++) {
- *samp+=d->bufs[i*in_ch+k]*d->matrix[in_ch*j+k];
+ for (i=0; i<in_samples; ++i) {
+ for (j=0; j<out_ch; ++j) {
+ float *samp = &buffer[i*out_ch+j];
+ *samp = 0;
+ for (k=0; k<in_ch; ++k) {
+ *samp += d->bufs[i*in_ch+k] * d->matrix[in_ch*j+k];
+ }
}
- }
}
return in_samples;
}
@@ -941,7 +940,7 @@
int setup_downmix(oe_enc_opt *opt, int out_channels)
{
- static const float stupid_matrix[7][8][2] = {
+ static const float surround_downmix_matrix[7][8][2] = {
/*2*/ {{1,0},{0,1}},
/*3*/ {{1,0},{0.7071f,0.7071f},{0,1}},
/*4*/ {{1,0},{0,1},{0.866f,0.5f},{0.5f,0.866f}},
@@ -950,17 +949,30 @@
/*7*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.6123f,0.6123f},{0.7071f,0.7071f}},
/*8*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.866f,0.5f},{0.5f,0.866f},{0.7071f,0.7071f}},
};
- float sum;
downmix *d;
- int i,j;
+ int i, j;
- if (opt->channels<=out_channels || out_channels>2 || opt->channels<=0 || out_channels<=0) {
- fprintf(stderr, _("Downmix must actually downmix and only knows mono/stereo out.\n"));
- return 0;
+ if ((opt->channels_format == CHANNELS_FORMAT_DEFAULT && opt->channels <= 8)
+ || (opt->channels_format == CHANNELS_FORMAT_AMBIX)) {
+ if (out_channels != 1 && out_channels != 2) {
+ fprintf(stderr, _("Downmix must be to mono or stereo.\n"));
+ out_channels = 2;
+ }
+ } else if (out_channels != 1) {
+ fprintf(stderr, _("Discrete channels can only be downmixed to mono.\n"));
+ out_channels = 1;
}
- if (out_channels==2 && opt->channels>8) {
- fprintf(stderr, _("Downmix only knows how to mix >8ch to mono.\n"));
+ if (opt->channels_format == CHANNELS_FORMAT_DEFAULT) {
+ if (opt->channels <= out_channels) {
+ /* nothing to do */
+ return 0;
+ }
+ }
+
+ if (opt->channels <= 1) {
+ /* metadata-only change */
+ opt->channels_format = CHANNELS_FORMAT_DEFAULT;
return 0;
}
@@ -969,27 +981,58 @@
d->matrix = malloc(sizeof(float)*opt->channels*out_channels);
d->real_reader = opt->read_samples;
d->real_readdata = opt->readdata;
- d->in_channels=opt->channels;
- d->out_channels=out_channels;
+ d->in_channels = opt->channels;
+ d->out_channels = out_channels;
- if (out_channels==1&&d->in_channels>8) {
- for (i=0;i<d->in_channels;i++)d->matrix[i]=1.0f/d->in_channels;
- } else if (out_channels==2) {
- for (j=0;j<d->out_channels;j++)
- for (i=0;i<d->in_channels;i++)d->matrix[d->in_channels*j+i]=
- stupid_matrix[opt->channels-2][i][j];
+ if (opt->channels_format == CHANNELS_FORMAT_DEFAULT && d->in_channels <= 8) {
+ /* surround downmix */
+ float sum;
+ if (out_channels == 2) {
+ for (j = 0; j < out_channels; ++j)
+ for (i = 0; i < d->in_channels; ++i)
+ d->matrix[d->in_channels*j+i] =
+ surround_downmix_matrix[d->in_channels-2][i][j];
+ } else {
+ for (i = 0; i < d->in_channels; ++i)
+ d->matrix[i] =
+ (surround_downmix_matrix[d->in_channels-2][i][0]) +
+ (surround_downmix_matrix[d->in_channels-2][i][1]);
+ }
+ sum = 0;
+ for (i = 0; i < d->in_channels*out_channels; ++i)
+ sum += d->matrix[i];
+ sum = (float)out_channels / sum;
+ for (i = 0; i < d->in_channels*out_channels; ++i)
+ d->matrix[i] *= sum;
+ } else if (opt->channels_format == CHANNELS_FORMAT_AMBIX) {
+ /* downmix according to RFC 8486 section 4 */
+ int order_plus_one = sqrt(d->in_channels);
+ int nondiegetic_channels =
+ d->in_channels - order_plus_one * order_plus_one == 2 ? 2 : 0;
+ int use_y = out_channels == 2 && d->in_channels >= 4;
+ for (i = 1; i < d->in_channels*out_channels; ++i)
+ d->matrix[i] = 0.0f;
+ d->matrix[0] = 1.0f / (1 + use_y + nondiegetic_channels);
+ if (out_channels == 2) {
+ d->matrix[d->in_channels] = d->matrix[0];
+ if (use_y) {
+ d->matrix[1] = d->matrix[0];
+ d->matrix[d->in_channels+1] = -d->matrix[0];
+ }
+ }
+ if (nondiegetic_channels == 2) {
+ d->matrix[d->in_channels-2] =
+ d->matrix[out_channels*d->in_channels-1] =
+ d->matrix[0] * out_channels;
+ }
} else {
- for (i=0;i<d->in_channels;i++)d->matrix[i]=
- (stupid_matrix[opt->channels-2][i][0])+
- (stupid_matrix[opt->channels-2][i][1]);
+ for (i = 0; i < d->in_channels; ++i)
+ d->matrix[i] = 1.0f / d->in_channels;
}
- sum=0;
- for (i=0;i<d->in_channels*d->out_channels;i++)sum+=d->matrix[i];
- sum=(float)out_channels/sum;
- for (i=0;i<d->in_channels*d->out_channels;i++)d->matrix[i]*=sum;
+
opt->read_samples = read_downmix;
opt->readdata = d;
-
+ opt->channels_format = CHANNELS_FORMAT_DEFAULT;
opt->channels = out_channels;
return out_channels;
}
--- a/src/opusenc.c
+++ b/src/opusenc.c
@@ -176,7 +176,7 @@
printf(" --raw-chan n Set number of channels for raw input (default: 2)\n");
printf(" --raw-endianness n 1 for big endian, 0 for little (default: 0)\n");
printf(" --ignorelength Ignore the data length in Wave headers\n");
- printf(" --channels Override the format of the input channels (ambix, discrete)\n");
+ printf(" --channels fmt Override the format of the input channels (ambix, discrete)\n");
printf("\nDiagnostic options:\n");
printf(" --serial n Force use of a specific stream serial number\n");
printf(" --save-range file Save check values for every frame to a file\n");
@@ -360,9 +360,30 @@
if(nondiegetic_chs!=0&&nondiegetic_chs!=2) fatal("Error: invalid number of ambisonics channels.\n");
}
+static const char *channels_format_name(int channels_format, int channels)
+{ /* Returns a static display name for a channel format/count pair; the caller must not free it. */
+ static const char *format_name[8] = /* default-layout names, indexed by channels-1 */
+ {
+ "mono", "stereo", "linear surround", "quadraphonic",
+ "5.0 surround", "5.1 surround", "6.1 surround", "7.1 surround"
+ };
+ /* Default format is named by channel count; counts outside 1..8 fall through to "discrete". */
+ if (channels_format == CHANNELS_FORMAT_DEFAULT) {
+ if (channels >= 1 && channels <= 8) {
+ return format_name[channels-1];
+ }
+ } else if (channels_format == CHANNELS_FORMAT_AMBIX) {
+ return "ambix"; /* the channel count does not affect the ambisonics name */
+ }
+ return "discrete"; /* any other format value, or a default format with an unnamed channel count */
+}
+
int main(int argc, char **argv)
{
- static const input_format raw_format = {NULL, 0, raw_open, wav_close, "raw",N_("RAW file reader")};
+ static const input_format raw_format =
+ {
+ NULL, 0, raw_open, wav_close, "Raw", N_("Raw file reader")
+ };
struct option long_options[] =
{
{"quiet", no_argument, NULL, 0},
@@ -452,6 +473,8 @@
int serialno;
opus_int32 lookahead=0;
int mapping_family;
+ int orig_channels;
+ int orig_channels_format;
#ifdef WIN_UNICODE
int argc_utf8;
char **argv_utf8;
@@ -877,25 +900,19 @@
"Channel count must be in the range 1 to 255.\n", inopt.channels);
}
- if (downmix>0&&inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
- /*Ambisonics channels should be downmixed to mono or stereo, and then
- encoded using channel mapping family 0.*/
- fatal("Error: downmixing is currently unimplemented for ambisonics input.\n");
- }
-
- if (downmix>0&&inopt.channels_format==CHANNELS_FORMAT_DISCRETE) {
- /*Downmix of uncoupled channels not specified.*/
- fatal("Error: downmixing is currently unimplemented for independent input.\n");
- }
-
if (inopt.channels_format==CHANNELS_FORMAT_DEFAULT) {
if (downmix==0&&inopt.channels>2&&bitrate>0&&bitrate<(16000*inopt.channels)) {
if (!quiet) fprintf(stderr,"Notice: Surround bitrate less than 16 kbit/s per channel, downmixing.\n");
downmix=inopt.channels>8?1:2;
}
+ } else if (inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
+ validate_ambisonics_channel_count(inopt.channels);
}
- if (downmix>0&&downmix<inopt.channels) downmix=setup_downmix(&inopt,downmix);
+ orig_channels = inopt.channels;
+ orig_channels_format = inopt.channels_format;
+
+ if (downmix>0) downmix=setup_downmix(&inopt, downmix);
else downmix=0;
rate=inopt.rate;
@@ -906,7 +923,6 @@
((double)inopt.total_samples_per_channel * (48000./(double)rate));
if (inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
- validate_ambisonics_channel_count(chan);
/*Use channel mapping 3 for orders {1, 2, 3} with 4 to 18 channels
(including the non-diegetic stereo track). For other orders with no
demixing matrices currently available, use channel mapping 2.*/
@@ -1051,32 +1067,35 @@
if (!quiet) {
int opus_app;
- fprintf(stderr,"Encoding using %s",opus_version);
+ fprintf(stderr, "Encoding using %s", opus_version);
ret = ope_encoder_ctl(enc, OPUS_GET_APPLICATION(&opus_app));
if (ret != OPE_OK) fprintf(stderr, "\n");
- else if (opus_app==OPUS_APPLICATION_VOIP) fprintf(stderr," (VoIP)\n");
- else if (opus_app==OPUS_APPLICATION_AUDIO) fprintf(stderr," (audio)\n");
- else if (opus_app==OPUS_APPLICATION_RESTRICTED_LOWDELAY) fprintf(stderr," (low-delay)\n");
- else fprintf(stderr," (unknown application)\n");
- fprintf(stderr,"-----------------------------------------------------\n");
- fprintf(stderr," Input: %0.6g kHz, %d channel%s\n",
- rate/1000.,chan,chan<2?"":"s");
- fprintf(stderr," Output: %d channel%s (",chan,chan<2?"":"s");
- if (data.nb_coupled>0) fprintf(stderr,"%d coupled",data.nb_coupled*2);
+ else if (opus_app==OPUS_APPLICATION_VOIP) fprintf(stderr, " (VoIP)\n");
+ else if (opus_app==OPUS_APPLICATION_AUDIO) fprintf(stderr, " (audio)\n");
+ else if (opus_app==OPUS_APPLICATION_RESTRICTED_LOWDELAY) fprintf(stderr, " (low-delay)\n");
+ else fprintf(stderr, " (unknown application)\n");
+ fprintf(stderr, "-----------------------------------------------------\n");
+ fprintf(stderr, " Input: %s, %0.6g kHz, %d channel%s, %s\n",
+ in_format->format, rate/1000.,
+ orig_channels, orig_channels==1?"":"s",
+ channels_format_name(orig_channels_format, orig_channels));
+ fprintf(stderr, " Output: Opus, %d channel%s (", chan, chan==1?"":"s");
+ if (data.nb_coupled>0) fprintf(stderr, "%d coupled", data.nb_coupled*2);
if (data.nb_streams-data.nb_coupled>0) fprintf(stderr,
- "%s%d uncoupled",data.nb_coupled>0?", ":"",
+ "%s%d uncoupled", data.nb_coupled>0?", ":"",
data.nb_streams-data.nb_coupled);
- fprintf(stderr,")\n %0.2gms packets, %0.6g kbit/s%s\n",
+ fprintf(stderr, "), %s\n %0.2gms packets, %0.6g kbit/s%s\n",
+ channels_format_name(inopt.channels_format, chan),
frame_size/(48000/1000.), bitrate/1000.,
with_hard_cbr?" CBR":with_cvbr?" CVBR":" VBR");
- fprintf(stderr," Preskip: %d\n",lookahead);
+ fprintf(stderr, " Preskip: %d\n", lookahead);
if (data.frange!=NULL) {
fprintf(stderr, " Writing final range file %s\n", range_file);
}
- fprintf(stderr,"\n");
+ fprintf(stderr, "\n");
}
- if (strcmp(outFile,"-")==0) {
+ if (strcmp(outFile, "-")==0) {
#if defined WIN32 || defined _WIN32
_setmode(_fileno(stdout), _O_BINARY);
#endif