shithub: libtags

Download patch

ref: c074cc2a8b2d8e2721457f3c8723298dfd1fd212
parent: 1a9591bd4f932a6ca2fba0eb84cb6b790c8c7250
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Sun Mar 17 13:58:15 EDT 2024

redo parts of the API to fit more tag-kind-specific info - attached image type

--- a/examples/readtags.c
+++ b/examples/readtags.c
@@ -33,19 +33,48 @@
 
 static bool image;
 
+static const char *
+imagetype(int type) /* human-readable description of an IT* image type */
+{
+	static const char *types[] = { /* indexed by the IT* enum values */
+		[ITother] = "Other",
+		[IT32x32_file_icon] = "32x32 pixels 'file icon' (PNG only)",
+		[ITother_file_icon] = "Other file icon",
+		[ITcover_front] = "Cover (front)",
+		[ITcover_back] = "Cover (back)",
+		[ITleaflet] = "Leaflet page",
+		[ITmedia] = "Media (e.g. label side of CD)",
+		[ITlead] = "Lead artist/lead performer/soloist",
+		[ITartist] = "Artist/performer",
+		[ITconductor] = "Conductor",
+		[ITband] = "Band/orchestra",
+		[ITcomposer] = "Composer",
+		[ITlyricist] = "Lyricist/text writer",
+		[ITlocation] = "Recording location",
+		[ITrecording] = "During recording",
+		[ITperformance] = "During performance",
+		[ITmovie_capture] = "Movie/video screen capture",
+		[ITfish] = "A bright coloured fish",
+		[ITillustration] = "Illustration",
+		[ITlogo_band] = "Band/artist logotype",
+		[ITlogo_publisher] = "Publisher/studio logotype",
+	};
+	return type >= 0 && type < ITnum ? types[type] : "???"; /* anything outside [0, ITnum) prints as "???" */
+}
+
 static void
-tag(Tagctx *ctx, int t, const char *k, const char *v, int offset, int size, Tagread f)
+tag(Tagctx *ctx, int type, Tag *tag)
 {
-	USED(k); USED(f);
 	if(image){
-		if(t != Timage)
+		if(type != Timage)
 			return;
+		int size = tag->image.size;
 		char *raw = malloc(size);
 		Aux *aux = ctx->aux;
 		int prevoffset = lseek(aux->fd, 0, 1);
-		if(lseek(aux->fd, offset, 0) != offset ||
+		if(lseek(aux->fd, tag->image.offset, 0) != tag->image.offset ||
 		   read(aux->fd, raw, size) != size ||
-		   (f != NULL && f(raw, &size) != 0)){
+		   (tag->image.decode != NULL && tag->image.decode(raw, &size) != 0)){
 			fprintf(stderr, "failed to read the image\n");
 			exit(1);
 		}
@@ -54,12 +83,10 @@
 		exit(0);
 		return;
 	}
-	if(t == Timage)
-		printf("%-12s %s %d %d\n", t2s[t], v, offset, size);
-	else if(t == Tunknown)
-		printf("%-12s %s\n", k, v);
+	if(type == Timage)
+		printf("%-12s %s %d %d (%s)\n", t2s[type], tag->image.mime, tag->image.offset, tag->image.size, imagetype(tag->image.type));
 	else
-		printf("%-12s %s\n", t2s[t], v);
+		printf("%-12s %s\n", type == Tunknown ? tag->text.k : t2s[type], tag->text.v);
 }
 
 static void
@@ -118,7 +145,6 @@
 		else{
 			if(tagsget(&ctx) != 0){
 				fprintf(stderr, "no tags or failed to read tags\n");
-				return 1;
 			}else if(image){
 				fprintf(stderr, "no images found\n");
 				return 1;
--- a/flac.c
+++ b/flac.c
@@ -7,7 +7,7 @@
 tagflac(Tagctx *ctx)
 {
 	uint8_t *d;
-	int sz, last;
+	int sz, last, type;
 	uint64_t g;
 
 	d = (uint8_t*)ctx->buf;
@@ -44,7 +44,8 @@
 			if(sz < 8+4+20 || ctx->read(ctx, d, 8) != 8) /* type, mime length */
 				return -1;
 			sz -= 8;
-			n = beuint(&d[4]);
+			type = beuint(d); /* type */
+			n = beuint(&d[4]); /* mime length */
 			mime = ctx->buf+20;
 			if(n < 0 || n >= sz-4-20 || n >= ctx->bufsz-20 || ctx->read(ctx, mime, n) != n)
 				return -1;
@@ -57,8 +58,14 @@
 			sz -= 20;
 			if((n = beuint(&d[16])) < 0)
 				return -1;
-			if(n > 0)
-				tagscallcb(ctx, Timage, "", mime, offset, n, nil);
+			if(n > 0){
+				tagscallcb(ctx, Timage, &(Tag){.image = {
+					.mime = mime,
+					.offset = offset,
+					.size = n,
+					.type = type,
+				}});
+			}
 			if(ctx->seek(ctx, sz, 1) <= 0)
 				return -1;
 		}else if((d[0] & 0x7f) == 4){ /* 4 = vorbis comment */
--- a/harness.c
+++ b/harness.c
@@ -1,6 +1,7 @@
 #include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include "tags.h"
@@ -28,9 +29,22 @@
 __AFL_FUZZ_INIT()
 
 static void
-tag(Tagctx *ctx, int t, const char *k, const char *v, int offset, int size, Tagread f)
+tag(Tagctx *ctx, int t, Tag *tag)
 {
-	USED(ctx); USED(t);  USED(k); USED(v); USED(offset); USED(size); USED(f);
+	if(t == Timage){
+		static uint8_t *buf;
+		static int bufsz;
+		int size = tag->image.size;
+		if(bufsz < size){
+			buf = realloc(buf, size);
+			bufsz = size;
+		}
+		Aux *aux = ctx->aux;
+		memcpy(buf, aux->in+tag->image.offset, size);
+		if(tag->image.decode != NULL)
+			tag->image.decode(buf, &size);
+		assert(size >= 0 && size <= bufsz);
+	}
 }
 
 static void
--- a/id3v2.c
+++ b/id3v2.c
@@ -159,11 +159,11 @@
 {
 	int n, offset;
 	char *b, *tag;
-	Tagread f;
+	Tagdecode decf;
 
 	tag = ctx->buf;
 	n = 0;
-	f = unsync ? unsyncread : nil;
+	decf = unsync ? unsyncread : nil;
 	if(strcmp((char*)d, "APIC") == 0){
 		offset = ctx->seek(ctx, 0, 1);
 		if((n = ctx->read(ctx, tag, 255)) == 255){ /* APIC mime and description should fit */
@@ -180,12 +180,20 @@
 					break;
 				}
 			}
-			tagscallcb(ctx, Timage, "APIC", b, offset+n, tsz-n, f);
-			n = 256;
+			if(tsz > n){
+				tagscallcb(ctx, Timage, &(Tag){.image = {
+					.type = b[strlen(b)+1],
+					.mime = b,
+					.offset = offset+n,
+					.size = tsz-n,
+					.decode = decf,
+				}});
+			}
+			n = 255;
 		}
 	}else if(strcmp((char*)d, "PIC") == 0){
 		offset = ctx->seek(ctx, 0, 1);
-		if((n = ctx->read(ctx, tag, 256)) == 256){ /* PIC description should fit */
+		if((n = ctx->read(ctx, tag, 255)) == 255){ /* PIC description should fit */
 			b = tag + 1; /* mime type */
 			for(n = 5; n < 253; n++){
 				if(tag[0] == 0 || tag[0] == 3){ /* one zero byte */
@@ -198,9 +206,16 @@
 					break;
 				}
 			}
-			if(tsz > n)
-				tagscallcb(ctx, Timage, "PIC", strcmp(b, "JPG") == 0 ? "image/jpeg" : "image/png", offset+n, tsz-n, f);
-			n = 256;
+			if(tsz > n){
+				tagscallcb(ctx, Timage, &(Tag){.image = {
+					.type = b[strlen(b)+1],
+					.mime = strcmp(b, "JPG") == 0 ? "image/jpeg" : "image/png",
+					.offset = offset+n,
+					.size = tsz-n,
+					.decode = decf,
+				}});
+			}
+			n = 255;
 		}
 	}else if(strcmp((char*)d, "RVA2") == 0 && tsz >= 6+5){
 		/* replay gain. 6 = "track\0", 5 = other */
--- a/m4a.c
+++ b/m4a.c
@@ -186,10 +186,16 @@
 			d[sz] = 0;
 			txtcb(ctx, type, "", d);
 			sz = 0;
-		}else if(type == Timage && dtype == 13) /* jpeg cover image */
-			tagscallcb(ctx, Timage, "", "image/jpeg", ctx->seek(ctx, 0, 1), sz, nil);
-		else if(type == Timage && dtype == 14) /* png cover image */
-			tagscallcb(ctx, Timage, "", "image/png", ctx->seek(ctx, 0, 1), sz, nil);
+		}else if(type == Timage){
+			tagscallcb(ctx, Timage, &(Tag){.image = {
+				.mime = dtype == 13 ?
+					"image/jpeg" :
+					(dtype == 14 ? "image/png" : ""),
+				.type = ITcover_front,
+				.offset = ctx->seek(ctx, 0, 1),
+				.size = sz,
+			}});
+		}
 	}
 
 	return 0;
--- a/tags.c
+++ b/tags.c
@@ -24,25 +24,24 @@
 };
 
 void
-tagscallcb(Tagctx *ctx, int type, const char *k, char *s, int offset, int size, Tagread f)
+tagscallcb(Tagctx *ctx, int type, Tag *tag)
 {
-	char *e;
+	char *s, *e;
 
-	if(f == nil && size == 0){
-		while((uint8_t)*s <= ' ' && *s)
-			s++;
+	if(type != Timage){
+		for(s = tag->text.v; (uint8_t)*s <= ' ' && *s; s++);
 		e = s + strlen(s);
 		while(e != s && (uint8_t)e[-1] <= ' ')
 			e--;
 		if(*e != 0)
-		  *e = 0;
+			*e = 0;
+		if(*s == 0)
+			return;
+		tag->text.v = s;
 	}
-	if(*s){
-		ctx->tag(ctx, type, k, s, offset, size, f);
-		if(type != Tunknown){
-			ctx->found |= 1<<type;
-		}
-	}
+	ctx->tag(ctx, type, tag);
+	if(type != Tunknown)
+		ctx->found |= 1<<type;
 }
 
 int
--- a/tags.h
+++ b/tags.h
@@ -4,8 +4,9 @@
 extern "C" {
 #endif
 
+typedef union Tag Tag;
 typedef struct Tagctx Tagctx;
-typedef int (*Tagread)(void *buf, int *cnt);
+typedef int (*Tagdecode)(void *buf, int *cnt);
 
 /* Tag type. */
 enum {
@@ -54,6 +55,57 @@
 	Fogg __attribute__((deprecated("use Fvorbis instead"))) = Fvorbis,
 };
 
+/* Image types, according to id3v2. */
+enum {
+	ITother, /* other */
+	IT32x32_file_icon, /* 32x32 pixels file icon (PNG only) */
+	ITother_file_icon, /* other file icon */
+	ITcover_front, /* cover (front) */
+	ITcover_back, /* cover (back) */
+	ITleaflet, /* leaflet page */
+	ITmedia, /* media (e.g. label side of CD) */
+	ITlead, /* lead artist/lead performer/soloist */
+	ITartist, /* artist/performer */
+	ITconductor, /* conductor */
+	ITband, /* band/orchestra */
+	ITcomposer, /* composer */
+	ITlyricist, /* lyricist/text writer */
+	ITlocation, /* recording location */
+	ITrecording, /* during recording */
+	ITperformance, /* during performance */
+	ITmovie_capture, /* movie/video screen capture */
+	ITfish, /* a bright coloured fish */
+	ITillustration, /* illustration */
+	ITlogo_band, /* band/artist logotype */
+	ITlogo_publisher, /* publisher/studio logotype */
+
+	ITnum, /* number of image types above; not an image type itself */
+};
+
+/* Tag itself, either text or image, depending on the "type" passed to the tag callback. */
+union Tag {
+	/* Any tag type except Timage */
+	struct {
+		char *k; /* "TPE1", "replaygain_album_peak" etc */
+		char *v; /* value, with leading and trailing whitespace already trimmed */
+	}text;
+
+	/* Tag type Timage */
+	struct {
+		/* If not NULL, reading the image cover requires additional decoding of the data.
+		 * In that case you will need to read the raw data (specified by "offset" and "size")
+		 * into a buffer and call this function on it (tag->image.decode(buf, &size)).
+		 * "size" will be updated with the actual image size after decoding if the function
+		 * returned 0, else there was an error.
+		 */
+		Tagdecode decode;
+		const char *mime; /* "image/png", "image/jpeg" etc */
+		int offset; /* offset from the beginning of the file */
+		int size; /* number of bytes occupied by the raw image data (not decoded) */
+		int type; /* type of the image (ITcover_front, ...) */
+	}image;
+};
+
 /* Tag parser context. You need to set it properly before parsing an audio file using libtags. */
 struct Tagctx {
 	/* Read function. This is what libtags uses to read the file. */
@@ -63,19 +115,16 @@
 	int (*seek)(Tagctx *ctx, int offset, int whence);
 
 	/* Callback that is used by libtags to inform about the tags of a file.
-	 * "type" is the tag's type (Tartist, ...) or Tunknown if libtags doesn't know how to map a tag kind to
-	 * any of these. "k" is the raw key like "TPE1", "TPE2", etc. "s" is the null-terminated string unless "type" is
-	 * Timage. "offset" and "size" define the placement and size of the image cover ("type" = Timage)
-	 * inside the file, and "f" is not NULL in case reading the image cover requires additional
-	 * operations on the data, in which case you need to read the image cover as a stream and call this
-	 * function to apply these operations on the contents read.
+	 * "type" is the tag's type (Tartist, ...) or Tunknown if libtags doesn't
+	 * know how to map a tag kind to any of these. ANY tag type that isn't Timage
+	 * is a text tag: use "tag->text" for those and "tag->image" for Timage.
 	 */
-	void (*tag)(Tagctx *ctx, int type, const char *k, const char *s, int offset, int size, Tagread f);
+	void (*tag)(Tagctx *ctx, int type, Tag *tag);
 
 	/* Approximate millisecond-to-byte offsets within the file, if available. This callback is optional. */
 	void (*toc)(Tagctx *ctx, int ms, int offset);
 
-	/* Auxiliary data. Not used by libtags. */
+	/* Auxiliary data. Not used by libtags, left for the library user. */
 	void *aux;
 
 	/* Memory buffer to work in. */
--- a/tagspriv.h
+++ b/tagspriv.h
@@ -65,11 +65,12 @@
 /*
  * METADATA_BLOCK_PICTURE reader function.
  */
-int mbpdec(void *buf, int *cnt);
+int cbmbp(Tagctx *ctx, char *v, int ssz, int off, int picsz);
 
-void tagscallcb(Tagctx *ctx, int type, const char *k, char *s, int offset, int size, Tagread f);
+void tagscallcb(Tagctx *ctx, int type, Tag *tag);
 
-#define txtcb(ctx, type, k, s) tagscallcb(ctx, type, k, (char*)s, 0, 0, nil)
+#define txtcb(ctx, type, k_, v_) \
+	tagscallcb(ctx, type, &(Tag){.text = {.k = (k_), .v = (char*)(v_)}})
 
 int tagflac(Tagctx *ctx);
 int tagid3v1(Tagctx *ctx);
--- a/vorbis.c
+++ b/vorbis.c
@@ -43,7 +43,7 @@
 		txtcb(ctx, Tunknown, k, v);
 }
 
-int
+static int
 mbpdec(void *buf, int *cnt)
 {
 	int sz, n;
@@ -53,7 +53,7 @@
 	if((n = debase64(v, *cnt, v, *cnt)) <= 0)
 		return -1;
 
-	beuint(v); /* id3v2 APIC type */
+	/* skip id3v2 APIC type */
 	v += 4; n -= 4;
 	sz = beuint(v); /* mime size */
 	v += 4; n -= 4;
@@ -77,11 +77,48 @@
 }
 
 int
+cbmbp(Tagctx *ctx, char *v, int ssz, int off, int picsz)
+{
+	char *mime;
+	int type, n, sz;
+	/* parse the METADATA_BLOCK_PICTURE header out of base64 "v" and report it as Timage */
+	n = ssz; /* at most this amount is available */
+	n &= ~3; /* modulo 4 sextets, so debase64 gets complete bytes */
+	n = debase64((uint8_t*)v, n, (uint8_t*)ctx->buf, ctx->bufsz);
+	/* https://xiph.org/flac/format.html#metadata_block_picture */
+	if(n <= 4+4+0+4+0+4+4+4+4+4+0)
+		return 0; /* too short to hold a picture header: ignored, not an error */
+	v = ctx->buf;
+	type = beuint(v); /* id3v2 APIC type */
+	v += 4; n -= 4;
+	sz = beuint(v); /* mime size */
+	v += 4; n -= 4;
+	if(sz < 0 || sz >= n-4-4-4-4-4-4)
+		return -1; /* mime size does not fit the decoded data */
+	mime = v;
+	v += sz; n -= sz; /* skip MIME */
+	sz = beuint(v); /* description size */
+	v += 4; n -= 4;
+	if(sz < 0 || sz >= n-4-4-4-4-4)
+		return -1; /* description size does not fit the decoded data */
+	v[-4] = 0; /* null-terminate MIME; clobbers the description size, which is already read */
+	tagscallcb(ctx, Timage, &(Tag){.image = {
+		.mime = mime,
+		.offset = off, /* where the still-encoded picture starts in the file */
+		.size = picsz, /* its encoded size; the user runs .decode on the raw bytes */
+		.type = type,
+		.decode = mbpdec,
+	}});
+
+	return 0;
+}
+
+int
 tagvorbis(Tagctx *ctx)
 {
-	char *v, *mime;
+	char *v;
 	uint8_t *d, h[4];
-	int sz, picsz, numtags, i, npages, pgend, skip, off, n;
+	int sz, numtags, i, npages, pgend, skip;
 
 	d = (uint8_t*)ctx->buf;
 	/* need to find vorbis frame with type=3 */
@@ -148,32 +185,13 @@
 			*v++ = 0;
 			if(strcasecmp(ctx->buf, "metadata_block_picture") != 0)
 				cbvorbiscomment(ctx, ctx->buf, v);
-			else{
-				/* off and picsz will point at the base64-encoded picture block */
-				off = ctx->seek(ctx, 0, 1) - sz + (v - ctx->buf);
-				picsz = sz + skip - (v - ctx->buf);
-				n = sz - (v - ctx->buf); /* at most this amount is available */
-				n &= ~3; /* modulo 4 sextets, so debase64 gets complete bytes */
-				n = debase64((uint8_t*)v, n, (uint8_t*)ctx->buf, ctx->bufsz);
-				/* https://xiph.org/flac/format.html#metadata_block_picture */
-				if(n > 4+4+0+4+0+4+4+4+4+4+0){
-					v = ctx->buf;
-					beuint(v); /* id3v2 APIC type */
-					v += 4; n -= 4;
-					sz = beuint(v); /* mime size */
-					v += 4; n -= 4;
-					if(sz < 0 || sz >= n-4-4-4-4-4-4)
-						return -1;
-					mime = v;
-					v += sz; n -= sz; /* skip MIME */
-					sz = beuint(v); /* description size */
-					v += 4; n -= 4;
-					if(sz < 0 || sz >= n-4-4-4-4-4)
-						return -1;
-					*v = 0; /* null-terminate MIME */
-					tagscallcb(ctx, Timage, "", mime, off, picsz, mbpdec);
-				}
-			}
+			else if(cbmbp(ctx, v,
+					sz - (v - ctx->buf), /* at most this amount is available */
+					ctx->seek(ctx, 0, 1) - sz + (v - ctx->buf), /* offset */
+					sz + skip - (v - ctx->buf) /* total pic size (still encoded) */
+					) != 0)
+				return -1;
+
 			if(ctx->seek(ctx, skip, 1) < 0)
 				return -1;
 		}