shithub: pdffs

Download patch

ref: 97218f13586cad7be42bdd1ce28072d10df1c2f9
author: Sigrid Haflínudóttir <[email protected]>
date: Thu Aug 20 12:47:18 EDT 2020

just put it out

--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,2 @@
+*.[678]
+[678].*
--- /dev/null
+++ b/README.md
@@ -1,0 +1,3 @@
+# PDF as a file system
+
+Yeah. WIP.
--- /dev/null
+++ b/TODO
@@ -1,0 +1,1 @@
+pdfstring: octal char parsing not implemented
--- /dev/null
+++ b/filter.c
@@ -1,0 +1,18 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+/*
+7.4
+
+ASCIIHex
+ASCII85
+LZW
+Flate
+RunLength
+CCITTFax
+JBIG2
+DCT
+JPX
+Crypt
+*/
--- /dev/null
+++ b/main.c
@@ -1,0 +1,21 @@
+#include <u.h>
+#include <libc.h>
+
+void
+main(int argc, char **argv)
+{
+	USED(argc); USED(argv); /* no command-line arguments are read yet */
+
+	quotefmtinstall(); /* installs %q, which the test functions print with */
+
+#ifdef TEST
+#define T(x) \
+	void x(void); \
+	x();
+
+	T(test_pdfstring); /* T declares the test function, then calls it */
+	T(test_pdfname);
+#endif
+
+	exits(nil);
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,16 @@
+</$objtype/mkfile
+
+CFLAGS=$CFLAGS -DTEST
+
+TARG=pdfs
+
+OFILES=\
+	filter.$O\
+	main.$O\
+	name.$O\
+	pdf.$O\
+	string.$O\
+
+default:V:	all
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/name.c
@@ -1,0 +1,102 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+int
+pdfname(char *p, char **e, int len)
+{
+	int i, o; /* i: read index into p, o: write index (decoded bytes so far) */
+
+	if(len < 2){
+		werrstr("too short");
+		goto err;
+	}
+
+	if(p[0] != '/'){
+		werrstr("invalid first char");
+		goto err;
+	}
+
+	for(i = 1, o = 0; i < len; i++){ /* decodes in place; o always stays behind i */
+		if(p[i] < '!' || p[i] > '~'){
+			werrstr("invalid char");
+			goto err;
+		}
+		if(p[i] == '#'){ /* #XX: one byte spelled as two hex digits */
+			i++;
+			if(i+2 > len){
+				werrstr("hex too short");
+				goto err;
+			}
+			if(dec16((uchar*)p+o, 1, p+i, 2) != 1){
+				werrstr("invalid hex");
+				goto err;
+			}
+			o++;
+			i++; /* now on the second hex digit; the loop increment steps past it */
+		}else{
+			p[o++] = p[i];
+		}
+	}
+
+	p[o] = 0;
+	*e = p + i; /* first byte after the consumed input */
+
+	return o; /* decoded length, terminator not counted */
+err:
+	werrstr("name: %r"); /* prefixes whichever message was set above */
+	return -1;
+}
+
+#ifdef TEST
+static struct {
+	char *i;   /* input; pdfname rewrites it in place */
+	int   len; /* length handed to pdfname */
+	char *o;   /* expected decoded name; nil in the rows that expect -1 */
+	int   r;   /* expected return value */
+	int   e;   /* never initialized explicitly nor read by test_pdfname */
+}t[] = {
+	{"/SimpleName",            11, "SimpleName", 10},
+	{"WrongName",               9, nil,          -1},
+	{"/.$()",                   5, ".$()",        4},
+	{"/#30",                    4, "0",           1},
+	{"/#3",                     3, nil,          -1},
+	{"/#G0",                    4, nil,          -1},
+	{"/#",                      2, nil,          -1},
+	{"/Wrong Char",            11, nil,          -1},
+	{"/\xff",                   2, nil,          -1},
+};
+
+void
+test_pdfname(void) /* runs every row of t[] and reports on fd 2 */
+{
+	char *e;
+	int i, r;
+
+	fprint(2, "pdfname\n");
+	for(i = 0; i < nelem(t); i++){
+		fprint(2, "\t%d: ", i);
+		r = pdfname(t[i].i, &e, t[i].len); /* rewrites t[i].i in place */
+		if(r != t[i].r){
+			fprint(2, "expected r=%d, got %d", t[i].r, r);
+			if(r < 0)
+				fprint(2, " (%r)\n");
+			else
+				fprint(2, "\n");
+			continue;
+		}else if(r >= 0){
+			if(t[i].i+t[i].len != e){ /* the whole input must have been consumed */
+				fprint(2, "expected e=%p, got %p\n", t[i].i+t[i].len, e);
+				continue;
+			}else if(strcmp(t[i].o, t[i].i) != 0){
+				fprint(2, "expected %q, got %q\n", t[i].o, t[i].i);
+				continue;
+			}
+		}
+		fprint(2, "OK");
+		if(r < 0)
+			fprint(2, " (%r)"); /* expected failure: show the error text too */
+		fprint(2, "\n");
+	}
+}
+#endif
--- /dev/null
+++ b/pdf.c
@@ -1,0 +1,5 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+static char whitespace[] = {0x00, 0x09, 0x0a, 0x0c, 0x0d, 0x20}; /* 7.2.2: NUL HT LF FF CR SP */
--- /dev/null
+++ b/pdf.h
@@ -1,0 +1,44 @@
+enum { /* object kinds; each comment names the PDF spec section */
+	Obool,   /* 7.3.2 */
+	Onum,    /* 7.3.3 */
+	Ostr,    /* 7.3.4 */
+	Oname,   /* 7.3.5 */
+	Oarray,  /* 7.3.6 */
+	Odict,   /* 7.3.7 */
+	Ostream, /* 7.3.8 */
+	Onull,   /* 7.3.9 */
+	Oindir,  /* 7.3.10 */
+};
+
+typedef struct Object Object;
+
+struct Object {
+	int type; /* presumably one of the O* constants above; confirm with users */
+	union { /* unnamed union: its members are reached directly on Object */
+		int bool;
+		double num;
+		char *str;
+		struct {
+			int id;
+			int gen;
+		}indir;
+	};
+};
+
+/*
+ * 7.3.4 String Objects
+ *
+ * Rewrites the string in place with null termination and returns the
+ * length in bytes, without the null terminator.
+ * Returns < 0 if parsing failed.
+ * (*e) is set to the position just after the string that starts at (p).
+ */
+int pdfstring(char *p, char **e, int len);
+
+/*
+ * 7.3.5 Name Objects
+ *
+ * Works the same way as pdfstring, but for name objects.
+ */
+int
+pdfname(char *p, char **e, int len);
--- /dev/null
+++ b/string.c
@@ -1,0 +1,182 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+static int esc[] = { /* indexed by the char after a backslash; 0 = unknown escape */
+	['n'] = '\n',
+	['r'] = '\r',
+	['t'] = '\t',
+	['b'] = '\b',
+	['f'] = '\f',
+	['('] = '(',
+	[')'] = ')',
+	['\\'] = '\\',
+	['\n'] = -1, /* backslash-newline: consumed, nothing is emitted */
+};
+
+/* Decodes the <hex> string at p in place; returns its byte length or -1. */
+static int
+pdfstringhex(char *p, char **e, int len)
+{
+	int i;
+
+	for(i = 1; i < len && p[i] != '>'; i++)
+		;
+	if(i >= len){
+		werrstr("hex not closed");
+		return -1;
+	}
+	p[i] = '0'; /* the final zero may be missing */
+	*e = p+i+1;
+	if(dec16((uchar*)p, i/2, p+1, i) != i/2){
+		werrstr("invalid hex");
+		return -1; /* return here: falling through used to write p[-1] */
+	}
+	p[i/2] = 0;
+	return i/2;
+}
+
+int
+pdfstring(char *p, char **e, int len)
+{
+	Rune r;
+	int c, i, o, n, paren; /* i: read index, o: write index, n: bytes in the rune */
+
+	if(len < 2){
+		werrstr("too short");
+		goto err;
+	}
+
+	paren = 0;
+	for(i = o = 0; i < len;){ /* decodes in place; o never passes i */
+		if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
+			werrstr("rune error at byte %d", i); /* report the offset, not n (always 1) */
+			goto err; /* go through err so the message gets the common prefix */
+		}
+
+		if(i == 0){
+			if(r == '('){
+				paren = 1;
+				i++;
+				continue;
+			}
+			if(r == '<'){ /* hex form is handled entirely by the helper */
+				len = pdfstringhex(p, e, len);
+				if(len < 0)
+					goto err;
+				return len;
+			}
+			werrstr("invalid first char");
+			goto err;
+		}
+
+		if(r == '\\'){
+			if(++i >= len){
+				werrstr("escaped char out of string len");
+				goto err;
+			}
+			if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
+				werrstr("rune error at byte %d", i);
+				goto err;
+			}
+			if(r >= '0' && r <= '9'){ /* octal */
+				werrstr("octal chars not implemented"); /* FIXME */
+				goto err;
+			}
+			if(r >= nelem(esc) || (c = esc[r]) == 0){
+				werrstr("unknown escape char at byte %d", i);
+				goto err;
+			}
+			r = c;
+			i += n;
+			if(c < 0) /* backslash-newline: emit nothing */
+				continue;
+		}else if(r == '('){ /* nested parens are counted, not copied out */
+			paren++;
+			i++;
+			continue;
+		}else if(r == ')'){
+			paren--;
+			i++;
+			if(paren == 0) /* matching close of the opening paren */
+				break;
+			continue;
+		}else{
+			i += n;
+		}
+
+		o += runetochar(p+o, &r);
+	}
+
+	if(paren > 0){
+		werrstr("non-closed paren");
+		goto err;
+	}
+
+	p[o] = 0;
+	*e = p + i; /* first byte after the closing paren */
+
+	return o; /* decoded length, terminator not counted */
+err:
+	werrstr("string: %r"); /* prefixes whichever message was set above */
+	return -1;
+}
+
+#ifdef TEST
+static struct {
+	char *i;   /* input; pdfstring rewrites it in place */
+	int   len; /* length handed to pdfstring */
+	char *o;   /* expected decoded string; nil in the rows that expect -1 */
+	int   r;   /* expected return value */
+	int   e;   /* never initialized explicitly nor read by test_pdfstring */
+}t[] = {
+	{"(simple string)",        15, "simple string", 13},
+	{"(non-closed paren",      17, nil,             -1},
+	{"wrong first char",       16, nil,             -1},
+	{"(parens((()((())))()))", 22, "parens",         6},
+	{"()",                      2, "",               0},
+	{")",                       1, nil,             -1},
+	{"(\\)\\()",                6, ")(",             2},
+	{"(\\\\)",                  4, "\\",             1},
+	{"a",                       1, nil,             -1},
+	{"(1\\\n2)",                6, "12",             2}, /* backslash-newline is dropped */
+	{"<323130>",                8, "210",            3},
+	{"<32313>",                 7, "210",            3}, /* odd digit count: final 0 implied */
+	{"<>",                      2, "",               0},
+	{"<",                       1, nil,             -1},
+	{"<zz>",                    4, nil,             -1},
+};
+
+void
+test_pdfstring(void) /* runs every row of t[] and reports on fd 2 */
+{
+	char *e;
+	int i, r;
+
+	fprint(2, "pdfstring\n");
+	for(i = 0; i < nelem(t); i++){
+		fprint(2, "\t%d: ", i);
+		r = pdfstring(t[i].i, &e, t[i].len); /* rewrites t[i].i in place */
+		if(r != t[i].r){
+			fprint(2, "expected r=%d, got %d", t[i].r, r);
+			if(r < 0)
+				fprint(2, " (%r)\n");
+			else
+				fprint(2, "\n");
+			continue;
+		}else if(r >= 0){
+			if(t[i].i+t[i].len != e){ /* the whole input must have been consumed */
+				fprint(2, "expected e=%p, got %p\n", t[i].i+t[i].len, e);
+				continue;
+			}else if(strcmp(t[i].o, t[i].i) != 0){
+				fprint(2, "expected %q, got %q\n", t[i].o, t[i].i);
+				continue;
+			}
+		}
+		fprint(2, "OK");
+		if(r < 0)
+			fprint(2, " (%r)"); /* expected failure: show the error text too */
+		fprint(2, "\n");
+	}
+}
+#endif