ref: 97218f13586cad7be42bdd1ce28072d10df1c2f9
author: Sigrid Haflínudóttir <[email protected]>
date: Thu Aug 20 12:47:18 EDT 2020
just put it out
--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,2 @@
+*.[678]
+[678].*
--- /dev/null
+++ b/README.md
@@ -1,0 +1,3 @@
+# PDF as a file system
+
+Yeah. WIP.
--- /dev/null
+++ b/TODO
@@ -1,0 +1,1 @@
+pdfstring: octal char parsing not implemented
--- /dev/null
+++ b/filter.c
@@ -1,0 +1,18 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+/*
+7.4 Filters -- the standard stream filters, none implemented yet:
+
+ASCIIHex
+ASCII85
+LZW
+Flate
+RunLength
+CCITTFax
+JBIG2
+DCT
+JPX
+Crypt
+*/
--- /dev/null
+++ b/main.c
@@ -1,0 +1,21 @@
+#include <u.h>
+#include <libc.h>
+
+/* Entry point: a TEST build runs the parser self-tests, then exits. */
+void
+main(int argc, char **argv)
+{
+#ifdef TEST
+	void test_pdfstring(void), test_pdfname(void);
+#endif
+
+	USED(argc); USED(argv);
+	quotefmtinstall();	/* the self-tests print with the %q verb */
+
+#ifdef TEST
+	test_pdfstring();
+	test_pdfname();
+#endif
+
+	exits(nil);
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,16 @@
+</$objtype/mkfile
+# -DTEST compiles in the self-tests that main() runs at startup
+CFLAGS=$CFLAGS -DTEST
+
+TARG=pdfs
+
+OFILES=\
+	filter.$O\
+	main.$O\
+	name.$O\
+	pdf.$O\
+	string.$O\
+
+default:V: all
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/name.c
@@ -1,0 +1,102 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+/* 7.3.5: decodes the name object at p in place; the contract is in pdf.h. */
+int
+pdfname(char *p, char **e, int len)
+{
+	int rd, wr;	/* read and write offsets into p; wr stays behind rd */
+
+	if(len < 2){
+		werrstr("too short");
+		goto err;
+	}
+
+	if(p[0] != '/'){
+		werrstr("invalid first char");
+		goto err;
+	}
+
+	for(rd = 1, wr = 0; rd < len; rd++){
+		if(p[rd] < '!' || p[rd] > '~'){
+			werrstr("invalid char");
+			goto err;
+		}
+		if(p[rd] != '#'){
+			p[wr++] = p[rd];
+			continue;
+		}
+		if(rd+3 > len){	/* '#' must be followed by two hex digits */
+			werrstr("hex too short");
+			goto err;
+		}
+		if(dec16((uchar*)p+wr, 1, p+rd+1, 2) != 1){
+			werrstr("invalid hex");
+			goto err;
+		}
+		wr++;
+		rd += 2;	/* now on the last digit; the loop increment steps past it */
+	}
+
+	p[wr] = 0;
+	*e = p + rd;
+
+	return wr;
+err:
+	werrstr("name: %r");
+	return -1;
+}
+
+#ifdef TEST
+/* Test vectors for pdfname; the inputs are rewritten in place by the call. */
+static struct {
+	char *in;	/* input text */
+	int len;	/* bytes of input passed to pdfname */
+	char *want;	/* expected decoded name; nil when failure is expected */
+	int ret;	/* expected return value */
+	int e;	/* never set or read */
+}t[] = {
+	{"/SimpleName", 11, "SimpleName", 10},
+	{"WrongName", 9, nil, -1},
+	{"/.$()", 5, ".$()", 4},
+	{"/#30", 4, "0", 1},
+	{"/#3", 3, nil, -1},
+	{"/#G0", 4, nil, -1},
+	{"/#", 2, nil, -1},
+	{"/Wrong Char", 11, nil, -1},
+	{"/\xff", 2, nil, -1},
+};
+
+/* Runs every vector through pdfname and reports each result on fd 2. */
+void
+test_pdfname(void)
+{
+	char *in, *end;
+	int k, got;
+
+	fprint(2, "pdfname\n");
+	for(k = 0; k < nelem(t); k++){
+		in = t[k].in;
+		fprint(2, "\t%d: ", k);
+		got = pdfname(in, &end, t[k].len);
+		if(got != t[k].ret){
+			fprint(2, "expected r=%d, got %d", t[k].ret, got);
+			fprint(2, got < 0 ? " (%r)\n" : "\n");
+			continue;
+		}
+		if(got >= 0 && in+t[k].len != end){
+			fprint(2, "expected e=%p, got %p\n", in+t[k].len, end);
+			continue;
+		}
+		if(got >= 0 && strcmp(t[k].want, in) != 0){
+			fprint(2, "expected %q, got %q\n", t[k].want, in);
+			continue;
+		}
+		fprint(2, "OK");
+		if(got < 0)
+			fprint(2, " (%r)");
+		fprint(2, "\n");
+	}
+}
+#endif
--- /dev/null
+++ b/pdf.c
@@ -1,0 +1,5 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+static char whitespace[] = {0x00, 0x09, 0x0a, 0x0c, 0x0d, 0x20}; /* 7.2.2 white-space: NUL HT LF FF CR SP */
--- /dev/null
+++ b/pdf.h
@@ -1,0 +1,44 @@
+enum { /* object kinds; each comment names the spec section that defines the kind */
+ Obool, /* 7.3.2 Boolean Objects */
+ Onum, /* 7.3.3 Numeric Objects */
+ Ostr, /* 7.3.4 String Objects */
+ Oname, /* 7.3.5 Name Objects */
+ Oarray, /* 7.3.6 Array Objects */
+ Odict, /* 7.3.7 Dictionary Objects */
+ Ostream, /* 7.3.8 Stream Objects */
+ Onull, /* 7.3.9 Null Object */
+ Oindir, /* 7.3.10 Indirect Objects */
+};
+
+typedef struct Object Object;
+
+struct Object { /* NOTE(review): member meanings below are inferred from names -- confirm */
+ int type; /* presumably one of the O* enum constants */
+ union {
+ int bool; /* presumably the value of an Obool */
+ double num; /* presumably the value of an Onum */
+ char *str; /* presumably Ostr/Oname text; ownership is not shown here */
+ struct {
+ int id; /* presumably the object number of an Oindir reference */
+ int gen; /* presumably its generation number */
+ }indir;
+ };
+};
+
+/*
+ * 7.3.4 String Objects
+ *
+ * Decodes the literal "(...)" or hex "<...>" string of at most (len) bytes
+ * at (p) in place, null-terminates it and returns the decoded length.
+ * Returns < 0 and sets errstr if parsing failed.
+ * On success (*e) points just past the closing delimiter.
+ */
+int pdfstring(char *p, char **e, int len);
+
+/*
+ * 7.3.5 Name Objects
+ *
+ * Same contract as pdfstring, for a name of exactly (len) bytes at (p).
+ */
+int
+pdfname(char *p, char **e, int len);
--- /dev/null
+++ b/string.c
@@ -1,0 +1,182 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+static int esc[] = { /* char after a backslash -> replacement; 0 = unknown escape, < 0 = emit nothing */
+ ['n'] = '\n',
+ ['r'] = '\r',
+ ['t'] = '\t',
+ ['b'] = '\b',
+ ['f'] = '\f',
+ ['('] = '(',
+ [')'] = ')',
+ ['\\'] = '\\',
+ ['\n'] = -1, /* backslash-newline is dropped from the output */
+};
+
+/* Decodes the "<hex>" string at p in place; returns its length or -1 (errstr set). */
+static int
+pdfstringhex(char *p, char **e, int len)
+{
+	int i;
+
+	for(i = 1; i < len && p[i] != '>'; i++)
+		;
+	if(i >= len){
+		werrstr("hex not closed");
+		return -1;
+	}
+	p[i] = '0'; /* overwrite '>' with a pad digit: the final zero may be missing */
+	*e = p+i+1;
+	if(dec16((uchar*)p, i/2, p+1, i) != i/2){	/* i digits at p+1, pad included */
+		werrstr("invalid hex");
+		return -1;	/* bail out before terminating: -1 must not index p */
+	}
+	p[i/2] = 0;
+	return i/2;
+}
+
+/* 7.3.4: decodes a literal or hex string at p in place; contract in pdf.h. */
+int
+pdfstring(char *p, char **e, int len)
+{
+	Rune r;
+	int c, i, o, n, paren;	/* i: read offset, o: write offset, o <= i */
+
+	if(len < 2){
+		werrstr("too short");
+		goto err;
+	}
+
+	for(i = o = paren = 0; i < len;){
+		/* a one-byte Runeerror is malformed UTF-8, not an encoded U+FFFD */
+		if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
+			werrstr("rune error at byte %d", i);
+			goto err;
+		}
+
+		if(i == 0){	/* the opening delimiter selects the syntax */
+			if(r == '('){
+				paren = 1;
+				i++;
+				continue;
+			}
+			if(r == '<'){
+				if((len = pdfstringhex(p, e, len)) < 0)
+					goto err;
+				return len;
+			}
+			werrstr("invalid first char");
+			goto err;
+		}
+
+		if(r == '\\'){
+			if(++i >= len){
+				werrstr("escaped char out of string len");
+				goto err;
+			}
+			if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
+				werrstr("rune error at byte %d", i);
+				goto err;
+			}
+			if(r >= '0' && r <= '9'){ /* octal */
+				werrstr("octal chars not implemented"); /* FIXME */
+				goto err;
+			}
+			if(r >= nelem(esc) || (c = esc[r]) == 0){
+				werrstr("unknown escape char at byte %d", i);
+				goto err;
+			}
+			r = c;
+			i += n;
+			if(c < 0)	/* escaped newline: emit nothing */
+				continue;
+		}else if(r == '('){
+			/* nested parens are only counted, never copied to the output */
+			paren++;
+			i++;
+			continue;
+		}else if(r == ')'){
+			paren--;
+			i++;
+			if(paren == 0)
+				break;
+			continue;
+		}else
+			i += n;
+
+		o += runetochar(p+o, &r);
+	}
+
+	if(paren > 0){
+		werrstr("non-closed paren");
+		goto err;
+	}
+
+	p[o] = 0;
+	*e = p + i;
+
+	return o;
+err:
+	werrstr("string: %r");
+	return -1;
+}
+
+#ifdef TEST
+/* Test vectors for pdfstring; the inputs are rewritten in place by the call. */
+static struct {
+	char *in;	/* input text */
+	int len;	/* bytes of input passed to pdfstring */
+	char *want;	/* expected decoded string; nil when failure is expected */
+	int ret;	/* expected return value */
+	int e;	/* never set or read */
+}t[] = {
+	{"(simple string)", 15, "simple string", 13},
+	{"(non-closed paren", 17, nil, -1},
+	{"wrong first char", 16, nil, -1},
+	{"(parens((()((())))()))", 22, "parens", 6},
+	{"()", 2, "", 0},
+	{")", 1, nil, -1},
+	{"(\\)\\()", 6, ")(", 2},
+	{"(\\\\)", 4, "\\", 1},
+	{"a", 1, nil, -1},
+	{"(1\\\n2)", 6, "12", 2},
+	{"<323130>", 8, "210", 3},
+	{"<32313>", 7, "210", 3},
+	{"<>", 2, "", 0},
+	{"<", 1, nil, -1},
+	{"<zz>", 4, nil, -1},
+};
+
+/* Runs every vector through pdfstring and reports each result on fd 2. */
+void
+test_pdfstring(void)
+{
+	char *in, *end;
+	int k, got;
+
+	fprint(2, "pdfstring\n");
+	for(k = 0; k < nelem(t); k++){
+		in = t[k].in;
+		fprint(2, "\t%d: ", k);
+		got = pdfstring(in, &end, t[k].len);
+		if(got != t[k].ret){
+			fprint(2, "expected r=%d, got %d", t[k].ret, got);
+			fprint(2, got < 0 ? " (%r)\n" : "\n");
+			continue;
+		}
+		if(got >= 0 && in+t[k].len != end){
+			fprint(2, "expected e=%p, got %p\n", in+t[k].len, end);
+			continue;
+		}
+		if(got >= 0 && strcmp(t[k].want, in) != 0){
+			fprint(2, "expected %q, got %q\n", t[k].want, in);
+			continue;
+		}
+		fprint(2, "OK");
+		if(got < 0)
+			fprint(2, " (%r)");
+		fprint(2, "\n");
+	}
+}
+#endif