ref: df075d407417a84a3dacf44044fd0907b295f39e
dir: /pdf.c/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "pdf.h"

enum {
	/* size in bytes of one entry in a classic xref subsection, as read below */
	Xrefline = 20,
	/* upper bound on entry counts so that count*Xrefline stays within int range */
	Maxxref = 0x7fffffff/Xrefline,
};

/*
 * pre-1.5 xref section reader
 * PDF>=1.5 may have BOTH (or either) old xref format and xref streams
 *
 * Reads nxref fixed-width entries from pdf->bio, starting at the current
 * read position, and merges them into pdf->xref. Entries are numbered
 * xref0, xref0+1, ... An entry whose id is already present either updates
 * the stored offset ('n') or marks the stored entry for removal ('f');
 * unknown ids are appended only when in use ('n'). Entries with id 0 are
 * dropped when the table is compacted at the end.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
xrefread(Pdf *pdf, int xref0, int nxref)
{
	int i, j, sz, n, newnxref;
	Xref xref;
	char *s, *e;
	Xref *x;

	s = nil;

	/* nxref comes straight from the file: refuse counts that would overflow the arithmetic below */
	if(nxref < 0 || nxref > Maxxref || pdf->nxref > Maxxref - nxref){
		werrstr("xref section too large");
		goto err;
	}

	/* make room for the worst case: every entry is new */
	if((x = realloc(pdf->xref, (pdf->nxref + nxref)*sizeof(Xref))) == nil)
		goto err;
	pdf->xref = x;

	/* read the entire thing at once */
	sz = nxref*Xrefline;
	if((s = malloc(sz)) == nil)
		goto err;
	for(i = 0; i < sz; i += n){
		if((n = Bread(pdf->bio, s+i, sz-i)) < 1)
			goto err;
	}

	/* store non-free objects only */
	newnxref = pdf->nxref;
	for(e = s, i = 0; i < nxref; i++, e += Xrefline){
		/* bytes 10, 18 and 19 of every entry must be whitespace; byte 17 is the entry type */
		if(!isspace((uchar)e[10]) || !isspace((uchar)e[18]) || !isspace((uchar)e[19])){
			werrstr("invalid xref line (%d/%d)", i, nxref);
			goto err;
		}
		xref.id = xref0 + i;
		xref.off = strtoul(e, nil, 10);

		/* search in already existing xrefs, update if found */
		for(j = 0; j < pdf->nxref; j++){
			if(pdf->xref[j].id != xref.id)
				continue;
			if(e[17] == 'f') /* it was freed */
				pdf->xref[j].id = 0;
			else if(e[17] == 'n')
				pdf->xref[j].off = xref.off;
			break;
		}
		if(j >= pdf->nxref && e[17] == 'n') /* that's a new one, insert unless it's free */
			pdf->xref[newnxref++] = xref;
	}
	free(s);
	s = nil;

	/* scale down: squeeze out the entries that were marked with id 0 */
	for(i = j = 0; i < newnxref; i++){
		if(pdf->xref[i].id != 0)
			pdf->xref[j++] = pdf->xref[i];
	}
	if(j == 0){
		/*
		 * Nothing left. Do not call realloc with a zero size: it may
		 * release the buffer and return nil, which would be mistaken
		 * for a failure and leave pdf->xref dangling.
		 */
		free(pdf->xref);
		pdf->xref = nil;
	}else{
		if((x = realloc(pdf->xref, j*sizeof(Xref))) == nil)
			goto err;
		pdf->xref = x;
	}
	pdf->nxref = j;

	return 0;
err:
	free(s);
	return -1;
}

/*
 * Parses the object at the current position of pdf->bio, requires it to be
 * a dictionary, and stores pdfref() of its "Root" and "Info" entries in
 * pdf->root and pdf->info. The parsed object is released before returning.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
trailerread(Pdf *pdf)
{
	Object *o;

	if((o = pdfobj(pdf, pdf->bio)) == nil)
		goto err;
	if(o->type != Odict){
		werrstr("isn't a dictionary");
		goto err;
	}
	pdf->root = pdfref(dictget(o, "Root"));
	pdf->info = pdfref(dictget(o, "Info"));
	pdfobjfree(o);

	return 0;
err:
	pdfobjfree(o);
	return -1;
}

/*
 * Opens a PDF for reading from file descriptor fd.
 *
 * Checks the "%PDF-x.y" header, finds "startxref" in the last 63 bytes of
 * the file, seeks to the offset given there and then reads either classic
 * xref subsections followed by a trailer dictionary, or an object that
 * streamopen() accepts (xref stream).
 *
 * Returns the new Pdf, or nil on failure with the error string set.
 */
Pdf *
pdfopen(int fd)
{
	Pdf *pdf;
	Biobuf *b;
	Object *o;
	char tmp[64], *s, *x;
	int xref0; /* 7.5.4 xref subsection first object number */
	int nxref; /* 7.5.4 xref subsection number of objects */
	int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
	int i, n, off, w[3];
	Stream *stream;
	vlong erroff;

	fmtinstall('T', Tfmt);

	b = nil;
	o = nil;
	if((pdf = calloc(1, sizeof(*pdf))) == nil || (b = Bfdopen(fd, OREAD)) == nil)
		goto err;
	pdf->bio = b;

	/* check header */
	if(Bread(b, tmp, 8) != 8 ||
	   strncmp(tmp, "%PDF-", 5) != 0 || !isdigit((uchar)tmp[5]) ||
	   tmp[6] != '.' || !isdigit((uchar)tmp[7])){
		werrstr("not a pdf");
		goto err;
	}

	/* 7.5.4, 7.5.8 xref table */

	/* read a block of data from the end of the file */
	n = sizeof(tmp)-1;
	Bseek(b, -n, 2);
	if(Bread(b, tmp, n) != n){
badtrailer:
		werrstr("invalid trailer");
		goto err;
	}
	tmp[n] = 0;

	/* search for a valid string that the block ends with */
	for(i = n-1, s = &tmp[i]; i > 0 && *s != 0; i--, s--);
	s++;

	/* find "startxref": locate the last 'f' and check the eight bytes before it */
	if((x = strrchr(s, 'f')) == nil ||
	   !isws(x[1]) ||
	   x-8 < s+1 ||
	   memcmp(x-8, "startxref", 9) != 0)
		goto badtrailer;
	x++;
	if((xreftb = strtol(x, nil, 10)) < 1)
		goto badtrailer;

	/* read xref */
	if(Bseek(b, xreftb, 0) != xreftb){
		werrstr("xref position out of range");
		goto err;
	}

morexref:
	/* remember where this chunk starts so later seeks can be made absolute */
	off = Bseek(b, 0, 1);
	n = sizeof(tmp)-1;
	if((n = Bread(b, tmp, n)) < 16){
badxref:
		werrstr("invalid xref: %r");
		goto err;
	}
	tmp[n] = 0;

	if(memcmp(tmp, "xref", 4) == 0){
		/* 7.5.4 xref */
		x = tmp+4;
		nxref = -1;
		if((xref0 = strtol(x, &x, 10)) < 0 || (nxref = strtol(x, &x, 10)) < 1){
			werrstr("xref0=%d nxref=%d", xref0, nxref);
			goto badxref;
		}

		/* skip whitespace and move to the first subsection */
		for(; isws(*x) && x < tmp+n; x++);
		n = x-tmp+off;
		if(Bseek(b, n, 0) != n || xrefread(pdf, xref0, nxref) != 0)
			goto badxref;
		goto morexref; /* there could be more updates, try it */
	}else if(memcmp(tmp, "trailer", 7) == 0){
		/* 7.5.5 file trailer */
		/* move to the trailer dictionary */
		n = off + 8;
		if(Bseek(b, n, 0) != n || trailerread(pdf) != 0){
			werrstr("invalid trailer: %r");
			goto err;
		}
	}else if(isdigit((uchar)tmp[0])){
		/* could be 7.5.8 xref stream (since PDF 1.5) */
		Bseek(b, xreftb, 0);
		if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(o)) == nil){
			werrstr("failed to stream xref: %r");
			goto badxref;
		}
		if(dictints(o, "W", w, nelem(w)) != 3){
			/* release the stream before bailing out, it is not tracked anywhere else */
			streamclose(stream);
			werrstr("W isn't 3 elements");
			goto badxref;
		}
		streamclose(stream);
		/*
		 * NOTE(review): unlike trailerread(), the dictget() results are
		 * stored without pdfref() and o is not released on success;
		 * confirm whether that is intended.
		 */
		pdf->root = dictget(o, "Root");
		pdf->info = dictget(o, "Info");
	}

	/* root is required, info is optional */
	if(pdf->root == nil){
		werrstr("no root");
		goto err;
	}

	return pdf;
err:
	/* b is still nil when calloc or Bfdopen failed, so it cannot be asked for an offset */
	erroff = b != nil ? Boffset(b) : 0;
	werrstr("pdfopen: %r [at %p]", (void*)erroff);
	pdfclose(pdf);
	pdfobjfree(o);
	return nil;
}

/*
 * Releases a Pdf returned by pdfopen(): terminates its Biobuf if there is
 * one, then frees the xref table and the structure itself. A nil pdf is
 * accepted and ignored.
 */
void
pdfclose(Pdf *pdf)
{
	if(pdf == nil)
		return;
	if(pdf->bio != nil)
		Bterm(pdf->bio);
	free(pdf->xref);
	free(pdf);
}