ref: e864dc493153ca5083018c94a3737165613ef0d5
parent: 17b3ae4f1a1ce71b26bd2d383c9be513af29bb66
author: sirjofri <[email protected]>
date: Thu Jul 11 11:29:18 EDT 2024
adds first libxpath functionality
--- a/README
+++ b/README
@@ -10,6 +10,10 @@
- xb:
/$objtype/bin/xb
/sys/man/1/xb
+- libxpath: (not from 9atom)
+ /sys/include/xpath.h
+ /$objtype/lib/libxpath.a
+ /sys/man/2/xpath
Installation:
@@ -21,3 +25,17 @@
mk nuke
This will remove all the installed files.
+
+
+Libxpath
+
+Currently supported XPath rules:
+
+- root path: /path/from/root
+- attribute path: /path/to/@attribute
+- text path: /path/to/text()
+- attribute filter: /path/to[@attribute='value']/filtered
+- select all path: /path/to//all/children
+- numbered element: /path/to/second[2]/element
+
+There are probably bugs.
--- /dev/null
+++ b/libxpath/mkfile
@@ -1,0 +1,12 @@
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libxpath.a
+
+OFILES=\
+ xmllookpath.$O\
+
+HFILES=\
+ /sys/include/xml.h\
+ /sys/include/xpath.h\
+
+</sys/src/cmd/mksyslib
--- /dev/null
+++ b/libxpath/xmllookpath.c
@@ -1,0 +1,301 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include <regexp.h>
+
+Reprog *fattr = nil; /* pattern for a [@attr='value'] filter; compiled on the first xmllookpath call */
+Reprog *fnum = nil; /* pattern for a [n] numeric index; compiled on the first xmllookpath call */
+Reprog *fattrend = nil; /* pattern for an @attr at the end of a step; compiled on the first xmllookpath call */
+
+/*
+ * Report whether element e carries an attribute called attr whose
+ * value is exactly value.  Returns 1 on a match and 0 otherwise.
+ */
+static int
+attrmatches(Elem *e, char *attr, char *value)
+{
+	Attr *cur;
+
+	cur = e->attrs;
+	while (cur != nil) {
+		if (strcmp(cur->name, attr) == 0 && strcmp(cur->value, value) == 0)
+			return 1;
+		cur = cur->next;
+	}
+	return 0;
+}
+
+/*
+ * Round m up to the next multiple of 32 that is strictly greater
+ * than m; used as the element count when growing a result array.
+ */
+static int
+bufsize(int m)
+{
+	enum { Chunk = 32 };
+	int chunks;
+
+	chunks = m / Chunk + 1;
+	return chunks * Chunk;
+}
+
+/*
+ * Debug helper: write element e and all of its attributes to
+ * file descriptor 2 in the form <name attr='value' ... />.
+ * No trailing newline is printed.
+ */
+static void
+dbgprintnode(Elem *e)
+{
+	Attr *cur;
+
+	fprint(2, "<%s", e->name);
+	cur = e->attrs;
+	while (cur != nil) {
+		fprint(2, " %s='%s'", cur->name, cur->value);
+		cur = cur->next;
+	}
+	fprint(2, " />");
+}
+
+/*
+ * Append the entries of b to the accumulator a.
+ *
+ * Ownership of b's array passes to this function: it is either
+ * adopted by a (when a is still empty) or copied into a's array and
+ * freed.  An empty b (num < 1) is ignored.  Mixing element and string
+ * results is a fatal error.
+ *
+ * a->size is kept as the real capacity of a's array so that the array
+ * is only reallocated when it is actually full.  Allocation failure
+ * is fatal, matching the sysfatal already used for type mismatches.
+ */
+static void
+appendresult(XpResult *a, XpResult b)
+{
+	int n, cap;
+	void *p;
+
+	if (b.num < 1)
+		return;
+	if (!a->type) {
+		/* a holds nothing yet: take over b, including its buffer */
+		*a = b;
+		goto Out;
+	}
+	if (a->type != b.type)
+		sysfatal("error: incompatible type");
+	n = a->num + b.num;
+	switch (a->type) {
+	case XTelem:
+		if (n > a->size) {
+			cap = bufsize(n);
+			/* keep the old pointer until realloc is known to have succeeded */
+			p = realloc(a->elems, cap * sizeof(Elem*));
+			if (p == nil)
+				sysfatal("appendresult: realloc: %r");
+			a->elems = p;
+			a->size = cap;
+		}
+		memcpy(&a->elems[a->num], b.elems, b.num * sizeof(Elem*));
+		a->num = n;
+		free(b.elems);
+		break;
+	case XTstring:
+		if (n > a->size) {
+			cap = bufsize(n);
+			p = realloc(a->strings, cap * sizeof(char*));
+			if (p == nil)
+				sysfatal("appendresult: realloc: %r");
+			a->strings = p;
+			a->size = cap;
+		}
+		memcpy(&a->strings[a->num], b.strings, b.num * sizeof(char*));
+		a->num = n;
+		free(b.strings);
+		break;
+	}
+
+Out:
+	if (xmldebug) {
+		fprint(2, "appendresult:\n");
+		fprint(2, " type: %s\n", a->type == XTelem ? "elems" : "string");
+		switch (a->type) {
+		case XTelem:
+			for (n = 0; n < a->num; n++) {
+				fprint(2, " e: ");
+				dbgprintnode(a->elems[n]);
+				fprint(2, "\n");
+			}
+			break;
+		case XTstring:
+			for (n = 0; n < a->num; n++)
+				fprint(2, " s: %s\n", a->strings[n]);
+			break;
+		}
+	}
+}
+
+/*
+ * Make a a string result holding exactly one entry, s.
+ * The pointer s is stored as is (the string is not copied); only the
+ * one-slot array is allocated here.  Allocation failure is fatal
+ * instead of leading to a nil dereference.
+ */
+static void
+buildsinglestring(XpResult *a, char *s)
+{
+	char **slot;
+
+	slot = malloc(sizeof(char*));
+	if (slot == nil)
+		sysfatal("buildsinglestring: malloc: %r");
+	slot[0] = s;
+
+	a->type = XTstring;
+	a->num = a->size = 1;
+	a->strings = slot;
+}
+
+/*
+ * Make a an element result holding exactly one entry, e.
+ * Only the one-slot array is allocated here; e itself is referenced,
+ * not copied.  Allocation failure is fatal instead of leading to a
+ * nil dereference.
+ */
+static void
+buildsingleelem(XpResult *a, Elem *e)
+{
+	Elem **slot;
+
+	slot = malloc(sizeof(Elem*));
+	if (slot == nil)
+		sysfatal("buildsingleelem: malloc: %r");
+	slot[0] = e;
+
+	a->type = XTelem;
+	a->num = a->size = 1;
+	a->elems = slot;
+}
+
+/*
+ * Return a pointer to the '/' that ends the first location step of
+ * path, or nil if path consists of a single step.  A '/' inside a
+ * [...] predicate (for example in an attribute value) does not end
+ * the step.
+ */
+static char*
+stepend(char *path)
+{
+	int depth;
+	char *p;
+
+	depth = 0;
+	for (p = path; *p; p++) {
+		if (*p == '[')
+			depth++;
+		else if (*p == ']' && depth > 0)
+			depth--;
+		else if (*p == '/' && depth == 0)
+			return p;
+	}
+	return nil;
+}
+
+/*
+ * Search for elements, attribute values or text using an XPath
+ * string, starting at ep.
+ *
+ * Supported steps:
+ *	/a/b		path anchored at the topmost ancestor of ep
+ *	a/b		path relative to ep (children of ep named a, ...)
+ *	a//b		every b below a, at any depth
+ *	a[@k='v']	children named a whose attribute k equals v
+ *	a[n]		the n-th (1-based) child named a
+ *	@k		value of attribute k of the current element
+ *	text()		pcdata of the current element
+ *
+ * An empty or nil path yields ep itself.
+ *
+ * The result array (r.elems or r.strings) is allocated with malloc and
+ * belongs to the caller.  The entries are pointers to the elements,
+ * attribute values and pcdata of the tree; nothing is copied.  A
+ * result with type 0 and num 0 means nothing was found.
+ *
+ * path is never modified: each step is parsed in a private copy.
+ */
+XpResult
+xmllookpath(Elem *ep, char *path)
+{
+	Resub match[3];
+	Elem *el, *first;
+	Attr *a;
+	XpResult r;
+	char *full, *sep, *rest, *step, *attr, *val;
+	int isroot, catchall, want, seen;
+
+	if (!fattr)
+		fattr = regcomp("\\[@(.+)=\\'(.+)\\'\\]");
+	if (!fnum)
+		fnum = regcomp("\\[([0-9]+)\\]");
+	if (!fattrend)
+		fattrend = regcomp("@(.+)$");
+	if (!fattr || !fnum || !fattrend)
+		sysfatal("xmllookpath: regcomp failed");
+
+	if (xmldebug)
+		fprint(2, "xmllookpath: %s %s\n", ep->name, path ? path : "");
+
+	memset(&r, 0, sizeof(XpResult));
+
+	if (!path || !*path) {
+		if (xmldebug)
+			fprint(2, " final, return %s\n", ep->name);
+		buildsingleelem(&r, ep);
+		return r;
+	}
+
+	/* a leading '/' anchors at the document root, '//' selects all descendants */
+	full = path;
+	isroot = 0;
+	catchall = 0;
+	if (path[0] == '/') {
+		if (path[1] == '/') {
+			catchall = 1;
+			path += 2;
+		} else {
+			isroot = 1;
+			path++;
+		}
+	}
+
+	if (catchall) {
+		if (xmldebug)
+			fprint(2, " rule catchall matches: %s\n", path);
+		/*
+		 * Matches directly below ep come first, then the same
+		 * '//...' query is repeated below every child of ep, so
+		 * every descendant is visited exactly once.
+		 */
+		appendresult(&r, xmllookpath(ep, path));
+		for (el = ep->child; el; el = el->next)
+			appendresult(&r, xmllookpath(el, full));
+		return r;
+	}
+
+	if (isroot) {
+		while (ep->parent)
+			ep = ep->parent;
+	}
+
+	/*
+	 * Split off the first step.  When the separator is '//' the
+	 * remainder keeps both slashes so the recursive call sees it as
+	 * a descendant query.
+	 */
+	sep = stepend(path);
+	if (sep == nil)
+		rest = nil;
+	else if (sep[1] == '/')
+		rest = sep;
+	else
+		rest = sep + 1;
+
+	/* private, writable copy of the step; the predicate parsing below cuts it up */
+	step = strdup(path);
+	if (step == nil)
+		sysfatal("xmllookpath: strdup: %r");
+	if (sep != nil)
+		step[sep - path] = 0;
+
+	if (xmldebug) {
+		fprint(2, " query is root: %d\n", isroot);
+		fprint(2, " testing path part: %s\n", step);
+		fprint(2, " new path part: %s\n", rest ? rest : "");
+	}
+
+	/* a root-anchored step tests the root element itself, otherwise the children of ep */
+	first = isroot ? ep : ep->child;
+
+	memset(match, 0, sizeof match);
+	if (regexec(fattr, step, match, 3)) {
+		if (xmldebug)
+			fprint(2, " rule [a=b] matches: %s\n", step);
+		*match[0].sp = 0;	/* step now holds only the element name */
+		attr = match[1].sp;
+		*match[1].ep = 0;
+		val = match[2].sp;
+		*match[2].ep = 0;
+
+		for (el = first; el; el = el->next) {
+			/* an empty name in front of the predicate accepts any element */
+			if (*step && strcmp(el->name, step) != 0)
+				continue;
+			if (!attrmatches(el, attr, val))
+				continue;
+			appendresult(&r, xmllookpath(el, rest));
+		}
+		goto Out;
+	}
+
+	memset(match, 0, sizeof match);
+	if (regexec(fnum, step, match, 3)) {
+		if (xmldebug)
+			fprint(2, " rule [n] matches: %s\n", step);
+		*match[0].sp = 0;	/* step now holds only the element name */
+		*match[1].ep = 0;
+		want = atoi(match[1].sp);
+
+		seen = 0;
+		for (el = first; el; el = el->next) {
+			if (strcmp(el->name, step) != 0)
+				continue;
+			if (++seen == want) {
+				r = xmllookpath(el, rest);
+				break;
+			}
+		}
+		goto Out;
+	}
+
+	memset(match, 0, sizeof match);
+	if (regexec(fattrend, step, match, 3)) {
+		if (xmldebug)
+			fprint(2, " rule @attr matches: %s - %s\n", ep->name, step);
+		*match[1].ep = 0;
+		attr = match[1].sp;
+		for (a = ep->attrs; a; a = a->next) {
+			if (strcmp(a->name, attr) != 0)
+				continue;
+			buildsinglestring(&r, a->value);
+			if (xmldebug)
+				fprint(2, " value: %s\n", a->value);
+			break;
+		}
+		if (xmldebug && r.num == 0)
+			fprint(2, " no value\n");
+		goto Out;
+	}
+
+	if (strcmp(step, "text()") == 0) {
+		if (xmldebug)
+			fprint(2, " rule text() matches: %s\n", step);
+		buildsinglestring(&r, ep->pcdata);
+		goto Out;
+	}
+
+	/* plain element name: continue below every candidate with that name */
+	for (el = first; el; el = el->next) {
+		if (xmldebug) {
+			fprint(2, " runchildren: ");
+			dbgprintnode(el);
+			fprint(2, "\n");
+		}
+		if (strcmp(el->name, step) == 0)
+			appendresult(&r, xmllookpath(el, rest));
+	}
+
+Out:
+	free(step);
+	return r;
+}
--- a/mkfile
+++ b/mkfile
@@ -3,8 +3,11 @@
INSTALLFILES=\
/sys/man/1/xb \
/sys/man/2/xml \
+ /sys/man/2/xpath \
/sys/include/xml.h \
+ /sys/include/xpath.h \
/$objtype/lib/libxml.a \
+ /$objtype/lib/libxpath.a \
/$objtype/bin/xb \
CFLAGS=$CFLAGS -I..
@@ -21,12 +24,15 @@
/sys/man/2/xml: xml
cp $prereq $target
-/sys/include/xml.h: xml.h
+/sys/man/2/xpath: xpath
cp $prereq $target
-/$objtype/lib/libxml.a:
- cd libxml && mk install && cd ..
+/sys/include/%.h: %.h
+ cp $prereq $target
+/$objtype/lib/%.a:V:
+ cd $stem && mk install && cd ..
+
/$objtype/bin/xb: $O.xb
cp $prereq $target
@@ -38,7 +44,8 @@
clean:V:
cd libxml && mk clean && cd ..
+ cd libxpath && mk clean && cd ..
rm -f [$OS].* *.[$OS]
-nuke:V:
+nuke:V: clean
rm -f $INSTALLFILES
--- /dev/null
+++ b/xpath
@@ -1,0 +1,54 @@
+.TH XPATH 2
+.SH NAME
+xmllookpath
+\- XPath support
+.SH SYNOPSIS
+.de PB
+.PP
+.ft L
+.nf
+..
+.PB
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+.PB
+enum {
+ XTelem = 1,
+ XTstring = 2,
+};
+.PB
+struct XpResult {
+ int type; /* type of XpResult */
+ int num; /* number of results */
+ union { /* array of results */
+ char **strings; /* if type == XTstring */
+ Elem **elems; /* if type == XTelem */
+ };
+ ...
+};
+.PB
+.PD 0
+.ta +\w'\fLXpResult 'u
+XpResult xmllookpath(Elem *ep, char *xpath)
+.SH DESCRIPTION
+.PP
+.I Libxpath
+is an extension library to
+.IR libxml .
+It provides XPath functionality for looking up certain nodes in an
+existing in-memory XML DOM model.
+.PP
+.I Xmllookpath
+is the main function for querying the XML document using an XPath string.
+It uses
+.I ep
+as the reference element within the DOM model.
+.SH SOURCE
+/sys/src/libxpath
+.SH "SEE ALSO"
+.IR xml (2).
+.SH BUGS
+The current implementation of XPath is incomplete and very limited.
+A future implementation should be able to support the full set of XPath.
--- /dev/null
+++ b/xpath.h
@@ -1,0 +1,19 @@
+#pragma lib "libxpath.a"
+
+enum {
+ XTelem = 1, /* result array holds Elem pointers (elems) */
+ XTstring = 2, /* result array holds strings (strings) */
+};
+
+typedef struct XpResult XpResult;
+struct XpResult {
+ int type; /* XTelem or XTstring; xmllookpath.c zeroes the struct for an empty result */
+ int size; /* set to 1 for a one-slot array; xmllookpath.c compares it with the needed count before growing */
+ int num; /* number of entries in the array */
+ union {
+ char **strings; /* used when type == XTstring */
+ Elem **elems; /* used when type == XTelem */
+ };
+};
+
+XpResult xmllookpath(Elem *, char *); /* look up an XPath string starting at the given element */