shithub: rssfill

Download patch

ref: 62e0c1460b517f101896a5079378e83e4bcd8483
parent: 99636f59fa87c0b351b0fa934970ff63a889d608
author: sirjofri <[email protected]>
date: Mon Apr 19 08:23:06 EDT 2021

adds xmlpull including CDATA support.

--- a/mkfile
+++ b/mkfile
@@ -2,7 +2,8 @@
 
 BIN=$home/bin/$objtype
 TARG=rssfill
-OFILES=rssfill.$O
+OFILES=rssfill.$O xmlpull.$O
 HFILES=rssfill.h xmlpull.h
+CFLAGS=-DPLAN9
 
 </sys/src/cmd/mkone
--- a/rssfill.c
+++ b/rssfill.c
@@ -24,11 +24,11 @@
 writefeedfiles(Feed *f)
 {
 	int fd;
-	char file[1024];
+	char *file = nil;
 	long d;
 	Tm t;
 	Dir dir;
-	
+
 	if(f != nil){
 		while(f->n != nil)
 			f = f->n;
@@ -41,18 +41,20 @@
 							sysfatal("tmparse: %r");
 						else
 							fprint(2, "tmparse: auto parsed date\n");
-				
+
 				d = tmnorm(&t);
-				
-				snprint(file, 1023, "%s/%s%ld", directory, prefix, d);
-				
+
+				if(file)
+					free(file);
+				file = smprint("%s/%s%ld", directory, prefix, d);
+
 				fd = create(file, OWRITE, 0666);
 				if(!fd)
 					sysfatal("error creating file %s: %r", file);
-				
+
 				if(chatty)
 					fprint(2, "writing file %s\n", file);
-				
+
 				if(dry){
 					f = f->p;
 					continue;
@@ -67,11 +69,11 @@
 					fprint(fd, "\n%s\n", f->desc);
 				if(f->cont != nil)
 					fprint(fd, "\n%s\n", f->cont);
-				
+
 				nulldir(&dir);
 				dir.mtime = d;
 				dirfwstat(fd, &dir);
-				
+
 				close(fd);
 			}
 			f = f->p;
@@ -276,22 +278,28 @@
 			if(!strcmp(x->na, "href") && st == LINK)
 				f->link = strdup(x->va);
 			break;
+		case CDATA:
 		case TEXT:
 			switch(st){
 			case TITLE:
-				f->title = strdup(x->na);
+				if (!f->title || strlen(f->title) == 0)
+					f->title = strdup(x->na);
 				break;
 			case LINK:
-				f->link = strdup(x->na);
+				if (!f->link || strlen(f->link) == 0)
+					f->link = strdup(x->na);
 				break;
 			case DESC:
-				f->desc = strdup(x->na);
+				if (!f->desc || strlen(f->desc) == 0)
+					f->desc = strdup(x->na);
 				break;
 			case CONTENT:
-				f->cont = strdup(x->na);
+				if (!f->cont || strlen(f->cont) == 0)
+					f->cont = strdup(x->na);
 				break;
 			case DATE:
-				f->date = strdup(x->na);
+				if (!f->date || strlen(f->date) == 0)
+					f->date = strdup(x->na);
 				break;
 			default:
 				break;
@@ -299,13 +307,13 @@
 			break;
 		case END_TAG:
 			if((!strcmp(x->na, "item") || !strcmp(x->na, "entry")) && st == ITEM){
-				if(searchfeed(r, f->title, f->link, f->desc, f->date) == nil){
+			//	if(searchfeed(r, f->title, f->link, f->desc, f->date) == nil){
 					r = addfeed(r, f);
 					f = nil;
-				} else {
-					freefeed(f);
-					f = nil;
-				}
+			//	} else {
+			//		freefeed(f);
+			//		f = nil;
+			//	}
 							
 				st = NONE;
 				break;
--- /dev/null
+++ b/xmlpull.c
@@ -1,0 +1,467 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+#ifndef PLAN9
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#endif
+#ifdef PLAN9
+#include <u.h>
+#include <libc.h>
+#endif
+#include "xmlpull.h"
+
+/* Resize p to s bytes, exiting on failure; non-zero d zero-fills it. */
+void *
+reallocp(void *p, int s, short d)
+{
+	void *q;
+
+	if((q = realloc(p, s)) == nil){
+		perror("realloc");
+		exits("realloc");
+	}
+	if(d != 0)
+		memset(q, 0, s);
+	return q;
+}
+
+/* Free x and the buffers it owns (na, va and the saved tag name lm). */
+void
+freexmlpull(xmlpull *x)
+{
+	if(x == nil)
+		return;
+
+	/* free(nil) is a no-op, so the fields need no individual guards */
+	free(x->na);
+	free(x->va);
+	free(x->lm);
+	free(x);
+}
+
+/* Allocate a parser that reads from fd, starting at START_DOCUMENT. */
+xmlpull *
+openxmlpull(int fd)
+{
+	xmlpull *x;
+
+	x = reallocp(nil, sizeof(xmlpull), 2);	/* non-zero flag: zero-filled */
+	x->fd = fd;
+	x->ev = START_DOCUMENT;
+	x->nev = START_DOCUMENT;
+	x->na = nil;
+	x->va = nil;
+	x->lm = nil;
+	x->ln = 0;
+	x->lv = 0;
+	x->la = 0;
+	return x;
+}
+
+/* Read one byte from x->fd; on EOF or error flag END_DOCUMENT, return 0. */
+char
+getchara(xmlpull *x)
+{
+	char c;
+
+	if(read(x->fd, &c, 1) > 0)
+		return c;
+
+	x->ev = END_DOCUMENT;
+	return '\0';
+}
+
+/* Append c to b (*l bytes long) and re-terminate it; *l grows by one. */
+char *
+addchara(char *b, int *l, char c)
+{
+	char *s = reallocp(b, *l + 2, 0);	/* room for c and the NUL */
+	s[(*l)++] = c;
+	s[*l] = '\0';
+	return s;
+}
+
+/*
+ * Append input to b (*l bytes) until w is read; returns b, nil at EOF.
+ * With t != 0 white space ends the token; '/' or '>' is appended, the
+ * possibly moved buffer is stored in x->na or x->va, and nil returned.
+ */
+char *
+readuntil(xmlpull *x, char *b, int *l, char w, char t)
+{
+	char g;
+
+	while((g = getchara(x)) != 0){
+		if(g == w)
+			return addchara(b, l, '\0');
+		switch(g){
+		case '/':
+		case '>':
+			if(t != 0){
+				b = addchara(b, l, g);
+				if(l == &x->ln)
+					x->na = b;
+				else if(l == &x->lv)
+					x->va = b;
+				return nil;
+			}	/* t == 0: fall through, it is ordinary text */
+		case '\t':
+		case '\r':
+		case '\n':
+		case ' ':
+			if(t != 0)
+				return b;
+			b = addchara(b, l, g);
+			break;
+		case '\\':
+			if((g = getchara(x)) == 0)	/* backslash: next byte is literal */
+				return nil;
+		default:	/* also entered from the '\\' case above */
+			b = addchara(b, l, g);
+			break;
+		}
+	}
+	return nil;
+}
+
+
+/*
+ * Read one name=value attribute into x->na and x->va; non-nil on success.
+ * Returns nil when readuntil does, when input ends after the name, or
+ * when '/' or '>' comes before a name (the byte is appended to x->na).
+ * NOTE(review): for an unquoted value the first byte read after the
+ * name part is not stored in x->va - confirm that this is intended.
+ */
+char *
+parseattrib(xmlpull *x)
+{
+	char c, *s;
+
+	/* skip leading white space */
+	do
+		c = getchara(x);
+	while(c == '\t' || c == '\r' || c == '\n' || c == ' ');
+
+	if(c != 0)
+		x->na = addchara(x->na, &x->ln, c);
+	if(c == '/' || c == '>')
+		return nil;
+
+	/* the rest of the name, up to '=' or white space */
+	s = readuntil(x, x->na, &x->ln, '=', 2);
+	if(s == nil)
+		return nil;
+	x->na = s;
+
+	/* the next byte decides how the value is delimited */
+	c = getchara(x);
+	if(c == 0)
+		return nil;
+
+	if(c == '"' || c == '\''){
+		/* quoted: the value runs up to the matching quote */
+		s = readuntil(x, x->va, &x->lv, c, 0);
+		if(s == nil)
+			return nil;
+		x->va = s;
+		return x->va;
+	}
+
+	/* unquoted: the value ends at '>' or white space */
+	s = readuntil(x, x->va, &x->lv, '>', 2);
+	if(s == nil)
+		return nil;
+	x->va = s;
+	return x->na;
+}
+
+/*
+ * Append input to x->na up to and including the first white space, '>'
+ * or '/' byte.  Returns x->na, or nil if the input ends before one is
+ * found.  The terminating byte is left in x->na for the caller, which
+ * looks at it to decide what follows.
+ */
+char *
+readname(xmlpull *x)
+{
+	char c;
+
+	while((c = getchara(x)) != 0){
+		x->na = addchara(x->na, &x->ln, c);
+		/* white space ends the name */
+		if(c == ' ' || c == '\t' || c == '\r' || c == '\n')
+			return x->na;
+		/* and so does the end of the tag */
+		if(c == '>' || c == '/')
+			return x->na;
+	}
+
+	return nil;
+}
+
+/* Read CDATA text into x->na, dropping the closing "]]>"; never nil. */
+char *
+readcdata(xmlpull *x)
+{
+	char g;
+	while((g = getchara(x)) != 0){
+		x->na = addchara(x->na, &x->ln, g);
+		/* compare only once 3 bytes exist, else we read before x->na */
+		if(x->ln >= 3 && strncmp("]]>", &x->na[x->ln-3], 3) == 0){
+			x->na[x->ln-3] = '\0';
+			break;
+		}
+	}
+	/* no input at all: return "" so callers always get a string */
+	return x->na != nil ? x->na : (x->na = reallocp(nil, 1, 2));
+}
+
+/*
+ * Probe for "[CDATA[" after "<!".  On a match read the section into x->na
+ * and return 1; otherwise return 0 with "!" and the probed bytes in x->na.
+ */
+int
+checkcdata(xmlpull *x)
+{
+	char *m, g;
+	x->na = addchara(x->na, &x->ln, '!');
+	for(m = "[CDATA["; *m != '\0'; m++){
+		if((g = getchara(x)) != 0)
+			x->na = addchara(x->na, &x->ln, g);
+		if(g != *m)
+			return 0;
+	}
+	free(x->na);	/* a CDATA section: the probe bytes are not content */
+	x->na = nil;
+	x->ln = 0;
+	x->ev = CDATA;
+	x->nev = TEXT;
+	x->na = readcdata(x);
+	return 1;
+}
+
+/* Read the next event into x (ev, na, va); nil if x->nev is unhandled. */
+xmlpull *
+nextxmlpull(xmlpull *x)
+{
+	char g;
+
+	free(x->va);
+	if(x->ev == START_TAG){
+		free(x->lm);
+		x->lm = x->na;	/* kept: the ATTR case restores it for "<tag ... />" */
+		x->la = x->ln;
+	} else
+		free(x->na);
+
+	x->na = nil;
+	x->va = nil;
+	x->ln = 0;
+	x->lv = 0;
+	g = '\0';
+
+	switch(x->nev){
+	case START_DOCUMENT:
+		if((x->na = readuntil(x, x->na, &x->ln, '<', 0)) == nil)
+			x->nev = END_DOCUMENT;
+		else
+			x->nev = START_TAG;
+		x->ev = START_DOCUMENT;
+		break;
+	case START_TAG:
+		g = getchara(x);
+		if(g == '/')
+			x->ev = END_TAG;
+		else if(g == '!' && checkcdata(x))
+			break;
+		else {
+			/* after a failed probe checkcdata has stored "!..." itself */
+			if(g != '!')
+				x->na = addchara(x->na, &x->ln, g);
+			x->ev = START_TAG;
+		}
+		/* no readname if the probe already consumed the end of the name */
+		if(!(g == '!' && strchr(" \t\r\n>/", x->na[x->ln - 1]) != nil)
+		&& readname(x) == nil)
+			x->nev = END_DOCUMENT;
+		else {
+			if(!strncmp(x->na, "!--", 3)){
+				x->na[x->ln - 1] = '\0';
+				x->nev = TEXT_C;
+				return x;
+			}
+			if(x->ev == END_TAG){
+				x->na[x->ln - 1] = '\0';
+				x->nev = TEXT;
+			} else {
+				switch(x->na[x->ln - 1]){
+				case '/':
+					getchara(x);
+					x->ev = START_END_TAG;
+					x->nev = TEXT;
+					x->na[x->ln - 1] = '\0';
+					break;
+				case '>':
+					x->nev = TEXT;
+					x->na[x->ln - 1] = '\0';
+					break;
+				default:
+					x->na[x->ln - 1] = '\0';
+					x->nev = ATTR;
+				}
+			}
+		}
+		break;
+	case TEXT_C:
+		g = '>';	/* fall through: a comment body ends at '>' */
+	case TEXT:
+		if(g != '>')
+			g = '<';
+		if((x->na = readuntil(x, x->na, &x->ln, g, 0)) == nil){
+			x->ev = END_DOCUMENT;
+			x->nev = END_DOCUMENT + 1;
+		} else {
+			if(x->nev == TEXT_C)
+				x->nev = TEXT;
+			else
+				x->nev = START_TAG;
+			x->ev = TEXT;
+		}
+		break;
+	case ATTR:
+		if(parseattrib(x) == nil){
+			if(x->na == nil)	/* input ended before the attribute began */
+				goto eof;
+			switch(x->na[x->ln - 1]){
+			case '/':
+				free(x->na);
+				x->na = x->lm;
+				x->ln = x->la;
+				x->lm = nil;
+				x->la = 0;
+				getchara(x);
+				x->ev = END_TAG;
+				x->nev = TEXT;
+				return x;
+			case '>':
+			default:
+				x->na[x->ln - 1] = '\0';
+			}
+			x->ev = ATTR;
+			x->nev = TEXT;
+			return nextxmlpull(x);
+		} else
+			x->nev = ATTR;
+		x->ev = ATTR;
+		break;
+	case END_DOCUMENT:
+	eof:
+		x->ev = END_DOCUMENT;
+		x->nev = END_DOCUMENT + 1;
+		break;
+	default:
+		return nil;
+	}
+	return x;
+}
+
+/* Write n bytes of s to x->fd; returns x, or nil if write fails. */
+static xmlpull *
+putraw(xmlpull *x, char *s, long n)
+{
+	if(write(x->fd, s, n) < 0)
+		return nil;
+	return x;
+}
+
+/*
+ * Format x->na with fmt into a zeroed scratch buffer of n bytes and
+ * write the resulting string to x->fd.  x->va, or "" when it is nil, is
+ * passed as a second argument; only a fmt with two %s makes use of it.
+ * Returns x, or nil if x->na is nil or write fails.
+ */
+static xmlpull *
+putfmt(xmlpull *x, char *fmt, int n)
+{
+	xmlpull *r;
+	char *s;
+
+	if(x->na == nil)
+		return nil;
+
+	/* non-zero third argument: reallocp zero-fills the buffer */
+	s = reallocp(nil, n, 2);
+	snprint(s, n, fmt, x->na, (x->va == nil) ? "" : x->va);
+	r = putraw(x, s, strlen(s));
+	free(s);
+	return r;
+}
+
+/*
+ * Write the markup for the state in x->nev to x->fd, taking the name or
+ * text from x->na and the attribute value from x->va.  Returns x on
+ * success and nil otherwise: when write fails, when x->na is needed but
+ * nil, for a state not listed below, and for END_DOCUMENT, which closes
+ * x->fd.
+ *
+ * x->ln and x->lv are used as the lengths of x->na and x->va when the
+ * scratch buffer is sized, and x->ln as the byte count for TEXT.
+ */
+xmlpull *
+writexmlpull(xmlpull *x)
+{
+	switch(x->nev){
+	case START_DOCUMENT:
+		/* the XML declaration; 39 is the length of the literal */
+		return putraw(x, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39);
+
+	case START_TAG:
+		/* no '>' yet: END_TAG_N or END_TAG_S below writes it */
+		return putfmt(x, "<%s ", x->ln + 3);
+
+	case START_END_TAG:
+		/* an element that is opened and closed at once */
+		return putfmt(x, "<%s/>", x->ln + 4);
+
+	case TEXT:
+		/* written as it is, x->ln bytes of it */
+		if(x->na == nil)
+			return nil;
+		return putraw(x, x->na, x->ln);
+
+	case TEXT_C:
+		/* text followed by the end of a comment */
+		return putfmt(x, "%s -->", x->ln + 5);
+
+	case ATTR:
+		/* name="value", with a blank after it */
+		return putfmt(x, "%s=\"%s\" ", x->ln + x->lv + 5);
+
+	case END_TAG:
+		/* the closing tag for the name in x->na */
+		return putfmt(x, "</%s>", x->ln + 4);
+
+	case END_TAG_S:
+		/* writes "/>" */
+		return putraw(x, "/>", 2);
+
+	case END_TAG_N:
+		/* writes ">" */
+		return putraw(x, ">", 1);
+
+	case END_DOCUMENT:
+		/* nothing is written; the descriptor is closed */
+		close(x->fd);
+		return nil;
+
+	default:
+		break;
+	}
+
+	return nil;
+}
+
--- a/xmlpull.h
+++ b/xmlpull.h
@@ -3,9 +3,11 @@
  * by 20h
  */
 
+/*
 #ifdef nil
 #pragma lib "libxmlpull.a"
 #endif
+*/
 
 #ifndef XMLPULL_H
 #define XMLPULL_H
@@ -21,6 +23,7 @@
 	START_DOCUMENT = 0x0,
 	START_TAG,
 	START_END_TAG,
+	CDATA,
 	TEXT,
 	TEXT_C,
 	ATTR,