shithub: riscv

Download patch

ref: 6c91d99ce29f8cb6c47c0c535c9162f248f1fdb1
parent: 7a29aa57cbc0a5ccf015ded763b54046235275ec
author: cinap_lenrek <cinap_lenrek@centraldogma>
date: Tue Sep 20 00:14:29 EDT 2011

uhtml: remove leading utf BOM marker, html2ms: fix underline handling and escaping

--- a/sys/src/cmd/html2ms.c
+++ b/sys/src/cmd/html2ms.c
@@ -142,8 +142,24 @@
 }
 
 void
-ona(Text *text, Tag *)
+restoreunderline(Text *text, Tag *tag)
 {
+	text->underline = tag->restore;
+	emit(text, "");
+}
+
+void
+ona(Text *text, Tag *tag)
+{
+	int i;
+
+	for(i=0; i<tag->nattr; i++)
+		if(cistrcmp(tag->attr[i].attr, "href") == 0)
+			break;
+	if(i == tag->nattr)
+		return;
+	tag->restore = text->underline;
+	tag->close = restoreunderline;
 	text->underline = 1;
 }
 
@@ -207,10 +223,13 @@
 			if(n != 7 || cistrncmp(buf, "[CDATA[", 7))
 				continue;
 			while((c = Bgetc(&in)) > 0){
-				if(c == ']')
-					if(Bgetc(&in) == ']')
-						if(Bgetc(&in) == '>')
-							return;
+				if(c == ']'){
+					if(Bgetc(&in) == ']'){
+						if(Bgetc(&in) != '>')
+							Bungetc(&in);
+						return;
+					}
+				}
 			}
 		}
 	}
@@ -425,10 +444,9 @@
 			case '\r':
 			case ' ':
 			case '\t':
-				if(text->pre == 0){
-					text->space = 1;
+				text->space = 1;
+				if(text->pre == 0)
 					continue;
-				}
 			default:
 				if(r == '\n' || r == '\r')
 					text->pos = 0;
@@ -435,8 +453,8 @@
 				if(text->space){
 					text->space = 0;
 					if(text->underline){
-						emit(text, "");
-						text->pos = Bprint(&out, ".UL ");
+						emit(text, ".UL ");
+						text->pos = 1;
 					} else if(text->pos >= 70){
 						text->pos = 0;
 						Bputc(&out, '\n');
@@ -445,16 +463,15 @@
 						Bputc(&out, ' ');
 					}
 				}
-				if(text->pos == 0 && r == '.'){
-					text->pos++;
-					Bputc(&out, ' ');
-				}
-				text->pos++;
-				if(r == 0xA0){
+				if(text->pos == 0 && r == '.')
+					text->pos += Bprint(&out, "\\&");
+				else if(r == '\\')
+					text->pos += Bprint(&out, "\\&\\");
+				else if(r == 0xA0){
 					r = ' ';
-					Bputc(&out, '\\');
+					text->pos += Bprint(&out, "\\");
 				}
-				Bprint(&out, "%C", r);
+				text->pos += Bprint(&out, "%C", r);
 			}
 		}
 	}
@@ -473,7 +490,10 @@
 	Binit(&out, 1, OWRITE);
 
 	memset(&text, 0, sizeof(text));
+
+	text.font = "R";
 	text.output = 1;
+
 	parsetext(&text, nil);
 	emit(&text, "\n");
 }
--- a/sys/src/cmd/page.c
+++ b/sys/src/cmd/page.c
@@ -623,9 +623,9 @@
 		p->data = "lp -dstdout";
 		p->open = popengs;
 	}
-	else if(cistrncmp(buf, "<?xml", 5) == 0 ||
-		cistrncmp(buf, "<!DOCTYPE", 9) == 0 ||
-		cistrncmp(buf, "<HTML", 5) == 0){
+	else if(cistrstr(buf, "<?xml") ||
+		cistrstr(buf, "<!DOCTYPE") ||
+		cistrstr(buf, "<HTML")){
 		p->data = "uhtml | html2ms | troff -ms | lp -dstdout";
 		p->open = popengs;
 	}
--- a/sys/src/cmd/uhtml.c
+++ b/sys/src/cmd/uhtml.c
@@ -62,6 +62,11 @@
 	if((nbuf = read(0, buf, sizeof(buf)-1)) < 0)
 		sysfatal("read: %r");
 	buf[nbuf] = 0;
+
+	/* useless BOM marker */
+	if(memcmp(buf, "\xEF\xBB\xBF", 3)==0)
+		memmove(buf, buf+3, nbuf-3);
+
 	for(;;){
 		if(s = cistrstr(buf, "encoding="))
 			if(s = strval(s+9)){