shithub: riscv

Download patch

ref: 917da0089dcaa013979a69aaeaeff0c08cbc7e26
parent: 8003c8b1e2d5d6e2a22ca7e552b53e631db86df4
author: cinap_lenrek <[email protected]>
date: Thu Sep 24 08:23:17 EDT 2015

cpp: handle 4-byte UTF-8 sequences (21-bit runes)

--- a/sys/src/cmd/cpp/lex.c
+++ b/sys/src/cmd/cpp/lex.c
@@ -29,6 +29,7 @@
 
 #define	UTF2(c)		((c)>=0xA0 && (c)<0xE0)		/* 2-char UTF seq */
 #define	UTF3(c)		((c)>=0xE0 && (c)<0xF0)		/* 3-char UTF seq */
+#define	UTF4(c)		((c)>=0xF0 && (c)<0xF8)		/* 4-char UTF seq */
 
 /* character classes */
 #define	C_WS	1
@@ -259,7 +260,7 @@
 			case C_ALPH:
 				for (j=0; j<=256; j++)
 					if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z'
-					  || UTF2(j) || UTF3(j) || j=='_')
+					  || UTF2(j) || UTF3(j) || UTF4(j) || j=='_')
 						bigfsm[j][fp->state] = nstate;
 				continue;
 			case C_NUM:
@@ -274,7 +275,7 @@
 	/* install special cases for ? (trigraphs),  \ (splicing), runes */
 	for (i=0; i<MAXSTATE; i++) {
 		for (j=0; j<0xFF; j++)
-			if (j=='?' || j=='\\' || UTF2(j) || UTF3(j)) {
+			if (j=='?' || j=='\\' || UTF2(j) || UTF3(j) || UTF4(j)) {
 				if (bigfsm[j][i]>0)
 					bigfsm[j][i] = ~bigfsm[j][i];
 				bigfsm[j][i] &= ~QBSBIT;
@@ -391,6 +392,10 @@
 				}
 				if (UTF3(c)) {
 					runelen = 3;
+					goto reswitch;
+				}
+				if (UTF4(c)) {
+					runelen = 4;
 					goto reswitch;
 				}
 				error(WARNING, "Lexical botch in cpp");