shithub: opusfile

Download patch

ref: cfa5956105a71501fc6aa1a583c118ba423975aa
parent: 116b703442e52a5dca635b5050d3d7165d2c9662
author: Timothy B. Terriberry <[email protected]>
date: Mon Aug 12 05:48:32 EDT 2013

Minor UTF-8/UTF-16 cleanups.

- Reject the Unicode noncharacters U+FFFE and U+FFFF when decoding UTF-8.
- Remove some unnecessary string length checks.

--- a/examples/win32utf8.c
+++ b/examples/win32utf8.c
@@ -32,8 +32,9 @@
       dst[di++]=(char)(0x80|c0&0x3F);
       continue;
     }
-    else if(c0>=0xD800&&c0<0xDC00&&si+1<len){
+    else if(c0>=0xD800&&c0<0xDC00){
       unsigned c1;
+      /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
       c1=_src[si+1];
       if(c1>=0xDC00&&c1<0xE000){
         unsigned w;
@@ -48,9 +49,9 @@
         continue;
       }
     }
-    /*Anything else is either a valid 3-byte sequence, or an invalid
-       surrogate pair.
-      In the latter case, we just encode the value as a 3-byte
+    /*Anything else is either a valid 3-byte sequence, an invalid surrogate
+       pair, or 'not a character'.
+      In the latter two cases, we just encode the value as a 3-byte
        sequence anyway (producing technically invalid UTF-8).
       Later error handling will detect the problem, with a better
        chance of giving a useful error message.*/
--- a/src/stream.c
+++ b/src/stream.c
@@ -153,8 +153,9 @@
         dst[di++]=(wchar_t)c0;
         continue;
       }
-      else if(si+1<len){
+      else{
         int c1;
+        /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
         c1=(unsigned char)_src[si+1];
         if((c1&0xC0)==0x80){
           /*Found at least one continuation byte.*/
@@ -169,8 +170,9 @@
               continue;
             }
           }
-          else if(si+2<len){
+          else{
             int c2;
+            /*This is safe, because c1 was not 0 and _src is NUL-terminated.*/
             c2=(unsigned char)_src[si+2];
             if((c2&0xC0)==0x80){
               /*Found at least two continuation bytes.*/
@@ -178,16 +180,19 @@
                 wchar_t w;
                 /*Start byte says this is a 3-byte sequence.*/
                 w=(c0&0xF)<<12|(c1&0x3F)<<6|c2&0x3F;
-                if(w>=0x800U&&(w<0xD800||w>=0xE000)){
-                  /*This is a 3-byte sequence that is not overlong and not a
-                     UTF-16 surrogate pair value.*/
+                if(w>=0x800U&&(w<0xD800||w>=0xE000)&&w<0xFFFE){
+                  /*This is a 3-byte sequence that is not overlong, not a
+                     UTF-16 surrogate pair value, and not a 'not a character'
+                     value.*/
                   dst[di++]=w;
                   si+=2;
                   continue;
                 }
               }
-              else if(si+3<len){
+              else{
                 int c3;
+                /*This is safe, because c2 was not 0 and _src is
+                   NUL-terminated.*/
                 c3=(unsigned char)_src[si+3];
                 if((c3&0xC0)==0x80){
                   /*Found at least three continuation bytes.*/