shithub: werc

Download patch

ref: 4061f18dd84e55a23c34e17ac7b9d42bbce79620
parent: f38159a04e48339ae83be6776741bfd416f1e2f8
author: sl <[email protected]>
date: Wed Feb 18 04:25:10 EST 2009

Add urlencode.awk to contrib/, not used for now.

--- /dev/null
+++ b/bin/contrib/urlencode.awk
@@ -1,0 +1,126 @@
+# Taken from http://www.shelldorado.com/scripts/cmds/urlencode
+##########################################################################
+# Title      :  urlencode - encode URL data
+# Author     :  Heiner Steven ([email protected])
+# Date       :  2000-03-15
+# Requires   :  awk
+# Categories :  File Conversion, WWW, CGI
+# SCCS-Id.   :  @(#) urlencode  1.4 06/10/29
+##########################################################################
+# Description
+#   Encode data according to
+#       RFC 1738: "Uniform Resource Locators (URL)" and
+#       RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
+#
+#   This encoding is used e.g. for the MIME type
+#   "application/x-www-form-urlencoded"
+#
+# Notes
+#    o  The default behaviour is not to encode the line endings. This
+#   may not be what was intended, because the result will be
+#   multiple lines of output (which cannot be used in a URL or an
+#   HTTP "POST" request). If the desired output should be one
+#   line, use the "-l" option.
+#
+#    o  The "-l" option assumes that the end-of-line is denoted by
+#   the character LF (ASCII 10). This is not true for Windows or
+#   Mac systems, where the end of a line is denoted by the two
+#   characters CR LF (ASCII 13 10).
+#   We use this for symmetry; data processed in the following way:
+#       cat | urlencode -l | urldecode -l
+#   should (and will) result in the original data
+#
+#    o  Large lines (or binary files) will break many AWK
+#       implementations. If you get the message
+#       awk: record `...' too long
+#        record number xxx
+#   consider using GNU AWK (gawk).
+#
+#    o  urlencode will always terminate its output with an EOL
+#       character (with "-l" this is the encoded form, "%0A")
+#
+# Thanks to Stefan Brozinski for pointing out a bug related to non-standard
+# locales.
+#
+# See also
+#   urldecode
+##########################################################################
+
+PN=`basename "$0"`          # Program name
+VER='1.4'                   # Version string printed by Usage
+
+: ${AWK=awk}                # Use "awk" unless AWK is already set
+
+Usage () {                  # Print the help text to stderr, then exit 1
+    echo "$PN - encode URL data, $VER
+usage: $PN [-l] [file ...]
+    -l:  encode line endings (result will be one line of output)
+
+The default is to encode each input line on its own." 1>&2
+    exit 1
+}
+
+Msg () {
+    for MsgLine
+    do echo "$PN: $MsgLine" >&2
+    done
+}
+
+Fatal () { Msg "$@"; exit 1; }
+
+set -- `getopt hl "$@" 2>/dev/null` || Usage
+[ $# -lt 1 ] && Usage           # "getopt" detected an error
+
+EncodeEOL=no
+while [ $# -gt 0 ]
+do
+    case "$1" in
+        -l) EncodeEOL=yes;;
+    --) shift; break;;
+    -h) Usage;;
+    -*) Usage;;
+    *)  break;;         # First file name
+    esac
+    shift
+done
+
+LC_ALL=C LANG=C; export LC_ALL LANG    # LC_ALL outranks LANG: force bytes
+$AWK '
+    BEGIN {
+    # We assume an awk implementation that is just plain dumb.
+    # We will convert a character to its ASCII value with the
+    # table ord[], and produce two-digit hexadecimal output
+    # without the printf("%02X") feature.
+
+    EOL = "%0A"     # "end of line" string (encoded)
+    split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
+    hextab [0] = 0
+    for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
+    if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
+    }
+    {
+    encoded = ""
+    len = length ($0)       # loop invariant, computed once per record
+    for ( i=1; i<=len; ++i ) {
+        c = substr ($0, i, 1)
+        if ( c ~ /[a-zA-Z0-9.-]/ ) {
+            encoded = encoded c     # safe character
+        } else if ( c == " " ) {
+            encoded = encoded "+"   # special handling
+        } else {
+            # unsafe character, encode it as a two-digit hex-number
+            code = ord [c]
+            encoded = encoded "%" hextab [int (code / 16)] hextab [code % 16]
+        }
+    }
+    if ( EncodeEOL ) {
+        printf ("%s", encoded EOL)
+    } else {
+        print encoded
+    }
+    }
+    END {
+        # Disabled; would end "-l" output with a newline: if ( EncodeEOL ) print ""
+    }
+' "$@"
+