ref: f8a9a3fbf021dbc5f2aae0256bc1ab468b747438
parent: 3e54fa29f02e02e505e8df4b88306703adca0ed4
author: qwx <[email protected]>
date: Wed Jan 20 18:31:42 EST 2021
move awk scripts to their own directory
--- /dev/null
+++ b/awk/descstat.awk
@@ -1,0 +1,110 @@
+#!/bin/awk -f
+# assumption: no missing values/indices in input data
+
+function swap(X, a, b, t)
+{
+ t = X[a]
+ X[a] = X[b]
+ X[b] = t
+}
+
+# from numerical recipes 3rd ed; rearranges X
+function select(X, k, i, j, n, m, l, ir)
+{
+ l = 1
+ ir = length(X)
+ for(;;){
+ if(ir <= l+1){
+ if(ir == l+1 && X[ir] < X[l])
+ swap(X, l, ir)
+ return int(X[k])
+ }else{
+ m = (l + ir) / 2
+ swap(X, m, l+1)
+ if(X[l] > X[ir])
+ swap(X, l, ir)
+ if(X[l+1] > X[ir])
+ swap(X, l+1, ir)
+ if(X[l] > X[l+1])
+ swap(X, l, l+1)
+ i = l + 1
+ j = ir
+ m = X[l+1]
+ for(;;){
+ do i++; while(X[i] < m)
+ do j--; while(X[j] > m)
+ if(j < i)
+ break
+ swap(X, i, j)
+ }
+ X[l+1] = X[j]
+ X[j] = m
+ if(j >= k)
+ ir = j - 1
+ if(j <= k)
+ l = i
+ }
+ }
+}
+
+function max(X, n)
+{
+ n = "-inf"
+ for(i in X)
+ if(X[i] > n)
+ n = X[i]
+ return n
+}
+
+function min(X, n)
+{
+ n = "inf"
+ for(i in X)
+ if(X[i] < n)
+ n = X[i]
+ return n
+}
+
+function sum(X, i, n)
+{
+ for(i in X)
+ n += X[i]
+ return n
+}
+
+function mean(X)
+{
+ return sum(X) / length(X)
+}
+
+function var(X, i, n, m)
+{
+ m = mean(X)
+ for(i in X)
+ n += (X[i] - m) ^ 2
+ return n / (length(X) - 1)
+}
+
+function sd(X)
+{
+ return sqrt(var(X))
+}
+
+# FIXME: this is wrong and produces wrong results in subsequent stuff
+# select is busted
+# rearranges X
+function median(X, n)
+{
+ n = select(X, int(length(X) / 2 + 1))
+ if(length(X) % 2 != 0)
+ return n
+ else
+ return (select(X, int(length(X) / 2)) + n) / 2
+}
+
+function freq(X)
+{
+ delete ans
+ for(i in X)
+ ans[X[i]]++
+}
--- /dev/null
+++ b/awk/hex.awk
@@ -1,0 +1,14 @@
+#!/bin/awk -f
+function hex(s, v){
+ if(s ~ /^0x/)
+ s = substr(s, 3)
+ for(n=1; n<=length(s); n++)
+ v = v * 16 + h[substr(s, n, 1)]
+ return v
+}
+BEGIN{
+ for(n=0; n<16; n++){
+ h[sprintf("%x", n)] = n
+ h[sprintf("%X", n)] = n
+ }
+}
--- /dev/null
+++ b/awk/sort.awk
@@ -1,0 +1,105 @@
+#!/bin/awk -f
+# 1988, aho, kernighan and weinberger: the awk programming language, pp 153-174
+
+function swap(X, a, b, t)
+{
+ t = X[a]
+ X[a] = X[b]
+ X[b] = t
+}
+
+# quicksort
+function qsort(X, left, right, i, last){
+ if(left >= right)
+ return
+ swap(X, left, left + int((right - left + 1) * rand()))
+ last = left
+ for(i=left+1; i<=right; i++)
+ if(X[i] < X[left])
+ swap(X, ++last, i)
+ swap(X, left, last)
+ qsort(X, left, last-1)
+ qsort(X, last+1, right)
+}
+
+function heapify(X, left, right, p, c){
+ for(p=left; (c=2*p)<=right; p=c){
+ if(c < right && X[c+1] > X[c])
+ c++
+ if(X[p] < X[c])
+ swap(X, c, p)
+ }
+}
+
+# heapsort
+function hsort(X, n, i){
+ for(i=int(n/2); i>=1; i--) # phase 1
+ heapify(X, i, n)
+ for(i=n; i>1; i--){ # phase 2
+ swap(X, 1, i)
+ heapify(X, 1, i-1)
+ }
+}
+
+# insertion sort
+function isort(X, n, i, j, t){
+ for(i=2; i<=n; i++)
+ for(j=i; j>1 && X[j-1] > X[j]; j--)
+ swap(X, j-1, j)
+}
+
+# graph topological sorting
+# input: predecessor successor pairs
+# representation: 3 tables:
+# pcnt, scnt: predecessor and successor counts for a node
+# slist[a,i]: ith successor of a node
+function addnode(a, b){
+ if(!(a in pcnt))
+ pcnt[a] = 0 # add a to pcnt
+ pcnt[b]++
+ slist[a, ++scnt[a]] = b # add b to a's successors
+}
+
+# breadth-first search
+# no topological sort is possible if graph has cycles
+function bfs(){
+ # queue nodes with no predecessors
+ for(node in pcnt){
+ nodecnt++
+ if(pcnt[node] == 0)
+ X[++back] = node
+ }
+ # pop nodes, queue new nodes with no predecessors
+ for(front=1; front<=back; front++){
+ node = X[front]
+ for(i=1; i<=scnt[node]; i++)
+ if(--pcnt[slist[node,i]] == 0)
+ q[++back] = slist[node,i]
+ }
+ # nodes never queued up are involved in a cycle
+ if(back != nodecnt)
+ print "error: cyclic graph"
+}
+
+# depth-first search for cycles
+function dfs(node, i, s){
+ visited[node] = 1
+ for(i=1; i<=scnt[node]; i++)
+ if(visited[s = slist[node,i]] == 0)
+ dfs(s)
+ else if(visited[s] == 1)
+ print "cycle with back edge (" node ", " s ")"
+ visited[node] = 2
+ X[pncnt++] = node
+}
+
+# depth-first (reverse) topological sort
+function rtsort(){
+ for(node in pcnt){
+ nodecnt++
+ if(pcnt[node] == 0)
+ dfs(node)
+ }
+ if(pncnt != nodecnt)
+ print "error: cyclic graph"
+}
--- a/descstat.awk
+++ /dev/null
@@ -1,110 +1,0 @@
-#!/bin/awk -f
-# assumption: no missing values/indices in input data
-
-function swap(X, a, b, t)
-{
- t = X[a]
- X[a] = X[b]
- X[b] = t
-}
-
-# from numerical recipes 3rd ed; rearranges X
-function select(X, k, i, j, n, m, l, ir)
-{
- l = 1
- ir = length(X)
- for(;;){
- if(ir <= l+1){
- if(ir == l+1 && X[ir] < X[l])
- swap(X, l, ir)
- return int(X[k])
- }else{
- m = (l + ir) / 2
- swap(X, m, l+1)
- if(X[l] > X[ir])
- swap(X, l, ir)
- if(X[l+1] > X[ir])
- swap(X, l+1, ir)
- if(X[l] > X[l+1])
- swap(X, l, l+1)
- i = l + 1
- j = ir
- m = X[l+1]
- for(;;){
- do i++; while(X[i] < m)
- do j--; while(X[j] > m)
- if(j < i)
- break
- swap(X, i, j)
- }
- X[l+1] = X[j]
- X[j] = m
- if(j >= k)
- ir = j - 1
- if(j <= k)
- l = i
- }
- }
-}
-
-function max(X, n)
-{
- n = "-inf"
- for(i in X)
- if(X[i] > n)
- n = X[i]
- return n
-}
-
-function min(X, n)
-{
- n = "inf"
- for(i in X)
- if(X[i] < n)
- n = X[i]
- return n
-}
-
-function sum(X, i, n)
-{
- for(i in X)
- n += X[i]
- return n
-}
-
-function mean(X)
-{
- return sum(X) / length(X)
-}
-
-function var(X, i, n, m)
-{
- m = mean(X)
- for(i in X)
- n += (X[i] - m) ^ 2
- return n / (length(X) - 1)
-}
-
-function sd(X)
-{
- return sqrt(var(X))
-}
-
-# FIXME: this is wrong and produces wrong results in subsequent stuff
-# select is busted
-# rearranges X
-function median(X, n)
-{
- n = select(X, int(length(X) / 2 + 1))
- if(length(X) % 2 != 0)
- return n
- else
- return (select(X, int(length(X) / 2)) + n) / 2
-}
-
-function freq(X)
-{
- delete ans
- for(i in X)
- ans[X[i]]++
-}
--- a/hex.awk
+++ /dev/null
@@ -1,14 +1,0 @@
-#!/bin/awk -f
-function hex(s, v){
- if(s ~ /^0x/)
- s = substr(s, 3)
- for(n=1; n<=length(s); n++)
- v = v * 16 + h[substr(s, n, 1)]
- return v
-}
-BEGIN{
- for(n=0; n<16; n++){
- h[sprintf("%x", n)] = n
- h[sprintf("%X", n)] = n
- }
-}
--- a/sort.awk
+++ /dev/null
@@ -1,105 +1,0 @@
-#!/bin/awk -f
-# 1988, aho, kernighan and weinberger: the awk programming language, pp 153-174
-
-function swap(X, a, b, t)
-{
- t = X[a]
- X[a] = X[b]
- X[b] = t
-}
-
-# quicksort
-function qsort(X, left, right, i, last){
- if(left >= right)
- return
- swap(X, left, left + int((right - left + 1) * rand()))
- last = left
- for(i=left+1; i<=right; i++)
- if(X[i] < X[left])
- swap(X, ++last, i)
- swap(X, left, last)
- qsort(X, left, last-1)
- qsort(X, last+1, right)
-}
-
-function heapify(X, left, right, p, c){
- for(p=left; (c=2*p)<=right; p=c){
- if(c < right && X[c+1] > X[c])
- c++
- if(X[p] < X[c])
- swap(X, c, p)
- }
-}
-
-# heapsort
-function hsort(X, n, i){
- for(i=int(n/2); i>=1; i--) # phase 1
- heapify(X, i, n)
- for(i=n; i>1; i--){ # phase 2
- swap(X, 1, i)
- heapify(X, 1, i-1)
- }
-}
-
-# insertion sort
-function isort(X, n, i, j, t){
- for(i=2; i<=n; i++)
- for(j=i; j>1 && X[j-1] > X[j]; j--)
- swap(X, j-1, j)
-}
-
-# graph topological sorting
-# input: predecessor successor pairs
-# representation: 3 tables:
-# pcnt, scnt: predecessor and successor counts for a node
-# slist[a,i]: ith successor of a node
-function addnode(a, b){
- if(!(a in pcnt))
- pcnt[a] = 0 # add a to pcnt
- pcnt[b]++
- slist[a, ++scnt[a]] = b # add b to a's successors
-}
-
-# breadth-first search
-# no topological sort is possible if graph has cycles
-function bfs(){
- # queue nodes with no predecessors
- for(node in pcnt){
- nodecnt++
- if(pcnt[node] == 0)
- X[++back] = node
- }
- # pop nodes, queue new nodes with no predecessors
- for(front=1; front<=back; front++){
- node = X[front]
- for(i=1; i<=scnt[node]; i++)
- if(--pcnt[slist[node,i]] == 0)
- q[++back] = slist[node,i]
- }
- # nodes never queued up are involved in a cycle
- if(back != nodecnt)
- print "error: cyclic graph"
-}
-
-# depth-first search for cycles
-function dfs(node, i, s){
- visited[node] = 1
- for(i=1; i<=scnt[node]; i++)
- if(visited[s = slist[node,i]] == 0)
- dfs(s)
- else if(visited[s] == 1)
- print "cycle with back edge (" node ", " s ")"
- visited[node] = 2
- X[pncnt++] = node
-}
-
-# depth-first (reverse) topological sort
-function rtsort(){
- for(node in pcnt){
- nodecnt++
- if(pcnt[node] == 0)
- dfs(node)
- }
- if(pncnt != nodecnt)
- print "error: cyclic graph"
-}