txt2tkn.awk
Generated on Tue Dec 05 17:39:06 Eastern Standard Time 2006 from txt2tkn.awk
# Program : TXT2TKN.AWK
# Purpose : Split the input into overlapping TOKENSIZE-character tokens, count each distinct token, and print the counts as CSV sorted in descending order.
# Author : Bob Jonkman
# Date : 23 June 2006
# Usage : gawk -f txt2tkn.awk [-v TOKENSIZE=x] inputfile
# Start-up: configure record splitting, settle the token size, and emit the
# CSV header row.
BEGIN {
    RS = ""        # paragraph mode: records are separated by blank lines
    FS = ""        # gawk extension: one field per character (the rules visible here only read $0)
    QUOTE = "\""

    # TOKENSIZE may be supplied with -v on the command line. Coerce it to an
    # integer and fall back to the default when it is missing, non-numeric,
    # zero or negative, since any of those would yield empty tokens.
    TOKENSIZE = int(TOKENSIZE)
    if (TOKENSIZE < 1)
        TOKENSIZE = 5

    print("Frequency,Token")
}
# Per-record rule: slide a TOKENSIZE-character window over the record one
# character at a time and count every window in tokentable.
#
# The scan position restarts at 1 for each record, and the loop bound lets the
# window reach the final full-length position, so the last token of a record
# is counted as well.
{
    reclen = length($0)    # hoisted: the record does not change inside the loop
    for (charcount = 1; charcount + TOKENSIZE - 1 <= reclen; charcount++) {
        token = substr($0, charcount, TOKENSIZE)

        # A token that is exactly one newline (only possible for
        # one-character tokens) is echoed to standard output and then counted
        # under the placeholder "!!".
        # NOTE(review): the echo lands in the same stream as the CSV output;
        # confirm whether it is still wanted.
        if (token == "\n") {
            printf("%c", token)
            token = "!!"
        }

        tokentable[token]++
    }
}
# Final report: write one "count,token" CSV row per distinct token through an
# external sort so the rows come out in descending order. The count is
# right-aligned in 10 columns, which makes a plain text sort order the rows
# numerically.
END {
    # "/r" is the reverse flag of the Windows sort command; a Unix sort would
    # need "-r" instead.
    sortcmd = "sort /r"

    for (i in tokentable) {
        # Double any embedded quote characters so the quoted CSV field stays
        # well-formed. Work on a copy: i is the loop variable.
        field = i
        gsub(QUOTE, QUOTE QUOTE, field)
        printf("%10i,%s\n", tokentable[i], QUOTE field QUOTE) | sortcmd
    }

    # Close the pipe explicitly so sort sees end-of-input and finishes
    # writing before awk exits.
    close(sortcmd)
}
1 files processed.