Code Files

txt2tkn.awk

Generated on Tue Dec 05 17:39:06 Eastern Standard Time 2006 from txt2tkn.awk


# Program   : TXT2TKN.AWK
# Purpose   : Split the input into overlapping fixed-length character tokens,
#             count how often each token occurs, and print the counts sorted.
# Author    : Bob Jonkman
# Date      : 23 June 2006

# Usage     : gawk -f txt2tkn.awk [-v TOKENSIZE=x] inputfile

# Start-up: configure record/field splitting, set defaults, emit the CSV header.
BEGIN {
    # An empty RS selects awk's paragraph mode: records are delimited by
    # blank lines rather than by single newlines.
    RS = ""

    # In gawk an empty FS splits a record into one field per character.
    FS = ""

    # Double-quote character, kept in a variable for building quoted output.
    QUOTE = "\""

    # Default to 5-character tokens when TOKENSIZE is unset, empty, or zero
    # (for example when no -v TOKENSIZE=x was given on the command line).
    TOKENSIZE = TOKENSIZE ? TOKENSIZE : 5

    # Column header for the report.
    print "Frequency,Token"
}

            # Per-record rule: slide a window of TOKENSIZE characters across the
            # record, advancing one character at a time, and count every window
            # (token) in tokentable, keyed by the token text.
            {
                # Restart at the beginning of every record.  Without this reset the
                # offset left over from the previous record would cause the start of
                # each later record to be skipped.
                charcount = 0
                recordlength = length($0)

                # ">=" (not ">") so that the final full-width window, the one that
                # ends on the last character of the record, is counted as well.
                while (recordlength >= (charcount + TOKENSIZE))
                {
                    charcount++
                    token = substr($0, charcount, TOKENSIZE)

                    # A token that is exactly one newline (possible when tokens are a
                    # single character long) is echoed to standard output and then
                    # tallied under the placeholder key "!!" instead of the raw
                    # newline -- presumably so the key stays on one output line.
                    if (token == "\n")
                    {
                        printf("%c", token)
                        token = "!!"
                    }

                    tokentable[token]++
                }
            }

# Final report: write one "count,token" row per distinct token through an
# external sort command.
#
# SORTCMD (optional, e.g. -v SORTCMD="sort -r") selects the sort command.
# It defaults to "sort /r" (Windows sort with its reverse-order switch), which
# is what this script has always used.
END {
    if (!SORTCMD)
        SORTCMD = "sort /r"

    for (i in tokentable) {
        # Double any quote character inside the token so the quoted field
        # remains well-formed CSV.
        field = i
        gsub(QUOTE, QUOTE QUOTE, field)

        # The count is right-aligned in a 10-character column, so a plain
        # text sort orders the rows by frequency.
        printf("%10i,%s\n", tokentable[i], QUOTE field QUOTE) | SORTCMD
    }

    # Close the pipe explicitly so the sort command sees end-of-input and
    # writes its result before awk exits.
    close(SORTCMD)
}

                
   

1 file processed.