Code Files

PARSE-FREQ.AWK

Generated on Tue Dec 05 17:39:06 Eastern Standard Time 2006 from PARSE-FREQ.AWK


# Program	: PARSE-FREQ.AWK
# Purpose	: Use the PARSECSV.AWK subroutine to count the frequency of an arbitrary field
# Date		: 19 October 2004
# Author	: Bob Jonkman <bjonkman@sobac.com>

# Copyright 2008 Bob Jonkman and/or SOBAC Microcomputer Services

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


# Usage	    : gawk -f library.awk -f parse-freq.awk [-v FIELDNUM=x | -v FIELDNAME=nnn] [-v NOHEADER=(0|1)] inputfiles > outputfile.csv

# Variables :   FIELDNAME (optional) Header text of the field to count; it must match one of the field headers in the first record
#               FIELDNUM  (optional) Number of the field to count, used only when FIELDNAME is not given (0 = whole record)
#               NOHEADER  (optional) Do not treat the first record as field headers



# Initialisation: set up the comma-separated dialect and, when there is no
# header record, select the field to count directly from FIELDNUM.
BEGIN {
    # Built-in separators: input fields, output fields and array subscripts
    FS     = "," ;
    OFS    = "," ;
    SUBSEP = "," ;

    QUOTE  = "\"" ;

    # CSV dialect settings. They are not referenced again in this file;
    # presumably the parsecsv()/printcsv() library routines read them -- confirm.
    FIELD_SEPARATOR  = "," ;
    FIELD_DELIMITER  = QUOTE ;
    RECORD_SEPARATOR = "\n" ;

    # gawk extension: regular expression matching and string comparisons
    # become case-insensitive (array subscripts stay case-sensitive).
    IGNORECASE = 1 ;

    # The header rule is guarded by !NOHEADER, so with NOHEADER set it never
    # runs and nothing else copies FIELDNUM into countfield. Do it here so
    # that "-v NOHEADER=1 -v FIELDNUM=x" counts field x instead of silently
    # counting whole records. Values given with -v are already set in BEGIN.
    if (NOHEADER)
        countfield = FIELDNUM ;
}



##### Read the headers #####

# Header rule: runs on the very first record read (NR is cumulative across
# input files) unless NOHEADER is set. It parses the header record into
# headers[], then sets:
#   countfield - index of the field to count (0 or empty = whole record)
#   headertext - column title that the END rule prints beside "Frequency"
(NR==1) && !NOHEADER    {
    numfields = parsecsv($0, headers) ;

    if (FIELDNAME)
    {
        # The loop does not stop at the first hit, so if several headers
        # match FIELDNAME the last matching column is used.
        for (i = 1; i <= numfields; i++)
            if (FIELDNAME == headers[i])
                countfield = i ;

        if (!countfield)
        {
            print("FIELDNAME= " FIELDNAME " does not exist") > "/dev/stderr" ;
            # List the available headers in field order to aid diagnosis
            for (i = 1; i <= numfields; i++)
                print("headers[" i "]= " headers[i]) > "/dev/stderr" ;
            # Report the failure to the caller; a bare "exit" returned
            # status 0. Note that awk still runs the END rule after exit.
            exit 1 ;
        }
    }
    else
    {
        countfield = FIELDNUM ; # 0 means complete record is counted
    }

    # Provide a reasonable value of headertext, even if the complete record
    # is selected (FIELDNUM == 0)
    headertext = countfield ? headers[countfield] : $0 ;
}

##### End of Read the Headers #####


# A record that is exactly "=====" ends processing of the current input
# file: the remainder of that file is skipped and reading resumes with the
# next file, if any. (The END rule prints this same marker before its totals.)
("=====" == $0) {
    nextfile ;
}


# Counting rule: applies to every record except the first, or to every
# record including the first when NOHEADER is set. It picks the value to
# count and updates the running tallies:
#   total       - number of records counted
#   unique      - number of distinct values seen
#   freq[value] - occurrences of each distinct value
NOHEADER || (NR != 1) {
    if (!countfield)
    {
        # No field selected: the complete record is the value
        field = $0 ;
    }
    else
    {
        parsecsv($0, record) ;
        field = record[countfield] ;
    }

    # A value is new if it has no entry yet; counts only ever go up from 1
    if (!(field in freq))
        unique++ ;

    freq[field]++ ;
    total++ ;
}

# Progress indicator: after every 10000th input record, write the record
# number and the running totals to standard error.
(NR % 10000) == 0 {
    print(NR "  Total= " total "  Unique= " unique) > "/dev/stderr" ;
}



##### Print the frequency list #####

# Report rule: writes the frequency table as CSV to standard output:
#   1. a header line ("Frequency", plus the counted column's title unless
#      NOHEADER is set);
#   2. one "count,value" line per distinct value, passed through the
#      external command "sort /R" (the reverse-order switch of the Windows
#      sort command -- this string is platform specific);
#   3. a "=====" marker followed by the Total and Unique counts.
END {
    if (NOHEADER)
        print("Frequency") ;
    else
        print("Frequency," printcsv(headertext)) ;

    # Push the header line out before the sort process starts writing to the
    # same standard output.
    fflush() ;

    sortcmd = "sort /R" ;
    for (i in freq)
        printf("%15i,%s", freq[i], printcsv(i) "\n") | sortcmd ;

    # Close the pipe so sort receives end-of-input and emits its rows now.
    # Without this the pipe was only closed when awk terminated, so the
    # sorted rows could appear after the trailer below.
    close(sortcmd) ;

    print("=====") ;
    # "+ 0" forces a number, so empty input reports 0 rather than an empty field
    print(total + 0, "Total") ;
    print(unique + 0, "Unique") ;
}
 			 

# EOF: PARSE-FREQ.AWK

   

1 file processed.