pARSE-FREQ.AWK
Generated on Tue Dec 05 17:39:06 Eastern Standard Time 2006 from pARSE-FREQ.AWK
# Program : PARSE-FREQ.AWK
# Purpose : Use the PARSECSV.AWK subroutine to count the frequency of an arbitrary field
# Date : 19 October 2004
# Author : Bob Jonkman <bjonkman@sobac.com>
# Copyright 2008 Bob Jonkman and/or SOBAC Microcomputer Services
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Usage : gawk -f library.awk -f parse-freq.awk [-v FIELDNUM=x | -v FIELDNAME=nnn] [-v NOHEADER=(0|1)] inputfiles > outputfile.csv
# Variables : FIELDNAME (optional) Matches field header for counting
# FIELDNUM (optional) Used only when FIELDNAME is not given: selects the field number to count (0 = whole record)
# NOHEADER (optional) Do not treat the first record as field headers
# Initialisation: set the separators and global constants before any input is read.
# QUOTE, FIELD_SEPARATOR, FIELD_DELIMITER and RECORD_SEPARATOR are not referenced
# anywhere else in this file; presumably they are read by the CSV routines in
# library.awk (parsecsv/printcsv) -- TODO confirm against the library.
BEGIN {
    FS     = ","
    OFS    = ","
    SUBSEP = ","

    QUOTE            = "\""
    FIELD_SEPARATOR  = ","
    FIELD_DELIMITER  = QUOTE
    RECORD_SEPARATOR = "\n"

    # gawk extension: makes string comparisons and regex matches case-insensitive,
    # so the FIELDNAME lookup against the headers ignores case.
    IGNORECASE = 1

    # The header rule is the only other place that assigns countfield, and it
    # never runs when NOHEADER is set.  Seed countfield from FIELDNUM here so
    # that "-v NOHEADER=1 -v FIELDNUM=n" counts field n rather than silently
    # falling back to the whole record.
    if (NOHEADER)
        countfield = FIELDNUM
}
##### Read the headers #####
# Header rule: fires on the very first input record unless NOHEADER is set.
# Splits that record into headers[] with parsecsv() (its return value is used
# as the number of fields) and decides which field will be counted:
#   - FIELDNAME given : countfield = index of the header equal to FIELDNAME
#                       (if several headers match, the last one wins);
#                       an unmatched FIELDNAME is a fatal error.
#   - otherwise       : countfield = FIELDNUM (0 means the whole record).
# Also sets headertext, the column title used in the final report.
(NR == 1) && !NOHEADER {
    numfields = parsecsv($0, headers)

    if (FIELDNAME) {
        for (i = 1; i <= numfields; i++) {
            if (FIELDNAME == headers[i])
                countfield = i
        }

        if (!countfield) {
            # Report the problem together with the headers that were found,
            # then stop with a non-zero status so callers can detect the failure
            # (a bare "exit" would report success).
            print("FIELDNAME= " FIELDNAME " does not exist") > "/dev/stderr"
            for (i in headers)
                print("headers[" i "]= " headers[i]) > "/dev/stderr"
            exit 1
        }
    } else {
        countfield = FIELDNUM    # 0 means complete record is counted
    }

    # Provide a reasonable value of headertext, even if the complete record
    # is selected (FIELDNUM == 0).
    if (countfield)
        headertext = headers[countfield]
    else
        headertext = $0
}
##### End of Read the Headers #####
# A record consisting solely of "=====" ends the current input file:
# skip the remainder of that file and continue with the next one.
# (The END rule of this script prints the same marker before its totals.)
/^=====$/ {
    nextfile
}
# Counting rule: applies to every record except the header record
# (i.e. everything when NOHEADER is set, otherwise everything after record 1).
# Picks the value to count -- one parsed CSV field, or the whole record when
# countfield is 0/unset -- and updates the running totals:
#   total  : number of records counted
#   unique : number of distinct values seen
#   freq[] : occurrences per distinct value
# NOTE(review): whether parsecsv() clears record[] between calls is not visible
# here; if it does not, a short record could pick up a stale field -- confirm.
!((NR == 1) && !NOHEADER) {
    if (countfield) {
        parsecsv($0, record)
        field = record[countfield]
    } else {
        field = $0
    }

    total++

    # First sighting of this value?
    if (!(field in freq))
        unique++

    freq[field]++
}
# Progress indicator: after every 10000th input record, write the running
# record number, total and unique counts to standard error.
(NR % 10000) == 0 {
    progress = NR " Total= " total " Unique= " unique
    print(progress) > "/dev/stderr"
}
##### Print the frequency list #####
# Final report, written to standard output:
#   1. a heading line ("Frequency", plus the counted column's title when
#      headers were read),
#   2. one "count,value" line per distinct value, passed through the external
#      "sort /R" command (reverse sort, so the highest counts come first thanks
#      to the fixed-width %15i count column),
#   3. a "=====" separator followed by the Total and Unique counts.
END {
    if (NOHEADER) {
        print("Frequency")
    } else {
        print("Frequency," printcsv(headertext))
    }

    sortcmd = "sort /R"
    for (i in freq)
        printf("%15i,%s", freq[i], printcsv(i) "\n") | sortcmd

    # The sort command cannot emit anything until its input pipe is closed.
    # Close it here so the sorted list appears before the trailer; otherwise
    # the pipe is only closed at exit and the list lands after the totals.
    # close() on a never-opened pipe (empty freq) is harmless.
    close(sortcmd)

    print("=====")
    print(total, "Total")
    print(unique, "Unique")
}
# EOF: PARSE-FREQ.AWK
1 files processed.