XTAB.AWK
Generated on Tue Dec 05 17:39:05 Eastern Standard Time 2006 from XTAB.AWK
# Program : XTAB.AWK
# Purpose : Create a cross-tab file from a .CSV file
# Author : Bob Jonkman <bjonkman@sobac.com>
# Copyright 2008 Bob Jonkman and/or SOBAC Microcomputer Services
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Date : 9 March 2004
# Modified : 15 June 2005 (made generic based on XTAB-VERSION-PO.AWK)
# Usage : gawk -f xtab.awk -f library.csv ([-v TIMESLOT=(0|1)] | [-v DATESLOT=(0|1)]) [-v XFIELDNUM=x | -v XFIELDNAME=xname] [-v YFIELDNUM=y | -v YFIELDNAME=yname] inputfile.csv > outputfile.csv
# Note : XFIELD is across, YFIELD is down
# Variables : XFIELDNAME = (optional) the fieldheader for column values
# XFIELDNUM = (optional) the fieldnumber for column values if there is no XFIELDNAME
# YFIELDNAME = (optional) the fieldheader for row values
# YFIELDNUM = (optional) the fieldnumber for row values if there is no YFIELDNAME
# TIMESLOT = (optional) Treat the YFIELD as Datetime, slotted by hour
# DATESLOT = (optional) Treat the YFIELD as Datetime, slotted by date
BEGIN { ORS = "" ;
FS = "," ;
QUOTE = "\"" ;
COMMA = "," ;
FIELD_DELIMITER = QUOTE ;
FIELD_SEPARATOR = COMMA ;
}
##### Read the headers #####
(FNR == 1) { numfields = parsecsv($0,headers)
for (i in headers)
{
if(headers[i] == XFIELDNAME)
xfield = i ;
if(headers[i] == YFIELDNAME)
yfield = i ;
}
if(!xfield) # if XFIELDNAME doesn't exist
xfield = XFIELDNUM ;
if(!xfield) # if XFIELDNUM doesn't exist either
xfield = 1 ;
if(!yfield) # if YFIELDNAME doesn't exist
yfield = YFIELDNUM ;
if(!yfield)
yfield = 2 ; # if YFIELDNUM doesn't exist either
}
(!(NR % 10000)) { print("NR=" NR " X=" seenx " Y=" seeny "\n" ) > "/dev/stderr"
}
(FNR != 1) { if($0 == "=====") # Don't process CSV footers
nextfile ;
parsecsv($0, record) ;
if (TIMESLOT || DATESLOT) # truncate to hour or date
{
# print("##### DEBUG ##### NR=" NR " if(TIMESLOT || DATESLOT): TIMESLOT=" TIMESLOT " DATESLOT=" DATESLOT "\n") > "/dev/stderr"
# separate Date (datetime[1]) from Time (datetime[2])
split(record[yfield],datetime," ") ;
if (TIMESLOT)
{
# print("##### DEBUG ##### NR=" NR " if(TIMESLOT): TIMESLOT=" TIMESLOT " DATESLOT=" DATESLOT "\n") > "/dev/stderr"
# separate Time into Hour (time[1]), Minute (time[2]) and Second (time[3])
split(datetime[2],time,":") ;
# Reconstruct truncated field
record[yfield] = datetime[1] " " time[1] ":00"
}
else # if (DATESLOT)
{
# print("##### DEBUG ##### NR=" NR " elseif (DATESLOT): TIMESLOT=" TIMESLOT " DATESLOT=" DATESLOT "\n") > "/dev/stderr"
record[yfield] = datetime[1]
}
}
# Count the number of X fields seen
if (!xcount[record[xfield]])
{
seenx++;
xnames[seenx] = record[xfield]; # itemize the x names
# print("seenx =====DEBUG=====", seenx, xnames[seenx] "\n" );
}
xcount[record[xfield]]++ ;
# Count the number of Y fields seen
if (!ycount[record[yfield]])
{
seeny++;
ynames[seeny] = record[yfield] ;
# print("seeny =====DEBUG=====", seeny, ynames[seeny] "\n");
}
ycount[record[yfield]]++;
# count the entry
crosstab[record[xfield],record[yfield]]++ ;
}
# Post-process: print headers, totals, and the crosstab array
END {
asort(xnames) ; # ensure the top headers print in alphabetical (or numerical) order
# print headers
print(printcsv("'" headers[yfield] "' by '" headers[xfield] "'"));
for (i=1; i <= seenx; i++)
print(COMMA printcsv(xnames[i]));
print(COMMA COMMA "Totals\n" ); # Newline to complete the headers
# print the crosstab
for (i=1; i <= seeny; i++)
{
print(printcsv(ynames[i])) | "SORT" ;
ytotal += ycount[ynames[i]];
for (j=1; j <= seenx; j++)
print(COMMA crosstab[xnames[j], ynames[i] ] ) | "SORT" ;
print(COMMA COMMA ycount[ynames[i]]) | "SORT" ; # Print blank column, row total
print( "\n" ) | "SORT" ; # Newline to complete this row
}
print("=====\n");
print("Totals")
for (j=1; j <= seenx; j++)
print(COMMA xcount[xnames[j]]);
print(COMMA COMMA ytotal);
print("\n\n");
print("Unique entries:\n")
print(printcsv(headers[yfield]) COMMA seeny "\n")
print(printcsv(headers[xfield]) COMMA seenx "\n")
}
# EOF: XTAB.AWK
1 files processed.