fnameslot.awk
Generated on Tue Dec 05 17:39:06 Eastern Standard Time 2006 from fnameslot.awk
# Program: FNAMESLOT.AWK
# Purpose: Parse through USER.CSV (from GWCONV) counting firstnames
# Date : 25 October 2002
# Author : Bob Jonkman <bjonkman@sobac.com>
# Copyright 2008 Bob Jonkman and/or SOBAC Microcomputer Services
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Input format: Domain, Postoffice, ObjectID, Surname, Firstname...
# A firstname is the text up to the first space, i.e. for a field "Bob John" the firstname is "Bob"
# Modified : 2005-01-13 - Output sorted list
# : 2005-02-08 - Added "Unique" and "Total" and "Length"
# Initialisation: set the separators and matching mode before any input is read.
BEGIN {
    # Input fields and output fields are both comma-delimited.
    FS = OFS = ","
    # Separator placed between the parts of a multi-dimensional array subscript.
    SUBSEP = ","
    # gawk extension: make regex matching and string comparisons case-insensitive.
    # NOTE(review): per the gawk manual this does not affect array subscripts, so
    # names differing only in case are still tallied separately - confirm intended.
    IGNORECASE = 1
}
# Progress indicator: on every 1000th input record, write the running
# record number to stderr so it does not mix with the report on stdout.
NR % 1000 == 0 {
    print NR > "/dev/stderr"
}
# Data records: every line except line 1 contributes one firstname to the tally.
# NOTE(review): line 1 is presumably a column-header row - confirm against USER.CSV.
NR > 1 {
    # parsecsv() is defined outside this section; it is called with the raw line
    # and the record array (presumably filling record[] with the CSV fields - verify).
    parsecsv($0, record)

    # Field 5 is the Firstname column (see "Input format" in the file header).
    # Splitting on " " breaks it on whitespace; only the first word is used.
    split(record[5], fnameparts, " ")

    # A name not yet present in the tally is a new unique name.
    if (!(fnameparts[1] in firstnamelist))
        unique++

    firstnamelist[fnameparts[1]]++      # occurrences of this particular name
    total++                             # all names seen, duplicates included
}
# end of scanning
# Post-process: print the header, the sorted firstname frequency list, and the totals
# Report: emit the CSV header, one "frequency,firstname,length" line per distinct
# firstname (ordered by the external sort command), then a footer with the totals.
END {
    print("Frequency,Firstname,Length");

    for (i in firstnamelist)
    {
        # %8i right-aligns the count in a fixed-width column, so the external
        # text sort orders the lines by frequency. printcsv() is defined outside
        # this section (presumably CSV-quotes the name - verify).
        printf("%8i,%s,%i\n", firstnamelist[i],printcsv(i),length(i)) | "sort /R" ;
    }

    # Close the pipe so that sort sees end-of-input, writes its output and exits
    # before the footer is printed. Without this the pipe stays open until the
    # program exits and the footer appears ahead of the sorted list.
    # The command string must match the one used in the redirection exactly.
    close("sort /R") ;

    print("=====") ;
    print(total ",Total");
    print(unique ",Unique");
}
# EOF: FNAMESLOT.AWK
1 files processed.