GWClientID.awk
Generated on Tue Dec 05 17:39:06 Eastern Standard Time 2006 from GWClientID.awk
# Program : GWCLIENTID.AWK
# Purpose : Extract the GW Client Program Release and Date from the HTTP Monitor capture
# Date : 23 September 2003
# Author : Bob Jonkman <bjonkman@sobac.com>
# Copyright 2008 Bob Jonkman and/or SOBAC Microcomputer Services
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Note : The input files are saved from the GW POA HTTP Monitor
# e.g. http://cotadm.example.com:3800/conn
# Modified : 30 June 2006 - Output Network Location (instead of Network Ordinal)
# : 4 Aug 2006 - Changed lookup of Network Location to use NetRange instead of NetOrdinal
# : - Changed date format to yyyy-mm-dd (RFC3339) for Client Release Date and Login Time
# : 26 October 2006 - Added "Access Mode" field (from rconn connections)
# getitem() - Return the text of the item found two input lines below the
#             current one.
#
# Reads two more records from the main input (each plain getline replaces $0),
# then returns the text between the first ">" and the following "<" of the
# record that is now current.  $0 is left holding that record on return.
#
# Parameters:
#   strng - not a real argument; listed only so that it is a local variable.
# Returns:
#   The extracted text, or "" when no "<" follows the first ">".
function getitem(    strng) {
    getline
    getline
    # Diagnostics are opt-in (run with -v DEBUG=1) and go to stderr so that
    # they can never end up inside the CSV written to stdout.
    if (DEBUG)
        print("\n##### DEBUG ##### $0= " $0 "\n") > "/dev/stderr"
    strng = substr($0, index($0, ">") + 1)
    # Start at 1, not 0: awk strings are 1-based, and implementations disagree
    # on substr(s, 0, n) -- some return one character fewer than requested.
    return(substr(strng, 1, index(strng, "<") - 1))
}
# matchnth() - Return the nth successive (non-overlapping) match of a regexp.
#
# Each search resumes immediately after the end of the previous match.
#
# Parameters:
#   string - text to search (only the local copy is consumed)
#   regexp - regular expression to look for
#   nth    - 1-based ordinal of the wanted match
#   matchedstring - not a real argument; listed only so that it is local
# Returns:
#   The text of the nth match, or "" when nth is not positive or the string
#   holds fewer than nth matches.
# Side effects:
#   RSTART and RLENGTH are left as set by the last match() call.
function matchnth(string, regexp, nth,    matchedstring) {
    matchedstring = ""
    # "nth-- > 0" (rather than "while (nth)") terminates for zero, negative
    # and fractional counts as well.
    while (nth-- > 0) {
        if (!match(string, regexp))
            return ""               # ran out of matches before reaching nth
        matchedstring = substr(string, RSTART, RLENGTH)
        string = substr(string, RSTART + RLENGTH)
    }
    return(matchedstring)
}
# BEGIN - One-time setup: output conventions, the CSV header row, and the
#         in-memory Network Location lookup table.
BEGIN {
    ORS = ""            # every print supplies its own "\n" where one is wanted
    QUOTE = "\""
    IGNORECASE = 1      # Different versions of the GW POA return field labels with different capitalization *grump*
    CONVFMT = "%i"      # Ensure that strtonum() results in an integer, not exponential notation
    idomain = "@sobac.com"

    # Output Field Names:
    field[0]  = "Postoffice"
    field[1]  = "GroupWise User ID"
    field[2]  = "Login Name"
    field[3]  = "User IP Address"
    field[4]  = "Login Time"
    field[5]  = "User Platform"
    field[6]  = "GroupWise Client Release"
    field[7]  = "Client Release Date"
    field[8]  = "Proxied User ID"
    field[9]  = "IPOrdinal"
    field[10] = "BU"
    field[11] = "Network Location"
    field[12] = "Domain"
    field[13] = "Access Mode"

    # Comma BETWEEN the names only, so the header has no empty trailing column.
    for (i = 0; i <= 13; i++)
        print(printcsv(field[i]) (i < 13 ? "," : ""))
    print("\n")
    delete field        # field[] is reused below for the data of each record

    ################################################
    #####                                      #####
    #####       Read Network Location          #####
    #####                                      #####
    ################################################
    # Structure of "NetworkLocations.txt" is
    # NetmaskBits,Netmask,Network,Location,NetOrdinal,Netrange
    # Field 1. NetmaskBits
    # Field 2. Netmask
    # Field 3. Network
    # Field 4. Location
    # Field 5. NetOrdinal
    # Field 6. NetRange
    netlocfile = "NetworkLocations.txt"

    # Start the record counter at a numeric 0.  Left uninitialized, subscripts
    # built from it would use the empty string while the post-incremented one
    # would use 0, splitting record 0 across two different array keys.
    networklocationrecords = 0

    # getline returns -1 on error, which is "true" in a bare while() test, so
    # every read is compared against 0 explicitly.
    if ((getline < netlocfile) > 0) {
        parsecsv($0, networklocationheaders)            # first line holds the column headers
        while ((getline < netlocfile) > 0) {
            parsecsv($0, rawnetloc)
            networklocation[networklocationrecords, "Location"]   = rawnetloc[4]
            networklocation[networklocationrecords, "NetOrdinal"] = strtonum(rawnetloc[5])
            networklocation[networklocationrecords, "NetRange"]   = strtonum(rawnetloc[6])
            networklocationrecords++
        }
    } else {
        print("GWClientID: cannot read " netlocfile " - Network Location will be left blank\n") > "/dev/stderr"
    }
    close(netlocfile)

    # for(i=0;i<networklocationrecords;i++) print("\n##### DEBUG ##### i=" i " Loc=" networklocation[i,"Location"] " NetOrdinal=" networklocation[i,"NetOrdinal"] " NetRange=" networklocation[i,"NetRange"])
    ##### End Read Network Location #####
    ################################################
} # BEGIN
# Progress indicator: on every 1000th record of the current input file, write
# the file name and the record number to stderr.
(FNR % 1000) == 0 {
    print(FILENAME OFS FNR "\n") > "/dev/stderr"
}
# { print("\n##### DEBUG #### $0=" $0 "\n") }
# <TITLE> line: extract the Postoffice and Domain names.
#
# Everything from two characters past the first "-" is taken as
# "Postoffice.Domain" followed by further text.
# NOTE(review): the exact title layout is inferred from this parsing only --
# confirm against a real HTTP Monitor capture.
#
# Sets the globals namestart, dotpos, domainend, postoffice and domain.
/<TITLE>/ {
    namestart = substr($0, index($0, "-") + 2)
    dotpos = index(namestart, ".")

    # Start at 1, not 0: awk strings are 1-based, and implementations disagree
    # on substr(s, 0, n) -- some return one character fewer than requested.
    postoffice = substr(namestart, 1, dotpos - 1)          # Postoffice

    # The domain ends just before the first space; when there is no space,
    # fall back to the next "<" and finally to the end of the line.
    domainend = index(namestart, " ")
    if (!domainend)
        domainend = index(namestart, "<")
    if (!domainend)
        domainend = length(namestart) + 1

    # "- 1" keeps the terminating character itself out of the domain name.
    domain = substr(namestart, dotpos + 1, domainend - dotpos - 1)   # Domain
}
# 'HEIGHT=11' line: write out the record collected in field[] so far, then
# clear field[] for the next one.
# NOTE(review): this line is presumably the separator between connection
# entries in the capture -- confirm against a real capture.
#
# Column order follows the header row: Postoffice, field[1]..field[11],
# Domain, Access Mode (field[13]).
/HEIGHT=11/ {
    # Diagnostics are opt-in (run with -v DEBUG=1) and go to stderr so that
    # they can never end up inside the CSV written to stdout.
    if (DEBUG)
        print("##### DEBUG ##### Hit on 'HEIGHT=11'\n") > "/dev/stderr"

    if (field[1]) {                         # Do not print blank records
        print(printcsv(postoffice) ",")     # postoffice cannot be in field[], it's retrieved only once per file
        for (i = 1; i <= 11; i++)
            print(printcsv(field[i]) ",")
        # domain cannot be in field[] either (see above); Access Mode follows
        # it so that every data row has the same columns as the header.
        print(domain "," printcsv(field[13]) "\n")
        total++
    }
    delete field                            # Clear array in case the next record has blank fields
}
# 'GroupWise User ID' label: the item read by getitem() is the user ID,
# optionally followed by a proxied mailbox ID in round brackets.
#
# Sets field[1] (user) and, when brackets are present, field[8] (proxied
# user); both are passed through gwid2smtp() and suffixed with idomain.
/GroupWise User ID/ {
    # Diagnostics are opt-in (run with -v DEBUG=1) and go to stderr so that
    # they can never end up inside the CSV written to stdout.
    if (DEBUG)
        print("##### DEBUG ##### hit on 'GroupWise User ID'\n") > "/dev/stderr"

    field[1] = getitem()
    leftbracketpos = index(field[1], "(")
    if (leftbracketpos) {                   # brackets indicate there's a proxied mailbox
        field[8] = gwid2smtp(substr(field[1], leftbracketpos + 1, index(field[1], ")") - leftbracketpos - 1)) idomain
        # Keep the text up to three characters before "(".
        # NOTE(review): assumes two separator characters precede the bracket -- confirm.
        # Start at 1, not 0: implementations disagree on substr(s, 0, n).
        field[1] = substr(field[1], 1, leftbracketpos - 3)
    }
    field[1] = gwid2smtp(field[1]) idomain

    if (DEBUG)
        print("##### DEBUG ##### 'GroupWise User ID' field[1]=" field[1] " field[8]=" field[8] "\n") > "/dev/stderr"
}
# 'Login Name' label: split the item at its first dot.
#   field[2]  (Login Name) - the part before the dot, or the whole item when
#                            there is no dot
#   field[10] (BU)         - the part after the dot, passed through
#                            reverse(..., ".", "_"); blank when there is no dot
/Login Name/ {
    field[2] = getitem()
    dot = index(field[2], ".")
    if (dot) {
        # BU must be derived first: it needs the still-unsplit field[2].
        field[10] = reverse(substr(field[2], dot + 1), ".", "_")
        field[2]  = substr(field[2], 1, dot - 1)
    } else {
        field[10] = ""
    }
}
# 'User IP Address' label: store the dotted address in field[3], its 32-bit
# ordinal in field[9], and the matching Network Location (if any) in field[11].
/User IP Address/ {
    field[3] = getitem()

    # Determine the IPOrdinal for this IP address...
    split(field[3], ipaddress, ".")
    field[9] = ipaddress[1]*16777216 + ipaddress[2]*65536 + ipaddress[3]*256 + ipaddress[4]

    # Find this ipordinal in the networklocation table by brute force: stop at
    # the first record whose NetRange is not below the ordinal.  The scan is
    # bounded by the record count so that an address above every NetRange (or
    # an empty table) ends the loop instead of running past the table forever.
    # NOTE(review): this relies on the table being sorted by ascending NetRange -- confirm.
    for (counter = 0; counter < networklocationrecords && field[9] > networklocation[counter, "NetRange"]; counter++)
        ;   # do nothing, we're just finding the correct counter for this IPOrdinal

    # Accept the candidate only when the ordinal really lies inside its range.
    if (counter < networklocationrecords &&
        field[9] >= networklocation[counter, "NetOrdinal"] &&
        field[9] <= networklocation[counter, "NetRange"]) {
        field[11] = networklocation[counter, "Location"]
    }
}
# 'Login Time' label: reorder the date part of the item into year-month-day.
# Per the original author's note the item arrives as "mm/dd/yyyy hh:mm:ss" and
# is stored in field[4] as "yyyy-mm-dd hh:mm:ss" (RFC3339 allows a space as the
# separator; there is no time zone).  The pieces are copied as they are; no
# zero-padding is applied here.
/Login Time/ {
    field[4] = getitem()
    split(field[4], datetime, " ")      # datetime[1] = date, datetime[2] = time
    split(datetime[1], date, "/")       # date[1] = month, date[2] = day, date[3] = year
    field[4] = sprintf("%s-%s-%s %s", date[3], date[1], date[2], datetime[2])
}
# 'User platform' label: keep the item exactly as reported in field[5].
# (An earlier variant, tolower(substr(getitem(),1,3)), truncated it to three
# lowercase characters because different POA versions return different formats
# for Platform; that variant is disabled.)
/User platform/ {
    field[5] = getitem()
}
# 'Access Mode' label: store the item in field[13] (the "Access Mode" column).
/Access Mode/ {
    # Diagnostics are opt-in (run with -v DEBUG=1) and go to stderr so that
    # they can never end up inside the CSV written to stdout.
    if (DEBUG)
        print("##### DEBUG ##### Hit on 'Access Mode'\n") > "/dev/stderr"
    field[13] = getitem()
    # field[5] = "Batch Remote" # Put Batch Remote into User Platform field (omit from Old Clients)
}
# 'GroupWise Client Release' label:
#   field[6] - Client Release, right-trimmed ('unx' release number trails a space)
#   field[7] - Client Release Date, converted from m-d-yyyy to yyyy-mm-dd
# The date is the third ">...<" item on the line that getitem() leaves in $0.
/GroupWise Client Release/ {
    field[6] = rtrim(getitem())

    # Third bracketed item on the current line, e.g. ">m-d-yyyy<".
    field[7] = matchnth($0, "(>[\\- 0-9.ßB]*<)", 3)
    # Drop the enclosing ">" and "<".
    field[7] = substr(field[7], 2, length(field[7]) - 2)

    # date[1] = month, date[2] = day, date[3] = year
    split(ltrim(field[7]), date, "-")

    # Month and day are left-padded with zeros, then cut to two characters by rstring().
    field[7] = sprintf("%s-%s-%s", date[3], rstring("00" date[1], 2), rstring("00" date[2], 2))
}
# END - Write the trailer: a "=====" separator line followed by the number of
#       records that were output.
END {
    print("=====\n" "Total," total "\n")
}
# EOF: GWCLIENTID.AWK
1 file processed.