Code Files

library.awk

Generated on Tue Dec 05 17:39:05 Eastern Standard Time 2006 from library.awk

# Program   : LIBRARY.AWK
# Purpose   : Contains functions common to many AWK scripts
# Author    : Bob Jonkman <bjonkman@sobac.com>

# Copyright 2008 Bob Jonkman and/or SOBAC Microcomputer Services

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Date      : 19 July 2005

# Contents  :
#             rstring()         - Returns the right-most n characters of string
#             max()             - Maximum of items in list
#             min()             - Minimum of items in list
#             trim()            - Trim whitespace from both ends of a string
#             rtrim()           - Trim whitespace from end (right side) of a string
#             ltrim()           - Trim whitespace from beginning (left side) of a string
#             gwid2smtp()       - escape reserved SMTP characters in GW Object
#                           see http://www.novell.com/documentation/gw55/index.html?page=/documentation/gw55/gw55ia/data/a30135u.html#a30135u 
#             smtp2gwid()       - un-escape 
#             reverse()         - reverse order of delimited string, eg. a.b.c -> c_b_a
#             txt2html()        - escape reserved HTML characters
#             txt2uri()         - escape reserved URI characters
#             uri2txt()         - un-escape reserved URI characters (inverse of txt2uri)
#             makehtml()        - Create href links (http:// file:// mailto:) from text  (mailto: was microformatted with class="vcard" on 2006-08-04)
#             parsecsv()        - Split Comma-Quote fields in string into an array
#             printcsv()        - Determine whether to print a field with delimiters
#             valnameid()       - Validate Name and ID token names (see http://www.w3.org/TR/html4/types.html#type-id )
#             printhtmlhead()   - print headers for an HTML file
#             parsecgi()        - Parse QUERY_STRING or POST data into an array
#             txt2gwapi()       - escape reserved characters in GW API files
#                           (see pg 42,43 "Keyword Ordering Requirements and Delimiters" in GroupWise API Gateway documentation)
#             getheaders()      - Create reverse lookup array for input string


# rstring(string, n) - return the right-most n characters of string.
#   The start position is computed as length(string) - n + 1 and handed to
#   substr() without a length argument, so everything from that position to
#   the end of the string is returned.
function rstring(string,n,    firstpos)
{
    firstpos = length(string) - n + 1 ;
    return(substr(string, firstpos)) ;
}



# max(a, b) - return the larger of the two arguments.
#   Uses awk's ">" comparison, so numbers compare numerically and strings
#   compare as strings.  When neither is greater, b is returned.
function max(a,b)
{
    if (a > b)
        return(a) ;
    return(b) ;
}

# min(a, b) - return the smaller of the two arguments.
#   Uses awk's "<" comparison, so numbers compare numerically and strings
#   compare as strings.  When neither is smaller, b is returned.
function min(a,b)
{
    if (a < b)
        return(a) ;
    return(b) ;
}

# trim(string) - strip whitespace from both ends of string.
#   The right-hand end is trimmed first with rtrim(), then the left-hand
#   end with ltrim(); the trimmed copy is returned.
function trim(string)
{
    return(ltrim(rtrim(string))) ;
}

##### End trim #####





# rtrim(string) - strip whitespace from the end (right side) of string.
#   Whitespace here means space, form feed, newline, carriage return,
#   tab and vertical tab.  Returns the trimmed copy; the caller's
#   variable is not modified (awk passes scalars by value).
#
#   A single anchored sub() removes the whole trailing run at once rather
#   than rebuilding the string with substr() once per trailing character.
function rtrim(string)
{
    sub(/[ \f\n\r\t\v]+$/, "", string) ;
    return(string) ;
}

##### End rtrim #####
        





# ltrim(string) - strip whitespace from the beginning (left side) of string.
#   Whitespace here means space, form feed, newline, carriage return,
#   tab and vertical tab -- the same set that rtrim() removes, so that
#   trim() treats both ends of a string alike.  Returns the trimmed copy;
#   the caller's variable is not modified (awk passes scalars by value).
function ltrim(string)
{
    sub(/^[ \f\n\r\t\v]+/, "", string) ;
    return(string) ;
}

##### End ltrim #####





# gwid2smtp(text) - perform GroupWise character translation: escape the
#   characters that are reserved in the SMTP form of a GroupWise object name.
# http://www.novell.com/documentation/gw55/index.html?page=/documentation/gw55/gw55ia/data/a30135u.html#a30135u
#
#   Translation table:   #  -> #h#      _  -> #u#      space -> _
#                        (  -> #l#      )  -> #r#      ,     -> #m#
#                        :  -> #c#      \  -> #b#      =     -> #e#
#                        /  -> #s#
#
#   Returns the translated copy of text.  smtptext is a local work variable.
function gwid2smtp(text,   smtptext)
{
    smtptext = text ;

    # The first three substitutions are order-sensitive: "#" must be escaped
    # before any "#x#" sequence is generated, and a literal underscore must be
    # escaped before spaces are turned into underscores.
    gsub(/#/   ,"#h#",smtptext) ;
    gsub(/_/   ,"#u#",smtptext) ;
    gsub(/ /   ,"_"  ,smtptext) ;

    gsub(/\(/  ,"#l#",smtptext) ;
    gsub(/\)/  ,"#r#",smtptext) ;    # ")" is escaped: a bare /)/ is rejected by some awk implementations
    gsub(/,/   ,"#m#",smtptext) ;
    gsub(/:/   ,"#c#",smtptext) ;
    gsub(/\\/  ,"#b#",smtptext) ;
    gsub(/=/   ,"#e#",smtptext) ;
    gsub(/\//  ,"#s#",smtptext) ;

    return(smtptext) ;
}

##### End gwid2smtp #####

# smtp2gwid(text) - un-escape the GroupWise SMTP character translation,
#   i.e. the inverse of gwid2smtp():
#
#       #s# -> /     #e# -> =     #b# -> \     #c# -> :     #m# -> ,
#       #r# -> )     #l# -> (     #u# -> _     #h# -> #     _   -> space
#
#   The text is decoded in ONE left-to-right scan.  Decoding with one global
#   substitution per escape is not safe because escapes share their "#"
#   characters: the encoded form of "#s#" is "#h#s#h#", and substituting
#   "#s#" first would find a bogus match in the middle of it.
#
#   Anything that is not one of the escapes listed above is copied unchanged.
#   Returns the decoded string.  match() is used internally; RSTART and
#   RLENGTH are restored before returning.
function smtp2gwid(text,   gwidtext,rest,token,decode,savestart,savelength)
{
    decode["#s#"] = "/"  ;
    decode["#e#"] = "="  ;
    decode["#b#"] = "\\" ;
    decode["#c#"] = ":"  ;
    decode["#m#"] = ","  ;
    decode["#r#"] = ")"  ;
    decode["#l#"] = "("  ;
    decode["#u#"] = "_"  ;
    decode["#h#"] = "#"  ;
    decode["_"]   = " "  ;

    savestart  = RSTART ;
    savelength = RLENGTH ;

    gwidtext = "" ;
    rest     = text ;
    while (match(rest, /#[sebcmrluh]#|_/))
    {
        token    = substr(rest, RSTART, RLENGTH) ;
        gwidtext = gwidtext substr(rest, 1, RSTART - 1) decode[token] ;
        rest     = substr(rest, RSTART + RLENGTH) ;     # continue after the consumed escape
    }

    RSTART  = savestart ;
    RLENGTH = savelength ;

    return(gwidtext rest) ;
}

##### End smtp2gwid #####




# reverse(instring, inseparator, outseparator)
#   Split instring on inseparator and re-join the pieces in reverse order
#   with outseparator between them, e.g. reverse("a,b,c", ",", "_") is "c_b_a".
#   Returns the re-joined string.  The remaining parameters are locals.
function reverse(instring,inseparator,outseparator,       count,parts,idx,joined)
{
    count  = split(instring, parts, inseparator) ;

    # Start from the last piece, then walk back towards the first one.
    joined = parts[count] ;
    idx    = count ;
    while (--idx > 0)
        joined = joined outseparator parts[idx] ;

    return joined ;
}

##### End reverse #####





# txt2html(text) - escape the characters that are reserved in HTML.
#       &  ->  &amp;       <  ->  &lt;       >  ->  &gt;       "  ->  &quot;
#   Returns the escaped copy of text; escaped is a local work variable.
function txt2html(text,   escaped)
{
    escaped = text ;

    # The ampersand has to be handled first, otherwise the "&" that starts
    # each of the entities generated below would be escaped a second time.
    # In a gsub() replacement a bare "&" means "the matched text", hence "\\&".
    gsub(/\&/, "\\&amp;" , escaped) ;

    gsub(/</ , "\\&lt;"  , escaped) ;
    gsub(/>/ , "\\&gt;"  , escaped) ;
    gsub(/"/ , "\\&quot;", escaped) ;

    return(escaped) ;
}

##### End txt2html #####


# txt2uri(text) - percent-encode the reserved URI characters in text.
#   See RFC3986 (STD0066) section 2.2.  Besides the reserved characters,
#   "%" itself, the space and the backslash are encoded as well.
#   Returns the encoded copy of text; uritext is a local work variable.
function txt2uri(text,   uritext)
{
    uritext = text ;

    # "%" must be encoded first so that the "%" of the escapes generated
    # below is not encoded a second time.
    gsub(/%/   ,"%25",uritext) ;

    gsub(/ /   ,"%20",uritext) ;
    gsub(/:/   ,"%3A",uritext) ;
    gsub(/\//  ,"%2F",uritext) ;
    gsub(/\?/  ,"%3F",uritext) ;
    gsub(/#/   ,"%23",uritext) ;
    gsub(/\[/  ,"%5B",uritext) ;
    gsub(/]/   ,"%5D",uritext) ;
    gsub(/@/   ,"%40",uritext) ;
    gsub(/!/   ,"%21",uritext) ;
    gsub(/\$/  ,"%24",uritext) ;
    gsub(/&/   ,"%26",uritext) ;
    gsub(/'/   ,"%27",uritext) ;
    gsub(/\(/  ,"%28",uritext) ;
    gsub(/\)/  ,"%29",uritext) ;    # ")" is escaped: a bare /)/ is rejected by some awk implementations
    gsub(/\*/  ,"%2A",uritext) ;
    gsub(/\+/  ,"%2B",uritext) ;
    gsub(/,/   ,"%2C",uritext) ;
    gsub(/;/   ,"%3B",uritext) ;
    gsub(/=/   ,"%3D",uritext) ;
    gsub(/\\/  ,"%5C",uritext) ;    # Backslash isn't part of RFC3986 (I think)

    return(uritext) ;
}
##### End txt2uri #####


# uri2txt(uri) - decode the percent-escapes produced by txt2uri().
#
#   Decoded escapes (hex digits are accepted in either case, as RFC3986
#   section 2.1 declares them equivalent):
#       %20 space  %21 !  %23 #  %24 $  %25 %  %26 &  %27 '  %28 (  %29 )
#       %2A *      %2B +  %2C ,  %2F /  %3A :  %3B ;  %3D =  %3F ?  %40 @
#       %5B [      %5C \  %5D ]
#   In addition the pair %0D%0A (CR LF) is replaced by the two characters
#   backslash and "n".
#   NOTE(review): that is what the previous gsub(/%0D%0A/,"\\n") produced, so
#   it is kept; confirm whether a real newline was intended instead.
#
#   Any other "%" is copied through unchanged.
#
#   The string is decoded in a single left-to-right scan using a lookup
#   table, so no decoded character is ever re-examined ("%2520" becomes
#   "%20", not a space) and no gsub() replacement-string escaping is
#   involved ("%24" yields "$", never "\$").
#
#   Returns the decoded string.  The remaining parameters are locals.
function uri2txt(uri,  text,rest,pos,code,decode)
{
    decode["20"] = " "  ;   decode["21"] = "!"  ;   decode["23"] = "#" ;
    decode["24"] = "$"  ;   decode["25"] = "%"  ;   decode["26"] = "&" ;
    decode["27"] = "'"  ;   decode["28"] = "("  ;   decode["29"] = ")" ;
    decode["2A"] = "*"  ;   decode["2B"] = "+"  ;   decode["2C"] = "," ;
    decode["2F"] = "/"  ;   decode["3A"] = ":"  ;   decode["3B"] = ";" ;
    decode["3D"] = "="  ;   decode["3F"] = "?"  ;   decode["40"] = "@" ;
    decode["5B"] = "["  ;   decode["5C"] = "\\" ;   decode["5D"] = "]" ;

    text = "" ;
    rest = uri ;
    while ((pos = index(rest, "%")) > 0)
    {
        text = text substr(rest, 1, pos - 1) ;          # copy everything before the "%"

        if (toupper(substr(rest, pos, 6)) == "%0D%0A")
        {
            text = text "\\n" ;                         # backslash followed by "n"
            rest = substr(rest, pos + 6) ;
            continue ;
        }

        code = toupper(substr(rest, pos + 1, 2)) ;
        if (code in decode)
        {
            text = text decode[code] ;
            rest = substr(rest, pos + 3) ;
        }
        else
        {
            text = text "%" ;                           # not an escape we know: keep the "%"
            rest = substr(rest, pos + 1) ;
        }
    }

    return(text rest) ;
}


# makehtml(string) - turn a piece of text into an HTML fragment.
#
#   * text starting with two backslashes (a UNC path) becomes a file:/// link;
#   * text starting with "http://" becomes a link to itself (the text is
#     assumed to be txt2uri-converted already);
#   * text containing "@" becomes a mailto: link wrapped in vcard microformat
#     markup;
#   * anything else is simply escaped with txt2html().
#
#   Attribute values are delimited with the global QUOTE, which is set to a
#   double quote here if it is still unset (as printhtmlhead() does).  Every
#   href value is passed through txt2html() so that a double quote, "&", "<"
#   or ">" in the input cannot end the attribute or produce malformed markup.
#
#   Returns the HTML fragment.  htmlstring and path are locals.
function makehtml(string,   htmlstring,path)
{
    if (!QUOTE)
        QUOTE = "\"" ;

  # print("<!-- ##### DEBUG ##### fieldarray= " string " substr= " substr(string,1,2) " -->") ;
    if (substr(string,1,2) == "\\\\")
    {
        path = string ;
        # NOTE(review): this pattern matches a PAIR of backslashes, so only
        # doubled backslashes become "/"; single ones are left for txt2uri()
        # to encode as %5C.  Confirm this is the intended file: URI form.
        gsub(/\\\\/,"/",path) ;
        htmlstring = "<a href=" QUOTE "file:///" txt2html(txt2uri(path)) QUOTE ">" txt2html(string) "</a>" ;
    }
    else if (substr(string,1,7) == "http://")
    {
        htmlstring = "<a href=" QUOTE txt2html(string) QUOTE ">" txt2html(string) "</a>" ;
    }
    else if (index(string, "@") > 0)
    {
        # E-mail address is not cleaned with txt2uri() so @ stays as symbol -- but txt2uri() may be necessary!
        htmlstring = "<span class=" QUOTE "vcard" QUOTE "><a class=" QUOTE "email fn" QUOTE " href=" QUOTE "mailto:" txt2html(string) QUOTE " title=" QUOTE "E-mail to " txt2html(string) QUOTE ">" txt2html(string) "</a>" ;
        htmlstring = htmlstring "</span>" ;
    }
    else
        htmlstring = txt2html(string) ;

    return(htmlstring) ;
}

##### End makehtml #####




# parsecsv(rawfield, fieldarray) - split one comma-quote (CSV) record.
#
#   rawfield    the record to split
#   fieldarray  cleared, then filled with the fields: fieldarray[1] ..
#               fieldarray[n].  Every field gets an element, empty ones too.
#   returns     n, the number of fields (1 for an empty record)
#
#   Globals: FIELD_SEPARATOR (default ",") and FIELD_DELIMITER (default a
#   double quote) are set to their defaults if unset.  The record is examined
#   one character at a time, so both are expected to be single characters.
#
#   Rules:
#     * a separator outside delimiters starts a new field; inside delimiters
#       it is part of the field;
#     * a single delimiter switches "inside delimiters" on or off and is not
#       part of the field;
#     * two adjacent delimiters stand for one literal delimiter character
#       (this is also accepted in the middle of an undelimited field, which
#       is what printcsv() has historically written);
#     * two adjacent delimiters that make up a whole field ("a,"",c") are an
#       EMPTY delimited field, not a literal delimiter character.
function parsecsv(rawfield,fieldarray,    fieldnum,qflag,i,char,len,atstart,after)
{
    delete fieldarray ;

    if (!FIELD_SEPARATOR)
        FIELD_SEPARATOR = "," ;

    if (!FIELD_DELIMITER)
        FIELD_DELIMITER = "\"" ;

    qflag    = 0 ;                  # TRUE if inside field delimiters
    fieldnum = 1 ;
    fieldarray[fieldnum] = "" ;
    atstart  = 1 ;                  # TRUE until the current field has consumed a character
    len      = length(rawfield) ;

    for (i = 1; i <= len; i++)
    {
        char = substr(rawfield, i, 1) ;

        if (char == FIELD_DELIMITER)
        {
            if (substr(rawfield, i+1, 1) != FIELD_DELIMITER)
            {
                qflag = !qflag ;                        # lone delimiter: open or close
            }
            else if (qflag || !atstart)
            {
                # doubled delimiter: one literal delimiter character
                fieldarray[fieldnum] = fieldarray[fieldnum] FIELD_DELIMITER ;
                i++ ;
            }
            else
            {
                # doubled delimiter at the very start of a field: look one
                # character further to decide what it means
                after = substr(rawfield, i+2, 1) ;
                if (after == "" || after == FIELD_SEPARATOR)
                    i++ ;                               # open + close: empty delimited field
                else if (after == FIELD_DELIMITER)
                    qflag = 1 ;                         # opening delimiter followed by a doubled one
                else
                {
                    # undelimited field that begins with a doubled delimiter
                    fieldarray[fieldnum] = fieldarray[fieldnum] FIELD_DELIMITER ;
                    i++ ;
                }
            }
            atstart = 0 ;
        }
        else if (char == FIELD_SEPARATOR && !qflag)
        {
            fieldnum++ ;
            fieldarray[fieldnum] = "" ;
            atstart = 1 ;
        }
        else
        {
            # ordinary character, or a separator inside delimiters
            fieldarray[fieldnum] = fieldarray[fieldnum] char ;
            atstart = 0 ;
        }
    }

    return(fieldnum) ;
}



#####  End of parsecsv() #####


# printcsv(field) - prepare one field for output in a comma-quote (CSV) record.
#
#   Globals: FIELD_SEPARATOR (default ",") and FIELD_DELIMITER (default a
#   double quote) are set to their defaults if unset.
#
#   * Every field delimiter inside the field is doubled to escape it.
#   * The field is enclosed in field delimiters when it contains the field
#     separator or a field delimiter.
#   * Any other field is returned unchanged.
#
#   Separator and delimiter are searched for as literal text with index(),
#   not as regular expressions, so characters such as "|" or "." can be used
#   for either of them.
#
#   Returns the field ready for printing.  The remaining parameters are locals.
function printcsv(field,    rest,out,pos,dlen,needwrap)
{
    if (!FIELD_SEPARATOR)
        FIELD_SEPARATOR = "," ;

    if (!FIELD_DELIMITER)
        FIELD_DELIMITER = "\"" ;

    needwrap = (index(field, FIELD_SEPARATOR) > 0) ;

# Double field delimiters to escape them
    if (index(field, FIELD_DELIMITER) > 0)
    {
        needwrap = 1 ;
        dlen = length(FIELD_DELIMITER) ;
        out  = "" ;
        rest = field ;
        while ((pos = index(rest, FIELD_DELIMITER)) > 0)
        {
            out  = out substr(rest, 1, pos - 1) FIELD_DELIMITER FIELD_DELIMITER ;
            rest = substr(rest, pos + dlen) ;
        }
        field = out rest ;
    }

# Apply field delimiters when field separator or field delimiter is in field
    if (needwrap)
        field = FIELD_DELIMITER field FIELD_DELIMITER ;

    return(field) ;
}
            


# valnameid(text) - make text usable as an HTML NAME or ID token
#   (see http://www.w3.org/TR/html4/types.html#type-id ).
#
#   Every character other than a letter, a digit, "-", "_", ":" or "." is
#   replaced by an underscore.  If the result does not begin with a letter
#   (this includes an empty result) an "a" is put in front of it.
#
#   Returns the validated token; token is a local work variable.
function valnameid(text,   token)
{
    token = text ;
    gsub(/[^A-Za-z0-9\-_:\.]/, "_", token) ;

    if (substr(token,1,1) ~ /[A-Za-z]/)
        return(token) ;

    return("a" token) ;
}

##### End of valnameid() #####



# printhtmlhead(title) - print the opening of an XHTML 1.0 document up to
#   and including the closing </head> tag.
#
#   title    text for the <title> element; it is escaped with txt2html()
#
#   Globals: QUOTE   set to a double quote here if it is unset; it is the
#                    character printed around attribute values
#            NOHTTP  when it is unset (or false) a "Content-type" HTTP header
#                    followed by an empty line is printed first
#
#   q is a local shorthand for QUOTE.
function printhtmlhead(title,    q)
{
    if (!QUOTE)
        QUOTE = "\"" ;
    q = QUOTE ;

    if (!NOHTTP)
        print("Content-type: text/html; charset=UTF-8\n") ;

    print("<?xml version=" q "1.0" q " encoding=" q "utf-8" q "?>") ;

    # The DTD needs to be "Transitional" because there may be "target"
    # attributes in "A" elements.
    print("<!DOCTYPE html") ;
    print("     PUBLIC " q "-//W3C//DTD XHTML 1.0 Transitional//EN" q) ;
    print("     " q "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" q ">") ;

    # The Strict alternative, kept for reference:
#   print("     PUBLIC " q "-//W3C//DTD XHTML 1.0 Strict//EN" q) ;
#   print("     " q "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" q ">") ;

    print("<html xmlns=" q "http://www.w3.org/1999/xhtml" q) ;
    print("      xml:lang=" q "en" q) ;
    print("      lang=" q "en" q ">") ;

    print(" <head>") ;
    print("  <meta http-equiv=" q "Content-Type" q " content=" q "text/html; charset=UTF-8" q " />") ;
    print("  <meta name=" q "generator" q " content=" q "CSV AWK Tools by Bob Jonkman bjonkman@sobac.com" q " />") ;
    print("  <link rel=" q "stylesheet" q " href=" q "/default.css" q " type=" q "text/css" q " />") ;
    print("  <title>" txt2html(title) "</title>") ;
    print(" </head>") ;
}

##### End of printhtmlhead() #####



# parsecgi(inputstring, outputarray) - parse QUERY_STRING or POST data.
#
#   inputstring  "name=value" pairs joined with "&"
#   outputarray  cleared, then filled so that outputarray[name] = value;
#                names and values are decoded with uri2txt() after every "+"
#                has been turned into a space
#   returns      the number of "&"-separated pairs found in inputstring
#
#   Each pair is split at its FIRST "=" only, so a value may itself contain
#   "=" characters.  A pair without "=" yields an empty value.  Pairs are
#   processed from left to right, so when a name occurs more than once the
#   last occurrence is the one that is kept.
#
#   All work variables, including the loop counter, are locals so that a
#   caller's own variable named "i" is left alone.
function parsecgi(inputstring,outputarray,     querystring,numparam,i,pair,eqpos,name,value)
{
    delete outputarray ;

    numparam = split(inputstring, querystring, "&") ;
    for (i = 1; i <= numparam; i++)
    {
        pair = querystring[i] ;
        gsub(/\+/, " ", pair) ;             # remove + as space substitute

        eqpos = index(pair, "=") ;
        if (eqpos > 0)
        {
            name  = substr(pair, 1, eqpos - 1) ;
            value = substr(pair, eqpos + 1) ;
        }
        else
        {
            name  = pair ;
            value = "" ;
        }

        outputarray[uri2txt(name)] = uri2txt(value) ;
    }

    return(numparam) ;
}

##### End of parsecgi() #####



# txt2gwapi(text) - escape the characters that are reserved in GroupWise
#   API gateway files by putting a backslash in front of them:
#       "  ->  \"        ;  ->  \;
#   Returns the escaped text.  The two substitutions do not affect each
#   other, so their order does not matter.
function txt2gwapi(text)
{
    # double quote -> backslash double quote
    gsub(/\"/ , "\\\"", text) ;

    # semicolon -> backslash semicolon
    gsub(/;/  ,"\\;"  , text) ;

    return(text) ;
}

##### End of txt2gwapi()



# Results of getheaders("Alpha,Beta,Gamma",outarray) 
#                           outarray["Alpha"] == 1
#                           outarray["Beta"]  == 2
#                           outarray["Gamma"] == 3

# getheaders(instring, outarray) - create a reverse lookup array for a
#   comma-quote (CSV) header record.
#
#   instring  the header record; it is split with parsecsv()
#   outarray  receives outarray[fieldname] = position for every field.
#             It is NOT cleared first, so existing entries are kept unless
#             they are overwritten.
#   returns   the number of fields found by parsecsv()
#
#   numfields, temparray and i are locals, so calling this function does not
#   disturb variables of the same names (a loop counter "i" in particular)
#   in the calling code.
function getheaders(instring,outarray,    numfields,temparray,i)
{
    numfields = parsecsv(instring, temparray) ;
    for (i = 1; i <= numfields; i++)
        outarray[temparray[i]] = i ;
    return(numfields) ;
}

##### End of getheaders()

# EOF: LIBRARY.AWK
1 files processed.