Code Files

csv2html.awk

Generated on Tue Dec 05 17:39:04 Eastern Standard Time 2006 from csv2html.awk


# Program    : CSV2HTML.AWK
# Purpose    : Convert a CSV file into an HTML table
# Date       : 18 February 2005
# Author     : Bob Jonkman

# Modified   : 3 February 2006 - Added navaids "Go to the top of this page"
#              31 March 2006   - Changed default from HTTP to NOHTTP primarily in dynamic pages
#                              - Added other code (eg. QUERY_STRING) for dynamic pages
#              25 Sept 2006    - Added "Prev, Next" navigational aids

# Usage      : gawk -f CSV2HTML.AWK -f LIBRARY.AWK [-v TITLE=title] [-v PLOT=(0|1)] [-v LINK=(0|1)] [-v NOHEADER=(0|1)] [-v NOHTTP=(0|1)] 1*(filename.csv)
#               http://hostname/csv2html.awk?FILENAME=filename.csv[1*(|filename.csv)][&TITLE=title][&PLOT=(0|1)][&LINK=(0|1)][&NOHEADER=(0|1)]
# Note       : Use a literal | to separate filenames in URI

# Variables  :   TITLE = HTML <title> and <h1> elements for the page
#                PLOT  = Places FILENAME.png image element on right
#                LINK  = Creates link to FILENAME for each table
#                NOHEADER = Does not place the first line of the CSV file in a THEAD element
#                NOHTTP   = Does not emit the "Content-type: text/html" line (that line is needed only for dynamic pages)


function makemenu(topofpage,    i)    {
    # Emit the "In this page" menu: one internal link per entry in ARGV[1..ARGC-1].
    #   topofpage : if TRUE, the list starts with a "Top of this page" link
    #   i         : local loop index. It is declared as an extra parameter (awk's only
    #               way to get a local) so that calling makemenu() does not overwrite
    #               the global i used by the pattern rules. Callers do not pass it.
    # Uses the global QUOTE and the helpers valnameid() / txt2html(), which are not
    # defined in this file (presumably they come from LIBRARY.AWK -- see Usage).
            print("   <div class=" QUOTE "menu" QUOTE ">")
            print("    <p>In this page:</p>") ;
            print("    <ul>" )

            if (topofpage)
                print("     <li>[<a href=" QUOTE "#top-of-page" QUOTE " class=" QUOTE "internal" QUOTE ">Top of this page</a>] </li>")

            # Each link targets the id that the per-file <h2> heading carries
            for(i=1; i<ARGC; i++)
            {
                print("     <li>[<a class=" QUOTE "internal" QUOTE " href=" QUOTE "#" valnameid(ARGV[i]) QUOTE ">" txt2html(ARGV[i]) "</a>] </li>")
            }
            print("   </ul>")
            print("  </div> <!-- end of menu  -->")
}




BEGIN    {
            # Global constants. QUOTE is used throughout this file; the other three are
            # not referenced here (presumably they are read by the library's CSV parser).
            QUOTE            = "\""
            FIELD_SEPARATOR  = ","
            FIELD_DELIMITER  = "\""
            RECORD_SEPARATOR = "\n"

            # Decode the CGI query string (if any) into qstring[]
            parsecgi(ENVIRON["QUERY_STRING"], qstring)

# print("<!--                         ##### DEBUG #####")
# print("ARGC= " ARGC)                ##### DEBUG #####
# for(i in ARGV)                      ##### DEBUG #####
#     print("ARGV[" i "]= " ARGV[i])  ##### DEBUG #####
# print("-->")                        ##### DEBUG #####

            if (ARGC != 1)
            {
                # Files were named on the command line: do not emit an HTTP style header
                NOHTTP = 1
            }
            else
            {
                # No files on the command line: always emit an HTTP style header,
                # and take the file list (separated by a literal |) from the query string
                NOHTTP = 0
                if (qstring["FILENAME"])
                    ARGC = 1 + split(qstring["FILENAME"], ARGV, "|")    # split() refills ARGV[1..n]
            }

            # Command-line (-v) settings win; otherwise fall back to the query string
            if (!TITLE)
            {
                TITLE = qstring["TITLE"]
                if (!TITLE)         # still no title, so use the default
                    TITLE = "Tables from CSV files"
            }
            PLOT     = PLOT     ? PLOT     : qstring["PLOT"]
            LINK     = LINK     ? LINK     : qstring["LINK"]
            NOHEADER = NOHEADER ? NOHEADER : qstring["NOHEADER"]

            printhtmlhead(TITLE)

            filenum = 0     # number of files for which a table has been started

            print " <body><a name=" QUOTE "top-of-page" QUOTE " id=" QUOTE "top-of-page" QUOTE "></a>"
            print "  <h1>" makehtml(TITLE) "</h1>"

            # The header menu (without "Top of page" link) appears only when the page
            # holds more than one file; it is never produced when ARGC == 1
            if (ARGC > 2)
                makemenu(0)

            print "  <div id=" QUOTE "content" QUOTE ">"

            if (ARGC == 1)
            {
                print "   <p>No files specified, nothing to do!</p>"

#            print("<!-- ##### DEBUG ##### ARGC= " ARGC)
#            for(i in ARGV)  ##### DEBUG #####
#                print("ARGV[" i "]= " ARGV[i])  ##### DEBUG ##### -->")

                exit    # END{} is still processed and closes the page
            }
    }

(FILENAME != oldfilename )    {
# Runs on the first record that comes from a different file than the previous record:
# closes the previous table (if any), then writes the heading, the navigational aids,
# the "Generated on" paragraph, the optional plot image and the opening <table> tag.
#
# The heading id and the "Previous" link are taken from FILENAME and from the previously
# processed file (oldfilename) rather than from ARGV[filenum] / ARGV[filenum-1].
# awk delivers no record for an empty file and consumes var=value arguments itself,
# so filenum can fall behind the ARGV index; the id then named a different file than
# the heading text.  For distinct, non-empty files the output is unchanged.
# print("<!-- ##### DEBUG ##### FILENAME= " FILENAME " (FILENAME != oldfilename) FILENAME= " FILENAME " oldfilename= " oldfilename " -->")

        if(filenum)        # check if we've processed multiple files
                {    if(footerflag == 2)
                    {   print("    </tfoot>") 
                        footerflag = 0 
                    }
                    if(bodyflag == 2)
                    {   print("    </tbody>") 
                        bodyflag = 0 
                    }

                    print("   </table>") 
                    print("") 
                }

        footerflag  = 0    # 0 == none  1 == to be written 2 == currently active  
        bodyflag    = 0 
        
        filenum++ 

        # The id must match the file actually shown in this heading
        print("   <h2 id=" QUOTE valnameid(FILENAME) QUOTE)
        if (filenum != 1)   #  Suppress page-break-before on the first file
            print("        style=" QUOTE "page-break-before: always ; " QUOTE )

        print("       >")

        if (ARGC > 2)   # Display "Prev, Top, Next" navigational aids
        {
            print("    <span class=" QUOTE "internal navaid" QUOTE ">")

            if (filenum != 1)       # suppress "Previous" for first file    
            {
                # oldfilename still holds the previously processed file here
                print("     <a href=" QUOTE "#" valnameid(oldfilename) QUOTE)
                print("        title=" QUOTE "Previous Table: " txt2html(oldfilename) QUOTE)
                print("     >&lt;</a>")
            } else
            {   print("     <span class=" QUOTE "disabled" QUOTE ">&lt;</span>")
            }

            print("     <a href=" QUOTE "#top-of-page" QUOTE)
            print("        title=" QUOTE "Go to the top of this page" QUOTE)
            print("     >^</a>")

            # NOTE: the next file that will actually yield records is not known yet, so
            # "Next" still assumes ARGV[filenum+1] is the following (non-empty) file.
            if (filenum < ARGC-1 )    # suppress "Next" for last file
            {
                print("     <a href=" QUOTE "#" valnameid(ARGV[filenum+1]) QUOTE)
                print("        title=" QUOTE "Next Table: " txt2html(ARGV[filenum+1]) QUOTE)
                print("     >&gt;</a>")
            } else
            {   print("     <span class=" QUOTE "disabled" QUOTE ">&gt;</span>")
            }
        print("    </span>")
        }
# print("<!-- ##### DEBUG #####  filenum=" filenum ", ARGC=" ARGC " -->")

        print("   " txt2html(FILENAME) "</h2>" )


        print("   <p>Generated on " strftime() " from ") 

        if(LINK)
            print("    <a href=" QUOTE txt2uri(FILENAME) QUOTE ">" txt2html(FILENAME) "</a>") 
        else
            print("    " txt2html(FILENAME))

        print("   </p>")
        print("") 
        if(PLOT)            
        {   print("   <img src=" QUOTE txt2uri(FILENAME) ".png" QUOTE )
            print("        alt=" QUOTE "Plot of " txt2html(FILENAME) QUOTE )
            print("        style=" QUOTE "float: right ; " QUOTE )
            print("        />") 
        }
        print("   <table") 
        print("    border=" QUOTE "1" QUOTE ) 
#        print("    style=" QUOTE "border: thin solid black ;" QUOTE ) 
        print("    summary=" QUOTE "Generated on " strftime() " from " txt2html(FILENAME) QUOTE) 
        print("         >") 

        # Remember this file last, so the "Previous" link above can still see the old value
        oldfilename = FILENAME 
}

(!NOHEADER && (FNR == 1))  {
# First line of a file, unless NOHEADER is set: write it as the <thead> row.
# Each header cell gets a generated id "hdr<column>file<filenum>", remembered in hdr[]
# so that the data rows can refer to it through their headers= attribute.
# print("<!-- ##### DEBUG ##### FILENAME= " FILENAME " (FNR == 1) " FNR " -->")
        print "    <thead>"
        print "     <tr>"

        delete hdr                          # forget the ids of the previous file
        numfields = parsecsv($0, header)    # header[1..numfields] = the column titles

        i = 1
        while (i <= numfields)
        {
            hdr[i] = "hdr" i "file" filenum
            print "      <td"
            print "       id=" QUOTE hdr[i] QUOTE      # generated by us, so no valnameid() needed
#           print("       style=" QUOTE "border: thin solid ;" QUOTE )
            print "         >" makehtml(header[i]) "</td>"
            i++
        }

        print "     </tr>"
        print "    </thead>"

        bodyflag = 1   # the next data row must open <tbody>

        next           # the header line is not a data row
}


("=====" == $0) {   # A line consisting of "=====" is this script's convention for "footer rows follow"
# print("<!-- ##### DEBUG ##### FILENAME= " FILENAME " ($0 == '=====') " FNR " -->")

        # Close the body if one is currently open
        if (bodyflag == 2)
            print "    </tbody>"

        footerflag = 1     # the next row must open <tfoot>
        bodyflag   = 0     # the body is always finished once the separator is seen

        next               # the separator line itself is never displayed
}

(NOHEADER || (FNR > 1))    {
# Every line that is not a header line or a footer separator: write it as one table row.
# print("<!-- ##### DEBUG ##### FILENAME= " FILENAME " (FNR > 1) " FNR " -->")

        # Open the pending section, if the header rule or the separator rule asked for one
        if (bodyflag == 1)
        {
            bodyflag = 2
            print "    <tbody>"
        }
        if (footerflag == 1)
        {
            footerflag = 2
            print "    <tfoot>"
        }

        numfields = parsecsv($0, row)

        print "     <tr>"

        # The first cell is always written, even when parsecsv() reports no fields;
        # it could be replaced with <th></th>.  The first cell and the remaining cells
        # differ only in the amount of leading white space that is emitted.
        for (i = 1; (i == 1) || (i <= numfields); i++)
        {
            print "      <td"
            if (hdr[i])     # refer to the header cell id when a header row was written
                print(((i == 1) ? "           headers=" : "      headers=") QUOTE hdr[i] QUOTE)
#           print("      style=" QUOTE "border: thin solid ;" QUOTE ) 
            print(((i == 1) ? "         >" : "          >") makehtml(row[i]) "</td>")
        }

        print "     </tr>"
    }

END {
        # Finish the last table; there is one only if at least one file was processed
        if (filenum)
        {
            if (footerflag == 2)
                print "    </tfoot>"
            if (bodyflag == 2)
                print "    </tbody>"
            print "   </table>"
        }

        print "   <p>" filenum " files processed.</p>"
        print "  </div> <!-- end of content -->"

        # With more than one file on the page, add the footer menu,
        # this time including the "Top of page" link
        if (ARGC > 2)
        {
            print "  <div id=" QUOTE "footer" QUOTE ">"
            makemenu(1)
            print "  </div> <!-- end of footer -->"
        }

        print " </body>"
        print "</html>"
    }

# EOF: CSV2HTML.AWK
   

1 files processed.