/*
 * Copyright (c) 2001
 * Pavel "EvilOne" Minayev
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Author makes no representations about
 * the suitability of this software for any purpose. It is provided
 * "as is" without express or implied warranty.
 
 * Updated by J C Calvarese, http://jcc_7.tripod.com/d/, 2003/12/18

 * Changed to process SQL syntax instead of D.
*/


import std.c.stdio;
import std.string;
import std.stream;


/* Palette used for syntax highlighting.  Every entry is a six-digit
   hexadecimal RRGGBB value without the leading '#'; the caller adds the
   '#' when it builds a <font color='#...'> tag.  The defaults are
   Pavel Minayev's preferences in the Microsoft Visual Studio editor.  */
class Colors
{
    static
    {
        char[] keyword = "0000FF";    /* words found in the keyword list */
        char[] number  = "008000";    /* numeric literals */
        char[] string  = "000080";    /* string, character and escape literals */
        char[] comment = "808080";    /* comments */
    }
}

/* Tab stop width used by main() when it expands tab characters into
   spaces in the generated HTML. */
const int tabsize = 4;    /* number of spaces in tab */
/* Punctuation characters that issymbol() accepts as operator/delimiter
   characters (24 characters in total). */
const char[24] symbols = "()[]{}.,;:=<>+-*/%&|^!~?";
/* Keyword list consulted by iskeyword(); main() fills it with one entry
   per line read from the file "sql2html.kwd". */
char[][] keywords;
/* Diagnostic counter, present only in "talkative" builds: isspace()
   increments it for every character with code 10 (line feed) it is
   asked about, and main() prints it when it meets an unrecognized
   character. */
version(talkative) int lineNumber = 0;

/* Tells whether c is one of the characters in std.string.whitespace.
   In "talkative" builds it also bumps lineNumber each time it is
   handed a line feed (character code 10).
   Returns: true for whitespace, false for anything else. */
bit isspace(char c)
{
    version(talkative)
    {
        if (c == 10)
            lineNumber++;
    }

    /* find() yields the index of c, or -1 when c is absent */
    int where = std.string.find(whitespace, c);
    return where >= 0;
}

/* Tells whether c may start (or continue) an identifier: either an
   underscore or one of the characters in std.string.letters.
   Returns: true for a letter or '_', false otherwise. */
bit isalpha(char c)
{
    /* the underscore is treated exactly like a letter */
    if (c == '_')
        return true;

    int where = std.string.find(letters, c);
    return where >= 0;
}

/* Tells whether c can appear inside a decimal number: one of the
   characters in std.string.digits, or an underscore (accepted as a
   digit separator).
   Returns: true for '0'..'9' and '_', false otherwise. */
bit isdigit(char c)
{
    if (std.string.find(digits, c) >= 0)
        return true;

    /* not a digit: the only other accepted character is '_' */
    return cast(bit) (c == '_');
}

/* Tells whether c can appear inside a hexadecimal number: one of the
   characters in std.string.hexdigits, or an underscore (accepted as a
   digit separator).
   Returns: true for a hexadecimal digit or '_', false otherwise. */
bit ishexdigit(char c)
{
    if (std.string.find(hexdigits, c) >= 0)
        return true;

    /* not a hex digit: the only other accepted character is '_' */
    return cast(bit) (c == '_');
}

/* Tells whether c can appear inside an octal number: one of the
   characters in std.string.octdigits, or an underscore (accepted as a
   digit separator).
   Returns: true for '0'..'7' and '_', false otherwise. */
bit isoctdigit(char c)
{
    if (std.string.find(octdigits, c) >= 0)
        return true;

    /* not an octal digit: the only other accepted character is '_' */
    return cast(bit) (c == '_');
}

/* Tells whether c is one of the punctuation characters listed in the
   module-level `symbols` table (operators, brackets and delimiters).
   Returns: true when c occurs in `symbols`, false otherwise. */
bit issymbol(char c)
{
    /* find() yields the index of c, or -1 when c is absent */
    int where = std.string.find(symbols, c);
    return where >= 0;
}

/* Looks token up in the module-level `keywords` list using an exact,
   case-sensitive comparison (std.string.cmp); callers that want a
   case-insensitive match must normalise the case of token first.
   Returns: true when token equals one of the entries, false otherwise. */
bit iskeyword(char[] token)
{
    int idx = 0;
    while (idx < keywords.length)
    {
        /* cmp() returns 0 for equal strings */
        if (cmp(keywords[idx], token) == 0)
            return true;
        idx++;
    }
    return false;
}


/* By J C Calvarese.
   Returns r (path and file name) with the file extension removed.

   The extension is everything from the last '.' onwards, but only when
   that dot belongs to the file name itself: it must come after the last
   path separator ('/' or '\') and must not be the first character of
   the file name.  Names such as "../query", "dir.v2/file" or ".hidden"
   therefore come back unchanged instead of being cut at a dot that is
   part of the directory or marks a hidden file.

   Params:  r = file name, optionally preceded by a path; may be empty.
   Returns: a slice of r without the extension, or r itself when there is
            no extension to remove. */
char[] baseFilenameWithPath(char[] r)
{
    int dot = -1;    /* index of the last '.', -1 when there is none */
    int sep = -1;    /* index of the last path separator, -1 when none */
    char[] s;

    version(very_talkative) printf("Looking for \".\" in \"%.*s\"...\n\0", r);
    if (r.length > 0)
    {
        dot = rfind(r, '.');

        /* accept both Unix and Windows style separators */
        sep = rfind(r, '/');
        int backslash = rfind(r, '\\');
        if (backslash > sep)
            sep = backslash;
    }

    version(very_talkative) printf("Result: %d\n\0", dot);

    /* dot > sep + 1: the dot lies inside the last path component and is
       not that component's first character */
    if (dot > sep + 1) s = r[0..dot];
    else s = r;
    version(very_talkative) printf("Return baseFilenameWithPath\n\0");

    return s;
}


/* Writes one alphanumeric word to the HTML output.
   A word whose upper-case form is in the keyword list is written in
   upper case inside a keyword-coloured <font> tag (SQL keywords are
   case-insensitive).  Any other word is written unchanged and, when
   outputIdentifiers is set, also appended to idFile on its own line. */
private void emitWord(File dst, File idFile, bit outputIdentifiers, char[] token)
{
    char[] upper = toupper(token);

    if (iskeyword(upper))    /* keyword */
    {
        dst.writeString("<font color='#" ~ Colors.keyword ~
            "'>" ~ upper ~ "</font>");
    }
    else    /* simple identifier */
    {
        /* add identifier to the identifier list, if desired */
        if (outputIdentifiers)
            idFile.writeString(token ~ "\n");

        dst.writeString(token);
    }
}


/* Program entry point: converts an SQL source file into HTML with
   syntax highlighting.

   Command line:
     args[1]      source file (required; without it the usage text is
                  printed and the program exits)
     -i           also write the list of identifiers to "<source>.id"
     -iNAME       also write the list of identifiers to file NAME
     anything else  name of the HTML output file
                    (default: "<source>.html")

   The keyword list is read from "sql2html.kwd" in the current
   directory, one keyword per line.

   Returns: 0.  An Error("unrecognized token") propagates to the caller
   when the source contains a character that starts no known token; the
   files are closed before it leaves this function. */
int main(char[][] args)
{
    bit outputIdentifiers;
    File idFile;
    File dst;

    version(very_talkative) printf("Starting program...\n\0");

    /* need help? */
    if (args.length < 2)
    {
        printf("SQL to HTML converter\n"
               "Usage: SQL2HTML program.sql [file.html] [-i]\n"
               "\n"
               "program.sql: source file\nfile.html: output file\n"
               "-i:        produce list of identifiers\n\0");
        return 0;
    }

    /* auto-name output files */
    char[] fn = baseFilenameWithPath(args[1]) ~ ".html";
    char[] fnIdent = baseFilenameWithPath(args[1]) ~ ".id";

    version(very_talkative) printf("Checking argument list...\n\0");

    for (int i = 2; i < args.length; i++)
    {
        if (args[i] == "-i")
        {
            version(very_talkative) printf("Produce identifier list: true.\n\0");
            outputIdentifiers = true;
            fnIdent = baseFilenameWithPath(args[1]) ~ ".id";
        }
        else if (args[i].length > 2 && args[i][0..2] == "-i")
        {
            version(very_talkative) printf("Produce identifier list (with specific filename): true.\n\0");
            /* naming the list file implies that the list is wanted */
            outputIdentifiers = true;
            fnIdent = args[i][2..args[i].length];
        }
        else
        {
            version(very_talkative) printf("Override default output name.\n\0");
            fn = args[i]; /* override auto-name */
        }
    }

    /* load keywords */
    version(talkative) printf("Reading in keywords...\t\0");
    File kwd = new File("sql2html.kwd");
    while (!kwd.eof())
    {
        keywords.length = keywords.length + 1;
        keywords[keywords.length - 1] = kwd.readLine();
    }
    version(talkative)
    {
        printf("Read in keywords.\n\0");
        /* print through "%.*s" so that keyword text is never
           interpreted as a printf format string */
        for (int i = 0; i < keywords.length; i++)
            printf("%.*s\t\0", keywords[i]);
        printf("\n\0");
    }
    kwd.close();

    /* open input and output files */
    File src = new File(args[1]);
    dst = new File;
    dst.create(fn);

    if (outputIdentifiers)
    {
        idFile = new File;
        idFile.create(fnIdent);
    }

    /* write HTML header */
    dst.writeLine("<html><head><title>" ~ args[1] ~ "</title></head>");
    dst.writeLine("<body color='#000000' bgcolor='#FFFFFF'><code>");

    /* Word currently being collected.  It lives outside the try block so
       that a word cut short by the end of the file can still be written
       by the ReadError handler; it is empty at all other times. */
    char[] token;

    /* the main part is wrapped into try..catch block because
       when end of file is reached, an exception is raised;
       so we can omit any checks for EOF inside this block... */

    try
    {
        int linestart = 0;    /* for tabs */
        char c;
        src.read(c);
        while (true)
        {
            if (isspace(c))        /* whitespace */
            {
                do
                {
                    if (c == 9) /* tab character */
                    {
                        /* expand tabs to spaces */
                        int spaces = tabsize -
                            (src.position() - linestart) % tabsize;
                        for (int i = 0; i < spaces; i++)
                            dst.writeString(" ");
                        linestart = src.position() - tabsize + 1;
                    }
                    else
                    {
                        /* reset line start on newline */
                        if (c == 10 || c == 13)
                        {
                            linestart = src.position() + 1;
                            /* the output is not inside <pre>, so a line
                               break needs an explicit <br> */
                            if (c == 10) dst.writeLine(cast(char[]) "<br>");
                        }
                        dst.write(c);
                    }

                    src.read(c);

                } while (isspace(c));
            }
            else if (isalpha(c))    /* keyword or identifier */
            {
                do
                {
                    token ~= c;
                    src.read(c);
                } while (isalpha(c) || isdigit(c));

                emitWord(dst, idFile, outputIdentifiers, token);
                token = null;    /* nothing pending any more */
            }
            else if (c == '0')    /* binary, octal or hexadecimal number */
            {
                dst.writeString("<font color='#" ~ Colors.number ~ "'>");
                dst.write(c);
                src.read(c);
                if (c == 'X' || c == 'x')    /* hexadecimal */
                {
                    dst.write(c);
                    src.read(c);
                    while (ishexdigit(c))
                    {
                        dst.write(c);
                        src.read(c);
                    }
                }
                else if (c == 'B' || c == 'b')    /* binary */
                {
                    dst.write(c);
                    src.read(c);
                    while (c == '0' || c == '1' || c == '_')
                    {
                        dst.write(c);
                        src.read(c);
                    }
                }
                else    /* octal */
                {
                    /* consume octal digits only; whatever follows the
                       number is left in c for the main loop */
                    while (isoctdigit(c))
                    {
                        dst.write(c);
                        src.read(c);
                    }
                }
                dst.writeString("</font>");
            }
            else if (isdigit(c))    /* decimal number */
            {
                dst.writeString("<font color='#" ~ Colors.number ~ "'>");
                /* integral part */
                do
                {
                    dst.write(c);
                    src.read(c);
                } while (isdigit(c));
                /* fractional part */
                if (c == '.')
                {
                    dst.write(c);
                    src.read(c);
                    while (isdigit(c))
                    {
                        dst.write(c);
                        src.read(c);
                    }
                }
                /* scientific notation */
                if (c == 'E' || c == 'e')
                {
                    dst.write(c);
                    src.read(c);
                    if (c == '+' || c == '-')
                    {
                        dst.write(c);
                        src.read(c);
                    }
                    while (isdigit(c))
                    {
                        dst.write(c);
                        src.read(c);
                    }
                }
                /* suffixes */
                while (c == 'U' || c == 'u' || c == 'L' ||
                    c == 'l' || c == 'F' || c == 'f')
                {
                    dst.write(c);
                    src.read(c);
                }
                dst.writeString("</font>");
            }
            else if (c == '\\')   /* naked escape sequence (\) */
            {
                /* the backslash and the single character after it */
                dst.writeString("<font color='#" ~ Colors.string ~ "'>");
                dst.write(c);
                src.read(c);
                dst.write(c);
                src.read(c);
                dst.writeString("</font>");
            }
            else if (c == '"')    /* string (") with escape sequences  */
            {
                dst.writeString("<font color='#" ~ Colors.string ~ "'>");
                bit isEscape;    /* true when c was an unescaped backslash */
                do
                {
                    if (c == '<')    /* special symbol in HTML */
                        dst.writeString("&lt;");
                    else
                        dst.write(c);

                    /* a backslash escapes the next character unless it
                       is itself escaped */
                    if (c == '\\' && !isEscape)
                        isEscape = true;
                    else
                        isEscape = false;

                    src.read(c);
                } while (isEscape || c != '"');

                dst.write(c);    /* closing quote */
                src.read(c);
                dst.writeString("</font>");
            }
            else if (c == '\'')    /* character (') with escape sequences  */
            {
                dst.writeString("<font color='#" ~ Colors.string ~ "'>");
                bit isEscape;    /* true when c was an unescaped backslash */
                do
                {
                    if (c == '<')    /* special symbol in HTML */
                        dst.writeString("&lt;");
                    else
                        dst.write(c);

                    /* a backslash escapes the next character unless it
                       is itself escaped */
                    if (c == '\\' && !isEscape)
                        isEscape = true;
                    else
                        isEscape = false;

                    src.read(c);
                } while (isEscape || c != '\'');

                dst.write(c);    /* closing quote */
                src.read(c);
                dst.writeString("</font>");
            }
            else if (c == '`')    /* character (`) with no escape sequences */
            {
                dst.writeString("<font color='#" ~ Colors.string ~ "'>");
                do
                {
                    if (c == '<')    /* special symbol in HTML */
                        dst.writeString("&lt;");
                    else
                        dst.write(c);
                    src.read(c);
                } while (!(c == '`')); /* 96 */
                dst.write(c);
                src.read(c);
                dst.writeString("</font>");
            }
            else if (issymbol(c))    /* either operator or comment */
            {
                if (c == '<')    /* special symbol in HTML */
                {
                    dst.writeString("&lt;");
                    src.read(c);
                }
                else if (c == '/')    /* could be a comment... */
                {
                    src.read(c);
                    if (c == '/')    /* single-line one */
                    {
                        dst.writeString("<font color='#" ~ Colors.comment ~ "'>/");
                        while (c != 10)
                        {
                            if (c == '<')    /* special symbol in HTML */
                                dst.writeString("&lt;");
                            else if (c == 9)
                            {
                                /* expand tabs */
                                int spaces = tabsize -
                                    (src.position() - linestart) % tabsize;
                                for (int i = 0; i < spaces; i++)
                                    dst.writeString(" ");
                                linestart = src.position() - tabsize + 1;
                            }
                            else
                                dst.write(c);
                            src.read(c);
                        }
                        dst.writeString("</font>");
                    }
                    else if (c == '*')    /* multi-line one */
                    {
                        dst.writeString("<font color='#" ~ Colors.comment ~ "'>/");
                        char prevprev;
                        char prev = '/';
                        do
                        {
                            if (c == '<')    /* special symbol in HTML */
                                dst.writeString("&lt;");
                            else if (c == 9)
                            {
                                /* expand tabs */
                                int spaces = tabsize -
                                    (src.position() - linestart) % tabsize;
                                for (int i = 0; i < spaces; i++)
                                    dst.writeString(" ");
                                linestart = src.position() - tabsize + 1;
                            }
                            else
                            {
                                /* reset line start on newline */
                                if (c == 10 || c == 13)
                                    linestart = src.position() + 1;
                                dst.write(c);
                            }
                            prevprev = prev;
                            prev = c;
                            src.read(c);
                        /* stop at the closing star-slash; the prevprev test
                           keeps the opening slash-star-slash sequence from
                           being taken for a complete comment */
                        } while (!(prevprev != '/' && prev == '*' && c == '/'));
                        dst.write(c);
                        dst.writeString("</font>");
                        src.read(c);
                    }
                    else if (c == '+')    /* nestable multi-line comment */
                    {
                        int level = 0;    /* depth of nested comments */

                        dst.writeString("<font color='#" ~ Colors.comment ~ "'>/");
                        char prevprev = '/';
                        char prev = '+';
                        do
                        {
                            if (c == '<')    /* special symbol in HTML */
                                dst.writeString("&lt;");
                            else if (c == 9)
                            {
                                /* expand tabs */
                                int spaces = tabsize -
                                    (src.position() - linestart) % tabsize;
                                for (int i = 0; i < spaces; i++)
                                    dst.writeString(" ");
                                linestart = src.position() - tabsize + 1;
                            }
                            else
                            {
                                /* reset line start on newline */
                                if (c == 10 || c == 13)
                                    linestart = src.position() + 1;
                                dst.write(c);

                                if (prev == '/' && c == '+')
                                    level++;

                                if (prevprev != '/' && prev == '+' && c == '/')
                                    level--;

                            }
                            prevprev = prev;
                            prev = c;
                            src.read(c);
                        } while (!(prev == '+' && c == '/' && level == 0));
                        dst.write(c);
                        dst.writeString("</font>");
                        src.read(c);
                    }
                    else    /* just an operator */
                        dst.write('/');
                }
                else    /* just an operator */
                {
                    dst.write(c);
                    src.read(c);
                }
            }
            else
            {
                /* whatever it is, it does not start a token we know */
                version(talkative)
                {
                    printf("%d\t%d\n", c, lineNumber);
                }
                throw new Error("unrecognized token");
            }
        }
    }

    /* if end of file is reached and we try to read something
       with typed read(), a ReadError is thrown; in our case,
       this means that job is successfully done               */
    catch (ReadError e)
    {
        /* a word that runs up to the very end of the file has been
           collected but not written yet */
        if (token.length > 0)
            emitWord(dst, idFile, outputIdentifiers, token);

        /* write HTML footer (matches the tags opened by the header) */
        dst.writeLine("</code></body></html>");
    }
    /* close the files on every exit path, including the
       "unrecognized token" error */
    finally
    {
        src.close();
        dst.close();

        if (outputIdentifiers)
            idFile.close();
    }
    return 0;
}