/*
 * Copyright (c) 2001
 * Pavel "EvilOne" Minayev
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation. Author makes no representations about
 * the suitability of this software for any purpose. It is provided
 * "as is" without express or implied warranty.
 *
 * Updated by J C Calvarese, http://jcc_7.tripod.com/d/, 2003/12/18
 * Changed to process SQL syntax instead of D.
 */

import std.c.stdio;
import std.string;
import std.stream;

/* Colors for syntax highlighting (RRGGBB hex, without the leading '#');
   default values are Pavel Minayev's preferences in the Microsoft Visual
   Studio editor. */
class Colors
{
    static char[] keyword = "0000FF";
    static char[] number = "008000";
    static char[] string = "000080";
    static char[] comment = "808080";
}

const int tabsize = 4;  /* number of spaces in tab */
const char[24] symbols = "()[]{}.,;:=<>+-*/%&|^!~?";

/* Upper-cased keywords loaded from sql2html.kwd by main(). */
char[][] keywords;

/* True while a highlighting <font> element is open in the output; lets the
   end-of-input handler in main() close an element that was cut short. */
bit spanOpen = false;

version(talkative)
    int lineNumber = 0;

/* true if c is whitespace, false otherwise.
   In the "talkative" build this also counts line feeds as a side effect. */
bit isspace(char c)
{
    version(talkative)
        if (c == 10) lineNumber++;
    return std.string.find(whitespace, c) >= 0;
}

/* true if c is a letter or an underscore, false otherwise */
bit isalpha(char c)
{
    return c == '_' || std.string.find(letters, c) >= 0;
}

/* true if c is a decimal digit or an underscore (digit separator),
   false otherwise */
bit isdigit(char c)
{
    return c == '_' || std.string.find(digits, c) >= 0;
}

/* true if c is a hexadecimal digit or an underscore, false otherwise */
bit ishexdigit(char c)
{
    return c == '_' || std.string.find(hexdigits, c) >= 0;
}

/* true if c is an octal digit or an underscore, false otherwise */
bit isoctdigit(char c)
{
    return c == '_' || std.string.find(octdigits, c) >= 0;
}

/* true if c is one of the punctuation characters in `symbols`,
   false otherwise */
bit issymbol(char c)
{
    return find(symbols, c) >= 0;
}

/* true if token is exactly equal to one of the loaded keywords; the caller
   is responsible for upper-casing the token first */
bit iskeyword(char[] token)
{
    for (int i = 0; i < keywords.length; i++)
        if (!cmp(keywords[i], token))
            return true;
    return false;
}

/* By J C Calvarese
   Returns the path & filename without the extension.

   Only a dot inside the final path component counts as the extension
   separator, and a leading dot (".profile") is part of the name, so
   "dir.v2/query" and "../query" are returned unchanged. */
char[] baseFilenameWithPath(char[] r)
{
    version(very_talkative) printf("Looking for \".\" in \"%.*s\"...\n", r);
    if (r.length == 0)
        return r;

    int dot = rfind(r, '.');
    int sep = rfind(r, '/');
    int backslash = rfind(r, '\\');
    if (backslash > sep)
        sep = backslash;
    version(very_talkative) printf("Result: %d\n", dot);

    if (dot > sep + 1)
        return r[0..dot];
    return r;
}

/* Writes c to dst, replacing the characters that are special in HTML
   (<, >, &) with the corresponding character references. */
void writeEscaped(Stream dst, char c)
{
    if (c == '<')
        dst.writeString("&lt;");
    else if (c == '>')
        dst.writeString("&gt;");
    else if (c == '&')
        dst.writeString("&amp;");
    else
        dst.write(c);
}

/* Opens a highlighting element of the given color (RRGGBB hex). */
void openSpan(Stream dst, char[] color)
{
    dst.writeString("<font color='#" ~ color ~ "'>");
    spanOpen = true;
}

/* Closes the element opened by openSpan(). */
void closeSpan(Stream dst)
{
    dst.writeString("</font>");
    spanOpen = false;
}

/* Expands the tab character that was just read from src into spaces.
   linestart is kept such that (src.position() - linestart) is the output
   column of the character just read; after the expansion it is moved so
   that the next character lands on a tab stop. */
void expandTab(Stream src, Stream dst, inout int linestart)
{
    int pos = cast(int) src.position();
    int spaces = tabsize - (pos - linestart) % tabsize;
    for (int i = 0; i < spaces; i++)
        dst.writeString(" ");
    linestart = pos - tabsize + 1;
}

/* Copies one source character (already read into c) to dst: tabs are
   expanded, line breaks reset the column bookkeeping, and HTML special
   characters are escaped. */
void writeSourceChar(Stream src, Stream dst, char c, inout int linestart)
{
    if (c == 9)
        expandTab(src, dst, linestart);
    else
    {
        /* reset line start on newline */
        if (c == 10 || c == 13)
            linestart = cast(int) src.position() + 1;
        writeEscaped(dst, c);
    }
}

/* Reads the character that follows the operator `op` into c. When the
   input ends right after the operator, the operator is still written
   before the ReadError is passed on. */
void readAfterOperator(Stream src, Stream dst, char op, inout char c)
{
    try
    {
        src.read(c);
    }
    catch (ReadError e)
    {
        writeEscaped(dst, op);
        throw e;
    }
}

/* Writes a complete word: keywords are matched case-insensitively and
   emitted upper-cased in the keyword color; anything else is an identifier,
   which is also appended to idFile when outputIdentifiers is set. */
void writeWord(Stream dst, Stream idFile, bit outputIdentifiers, char[] token)
{
    char[] upper = toupper(token);
    if (iskeyword(upper))
    {
        openSpan(dst, Colors.keyword);
        dst.writeString(upper);
        closeSpan(dst);
    }
    else
    {
        if (outputIdentifiers)
            idFile.writeString(token ~ "\n");
        dst.writeString(token);
    }
}

/* Copies a quoted literal. On entry c is the opening quote; on return c is
   the first character after the closing quote. When `escapes` is set, a
   backslash protects the character that follows it (so \" does not end
   the literal and \\ does not protect anything after it). */
void copyQuoted(Stream src, Stream dst, inout char c, char quote,
                bit escapes, inout int linestart)
{
    bit escaped = false;
    openSpan(dst, Colors.string);
    do
    {
        writeSourceChar(src, dst, c, linestart);
        escaped = cast(bit) (escapes && c == '\\' && !escaped);
        src.read(c);
    } while (escaped || c != quote);
    dst.write(c);
    closeSpan(dst);
    src.read(c);
}

/* Copies a comment that runs to the end of the line. `first` is the
   already-consumed first character of the introducer and c the second one;
   on return c is the line feed, which is left for the caller. */
void copyLineComment(Stream src, Stream dst, char first, inout char c,
                     inout int linestart)
{
    openSpan(dst, Colors.comment);
    dst.write(first);
    while (c != 10)
    {
        writeSourceChar(src, dst, c, linestart);
        src.read(c);
    }
    closeSpan(dst);
}

/* Copies a block comment. On entry the leading slash has been consumed and
   c is the '*'; on return c is the first character after the comment. */
void copyBlockComment(Stream src, Stream dst, inout char c,
                      inout int linestart)
{
    openSpan(dst, Colors.comment);
    dst.writeString("/*");
    /* prev starts empty so that the opening '*' cannot take part in the
       terminator: "/*/" does not close the comment */
    char prev = 0;
    src.read(c);
    while (true)
    {
        writeSourceChar(src, dst, c, linestart);
        if (prev == '*' && c == '/')
            break;
        prev = c;
        src.read(c);
    }
    closeSpan(dst);
    src.read(c);
}

/* Copies a nestable comment introduced by slash-plus and ended by
   plus-slash. On entry the leading slash has been consumed and c is the
   '+'; on return c is the first character after the comment. */
void copyNestedComment(Stream src, Stream dst, inout char c,
                       inout int linestart)
{
    int depth = 1;
    openSpan(dst, Colors.comment);
    dst.writeString("/+");
    char prev = 0;
    src.read(c);
    while (true)
    {
        writeSourceChar(src, dst, c, linestart);
        if (prev == '/' && c == '+')
        {
            depth++;
            prev = 0;   /* a character belongs to one delimiter only */
        }
        else if (prev == '+' && c == '/')
        {
            depth--;
            prev = 0;
            if (depth == 0)
                break;
        }
        else
            prev = c;
        src.read(c);
    }
    closeSpan(dst);
    src.read(c);
}

/* Converts the SQL source named by args[1] into highlighted HTML.

   args[2..] may contain, in any order:
     -i        also write the list of identifiers to <source base>.id
     -iNAME    also write the list of identifiers to NAME
     other     name of the HTML output file (default: <source base>.html)

   Keywords are read from "sql2html.kwd" in the current directory, one per
   line. Returns 0; failures to open files surface as stream exceptions. */
int main(char[][] args)
{
    bit outputIdentifiers;
    File idFile;
    File dst;

    version(very_talkative) printf("Starting program...\n");

    /* need help? */
    if (args.length < 2)
    {
        printf("SQL to HTML converter\n"
               "Usage: SQL2HTML program.sql [file.html] [-i[idfile]]\n"
               "\n"
               "program.sql: source file\nfile.html: output file\n"
               "-i: produce list of identifiers\n");
        return 0;
    }

    /* auto-name output files */
    char[] fn = baseFilenameWithPath(args[1]) ~ ".html";
    char[] fnIdent = baseFilenameWithPath(args[1]) ~ ".id";

    version(very_talkative) printf("Checking argument list...\n");
    for (int i = 2; i < args.length; i++)
    {
        char[] arg = args[i];
        if (arg == "-i")
        {
            version(very_talkative)
                printf("Produce identifier list: true.\n");
            /* fnIdent keeps its current value so that "-iNAME -i" does not
               discard the explicitly requested name */
            outputIdentifiers = true;
        }
        else if (arg.length > 2 && arg[0..2] == "-i")
        {
            version(very_talkative)
                printf("Produce identifier list (with specific filename): true.\n");
            outputIdentifiers = true;
            fnIdent = arg[2..arg.length];
        }
        else
        {
            version(very_talkative)
                printf("Override default output name.\n");
            fn = arg;   /* override auto-name */
        }
    }

    /* load keywords; they are stored upper-cased because tokens are
       upper-cased before the lookup (SQL keywords are case-insensitive) */
    version(talkative) printf("Reading in keywords...\t");
    File kwd = new File("sql2html.kwd");
    while (!kwd.eof())
    {
        char[] line = toupper(strip(kwd.readLine()));
        if (line.length > 0)
            keywords ~= line;
    }
    kwd.close();
    version(talkative)
    {
        printf("Read in keywords.\n");
        for (int i = 0; i < keywords.length; i++)
            printf("%.*s\t", keywords[i]);
        printf("\n");
    }

    /* open input and output files */
    File src = new File(args[1]);
    dst = new File;
    dst.create(fn);
    if (outputIdentifiers)
    {
        idFile = new File;
        idFile.create(fnIdent);
    }

    /* write HTML header; the source goes inside <pre> so that its spaces
       and line breaks are shown as they are */
    dst.writeString("<html><head><title>");
    for (int i = 0; i < args[1].length; i++)
        writeEscaped(dst, args[1][i]);
    dst.writeLine("</title></head>");
    dst.writeLine("<body color='#000000' bgcolor='#FFFFFF'><pre><code>");

    /* the main part is wrapped into try..catch block because
       when end of file is reached, an exception is raised;
       so we can omit any checks for EOF inside this block... */
    try
    {
        /* for tabs: (src.position() - linestart) is the column of the
           character just read, hence 1 and not 0 for the first line */
        int linestart = 1;
        char c;
        src.read(c);
        while (true)
        {
            if (isspace(c))                     /* whitespace */
            {
                do
                {
                    writeSourceChar(src, dst, c, linestart);
                    src.read(c);
                } while (isspace(c));
            }
            else if (isalpha(c))                /* keyword or identifier */
            {
                char[] token;
                try
                {
                    do
                    {
                        token ~= c;
                        src.read(c);
                    } while (isalpha(c) || isdigit(c));
                }
                catch (ReadError e)
                {
                    /* input ended inside the word: emit it before leaving */
                    writeWord(dst, idFile, outputIdentifiers, token);
                    throw e;
                }
                writeWord(dst, idFile, outputIdentifiers, token);
            }
            else if (isdigit(c))                /* number */
            {
                bit radixPrefixed = false;
                openSpan(dst, Colors.number);
                if (c == '0')
                {
                    dst.write(c);
                    src.read(c);
                    if (c == 'X' || c == 'x')   /* hexadecimal */
                    {
                        radixPrefixed = true;
                        do
                        {
                            dst.write(c);
                            src.read(c);
                        } while (ishexdigit(c));
                    }
                    else if (c == 'B' || c == 'b')  /* binary */
                    {
                        radixPrefixed = true;
                        do
                        {
                            dst.write(c);
                            src.read(c);
                        } while (c == '0' || c == '1' || c == '_');
                    }
                }
                if (!radixPrefixed)
                {
                    /* integral part (also the digits after a leading 0) */
                    while (isdigit(c))
                    {
                        dst.write(c);
                        src.read(c);
                    }
                    /* fractional part */
                    if (c == '.')
                    {
                        dst.write(c);
                        src.read(c);
                        while (isdigit(c))
                        {
                            dst.write(c);
                            src.read(c);
                        }
                    }
                    /* scientific notation */
                    if (c == 'E' || c == 'e')
                    {
                        dst.write(c);
                        src.read(c);
                        if (c == '+' || c == '-')
                        {
                            dst.write(c);
                            src.read(c);
                        }
                        while (isdigit(c))
                        {
                            dst.write(c);
                            src.read(c);
                        }
                    }
                    /* suffixes */
                    while (c == 'U' || c == 'u' || c == 'L' ||
                           c == 'l' || c == 'F' || c == 'f')
                    {
                        dst.write(c);
                        src.read(c);
                    }
                }
                closeSpan(dst);
            }
            else if (c == '\\')                 /* naked escape sequence */
            {
                openSpan(dst, Colors.string);
                dst.write(c);
                src.read(c);
                writeEscaped(dst, c);
                closeSpan(dst);
                src.read(c);
            }
            else if (c == '"')                  /* string with escapes */
                copyQuoted(src, dst, c, '"', true, linestart);
            else if (c == '\'')                 /* string with escapes */
                copyQuoted(src, dst, c, '\'', true, linestart);
            else if (c == '`')                  /* quoted, no escapes */
                copyQuoted(src, dst, c, '`', false, linestart);
            else if (issymbol(c))               /* operator or comment */
            {
                if (c == '/')                   /* could be a comment... */
                {
                    readAfterOperator(src, dst, '/', c);
                    if (c == '/')               /* single-line one */
                        copyLineComment(src, dst, '/', c, linestart);
                    else if (c == '*')          /* multi-line one */
                        copyBlockComment(src, dst, c, linestart);
                    else if (c == '+')          /* nestable multi-line one */
                        copyNestedComment(src, dst, c, linestart);
                    else                        /* just an operator */
                        dst.write('/');
                }
                else if (c == '-')              /* could be an SQL comment */
                {
                    readAfterOperator(src, dst, '-', c);
                    if (c == '-')
                        copyLineComment(src, dst, '-', c, linestart);
                    else                        /* just an operator */
                        dst.write('-');
                }
                else                            /* just an operator */
                {
                    writeEscaped(dst, c);
                    src.read(c);
                }
            }
            else
            {
                /* not something this tokenizer knows (e.g. '@', '#' or '$'
                   in SQL, or a non-ASCII byte): copy it through unchanged
                   rather than aborting with a half-written output file */
                version(talkative)
                {
                    printf("%d\t%d\n", c, lineNumber);
                }
                writeEscaped(dst, c);
                src.read(c);
            }
        }
    }
    /* if end of file is reached and we try to read something
       with typed read(), a ReadError is thrown; in our case,
       this means that job is successfully done */
    catch (ReadError e)
    {
        /* the input may have ended inside a literal or a comment */
        if (spanOpen)
            closeSpan(dst);
        src.close();
        /* write HTML footer */
        dst.writeLine("</code></pre></body></html>");
        dst.close();
        if (outputIdentifiers)
            idFile.close();
    }
    return 0;
}