/*
 * Copyright (c) 2001
 * Pavel "EvilOne" Minayev
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation. Author makes no representations about
 * the suitability of this software for any purpose. It is provided
 * "as is" without express or implied warranty.
 *
 * Updated by J C Calvarese, http://jcc_7.tripod.com/d/, 2003/12/18
 * Changed to process SQL syntax instead of D.
 */

import std.c.stdio;
import std.string;
import std.stream;

/* Colors for syntax highlighting (RRGGBB hex, without the leading '#');
   default values are Pavel Minayev's preferences in Microsoft Visual
   Studio editor */
class Colors
{
    static char[] keyword = "0000FF";
    static char[] number  = "008000";
    static char[] string  = "000080";
    static char[] comment = "808080";
}

const int tabsize = 4;      /* number of spaces in tab */

/* characters treated as operators / punctuation */
const char[24] symbols = "()[]{}.,;:=<>+-*/%&|^!~?";

/* sentinel returned by nextChar() once the input is exhausted */
const char endOfInput = '\0';

/* upper-cased keyword list, filled by main() from sql2html.kwd */
char[][] keywords;

version(talkative) int lineNumber = 0;

/* true if c is whitespace, false otherwise
   (with version=talkative it also counts line feeds in lineNumber) */
bit isspace(char c)
{
    version(talkative)
        if (c == 10)
            lineNumber++;
    return std.string.find(whitespace, c) >= 0;
}

/* true if c is a letter or an underscore, false otherwise */
bit isalpha(char c)
{
    return c == '_' || std.string.find(letters, c) >= 0;
}

/* true if c is a decimal digit or a digit-separating underscore */
bit isdigit(char c)
{
    return std.string.find(digits, c) >= 0 || c == '_';
}

/* true if c is a hexadecimal digit or a digit-separating underscore */
bit ishexdigit(char c)
{
    return std.string.find(hexdigits, c) >= 0 || c == '_';
}

/* true if c is an octal digit or a digit-separating underscore */
bit isoctdigit(char c)
{
    return std.string.find(octdigits, c) >= 0 || c == '_';
}

/* true if c is one of the characters listed in `symbols` */
bit issymbol(char c)
{
    return find(symbols, c) >= 0;
}

/* true if token is in the keyword list (exact, case-sensitive match;
   callers upper-case the token first to get case-insensitive SQL) */
bit iskeyword(char[] token)
{
    foreach (char[] kw; keywords)
        if (kw == token)
            return true;
    return false;
}

/* By J C Calvarese
   Returns the path & filename without the extension.

   Only a dot inside the last path component counts as the start of an
   extension: "my.dir/query" is returned unchanged, and a name that
   begins with a dot (".hidden", "dir/.hidden") keeps its full name. */
char[] baseFilenameWithPath(char[] r)
{
    int dot = -1;
    int sep = -1;

    version(very_talkative)
        printf("Looking for \".\" in \"%.*s\"...\n", r);

    if (r.length > 0)
    {
        dot = rfind(r, '.');

        /* position of the last path separator, either flavour */
        sep = rfind(r, '/');
        int backslash = rfind(r, '\\');
        if (backslash > sep)
            sep = backslash;
    }

    version(very_talkative)
        printf("Result: %d\n", dot);

    /* dot > sep + 1: the dot is in the file name and is not its first character */
    if (dot > 0 && dot > sep + 1)
        return r[0 .. dot];
    return r;
}

/* Reads the next character of src; returns endOfInput instead of letting
   the ReadError raised at end of file escape, so that callers can finish
   the token they are in the middle of. */
private char nextChar(File src)
{
    char c;
    try
    {
        src.read(c);
    }
    catch (ReadError e)
    {
        return endOfInput;
    }
    return c;
}

/* Writes c to dst, replacing the characters that are special in HTML
   with character references. */
private void writeEscaped(File dst, char c)
{
    switch (c)
    {
        case '<':
            dst.writeString("&lt;");
            break;
        case '>':
            dst.writeString("&gt;");
            break;
        case '&':
            dst.writeString("&amp;");
            break;
        default:
            dst.write(c);
            break;
    }
}

/* Opening tag that colours everything up to closeSpan. */
private char[] openSpan(char[] color)
{
    return "<font color=\"#" ~ color ~ "\">";
}

private const char[] closeSpan = "</font>";

/* Output side of the converter: escapes source text for HTML and expands
   tabs, tracking the output column so tab stops stay aligned. */
private class HtmlWriter
{
    private File dst;
    private int column = 0;     /* column of the next source character */

    this(File dst)
    {
        this.dst = dst;
    }

    /* writes markup verbatim; it does not occupy a column */
    void raw(char[] markup)
    {
        dst.writeString(markup);
    }

    /* writes one character of source text */
    void put(char c)
    {
        switch (c)
        {
            case '\t':
            {
                /* expand tabs to spaces, up to the next tab stop */
                int spaces = tabsize - column % tabsize;
                for (int i = 0; i < spaces; i++)
                    dst.write(' ');
                column += spaces;
                break;
            }
            case '\n':
            case '\r':
                dst.write(c);
                column = 0;
                break;
            default:
                writeEscaped(dst, c);
                column++;
                break;
        }
    }

    /* writes a run of source text */
    void put(char[] text)
    {
        foreach (char ch; text)
            put(ch);
    }
}

/* Copies a quoted literal whose opening `quote` is the current character.
   When `escapes` is set, a backslash protects the character after it, so
   an escaped quote does not end the literal.  An unterminated literal
   runs to the end of input.  Returns the first character after the
   literal. */
private char copyQuoted(File src, HtmlWriter html, char quote, bit escapes)
{
    bit escaped = false;

    html.raw(openSpan(Colors.string));
    html.put(quote);
    char c = nextChar(src);
    while (c != endOfInput && (escaped || c != quote))
    {
        html.put(c);
        escaped = escapes && !escaped && c == '\\';
        c = nextChar(src);
    }
    if (c != endOfInput)        /* closing quote */
    {
        html.put(c);
        c = nextChar(src);
    }
    html.raw(closeSpan);
    return c;
}

/* Copies a delimited comment whose opener ('/' followed by `marker`) has
   already been consumed; the comment ends at `marker` followed by '/'.
   When `nestable` is set, inner openers must be balanced by inner closers.
   An unterminated comment runs to the end of input.  Returns the first
   character after the comment. */
private char copyDelimitedComment(File src, HtmlWriter html, char marker, bit nestable)
{
    int depth = 1;
    char prev = endOfInput;     /* previous character, if it may start a delimiter */

    html.raw(openSpan(Colors.comment));
    html.put('/');
    html.put(marker);
    char c = nextChar(src);
    while (c != endOfInput)
    {
        char cur = c;
        html.put(cur);
        c = nextChar(src);
        if (prev == marker && cur == '/')
        {
            if (--depth == 0)
                break;
            prev = endOfInput;  /* a delimiter character is never shared by two delimiters */
        }
        else if (nestable && prev == '/' && cur == marker)
        {
            depth++;
            prev = endOfInput;
        }
        else
            prev = cur;
    }
    html.raw(closeSpan);
    return c;
}

/* Copies a numeric literal whose first digit is c: 0x... hexadecimal,
   0b... binary, or decimal with optional fraction, exponent and U/L/F
   suffixes.  Returns the first character after the literal. */
private char copyNumber(File src, HtmlWriter html, char c)
{
    bit radixPrefixed = false;

    html.raw(openSpan(Colors.number));
    if (c == '0')
    {
        html.put(c);
        c = nextChar(src);
        if (c == 'X' || c == 'x')           /* hexadecimal */
        {
            radixPrefixed = true;
            do
            {
                html.put(c);
                c = nextChar(src);
            } while (ishexdigit(c));
        }
        else if (c == 'B' || c == 'b')      /* binary */
        {
            radixPrefixed = true;
            do
            {
                html.put(c);
                c = nextChar(src);
            } while (c == '0' || c == '1' || c == '_');
        }
    }
    if (!radixPrefixed)
    {
        /* integral part (possibly empty after a lone leading zero) */
        while (isdigit(c))
        {
            html.put(c);
            c = nextChar(src);
        }
        /* fractional part */
        if (c == '.')
        {
            html.put(c);
            c = nextChar(src);
            while (isdigit(c))
            {
                html.put(c);
                c = nextChar(src);
            }
        }
        /* scientific notation */
        if (c == 'E' || c == 'e')
        {
            html.put(c);
            c = nextChar(src);
            if (c == '+' || c == '-')
            {
                html.put(c);
                c = nextChar(src);
            }
            while (isdigit(c))
            {
                html.put(c);
                c = nextChar(src);
            }
        }
        /* suffixes */
        while (c == 'U' || c == 'u' || c == 'L' || c == 'l'
            || c == 'F' || c == 'f')
        {
            html.put(c);
            c = nextChar(src);
        }
    }
    html.raw(closeSpan);
    return c;
}

/* Tokenizes src and writes the highlighted text to html.  Identifiers
   that are not keywords are also written to idFile, one per line, when
   idFile is not null. */
private void convert(File src, HtmlWriter html, File idFile)
{
    char c = nextChar(src);
    while (c != endOfInput)
    {
        if (isspace(c))                     /* whitespace */
        {
            do
            {
                html.put(c);
                c = nextChar(src);
            } while (isspace(c));
        }
        else if (isalpha(c))                /* keyword or identifier */
        {
            char[] token;
            do
            {
                token ~= c;
                c = nextChar(src);
            } while (isalpha(c) || isdigit(c));

            /* SQL keywords are case-insensitive and shown in upper case */
            char[] upper = toupper(token);
            if (iskeyword(upper))
            {
                html.raw(openSpan(Colors.keyword));
                html.put(upper);
                html.raw(closeSpan);
            }
            else
            {
                if (idFile)
                    idFile.writeString(token ~ "\n");
                html.put(token);
            }
        }
        else if (isdigit(c))                /* number */
        {
            c = copyNumber(src, html, c);
        }
        else if (c == '\\')                 /* naked escape sequence (\) */
        {
            html.raw(openSpan(Colors.string));
            html.put(c);
            c = nextChar(src);
            if (c != endOfInput)
            {
                html.put(c);
                c = nextChar(src);
            }
            html.raw(closeSpan);
        }
        else if (c == '"' || c == '\'')     /* literal with escape sequences */
        {
            c = copyQuoted(src, html, c, true);
        }
        else if (c == '`')                  /* literal with no escape sequences */
        {
            c = copyQuoted(src, html, c, false);
        }
        else if (issymbol(c))               /* either operator or comment */
        {
            char first = c;
            c = nextChar(src);
            if ((first == '/' && c == '/') || (first == '-' && c == '-'))
            {
                /* single-line comment; the line break itself is left
                   for the whitespace branch */
                html.raw(openSpan(Colors.comment));
                html.put(first);
                while (c != endOfInput && c != '\n' && c != '\r')
                {
                    html.put(c);
                    c = nextChar(src);
                }
                html.raw(closeSpan);
            }
            else if (first == '/' && c == '*')      /* multi-line comment */
                c = copyDelimitedComment(src, html, '*', false);
            else if (first == '/' && c == '+')      /* nestable multi-line comment */
                c = copyDelimitedComment(src, html, '+', true);
            else                                    /* just an operator */
                html.put(first);
        }
        else
        {
            /* not something this tokenizer knows (for instance '@', '#'
               or '$'): copy it through instead of abandoning the output */
            version(talkative)
                printf("%d\t%d\n", c, lineNumber);
            html.put(c);
            c = nextChar(src);
        }
    }
}

/* SQL2HTML program.sql [file.html] [-i[file]]
   Converts program.sql to syntax-highlighted HTML.  Keywords are read,
   one per line, from sql2html.kwd in the current directory. */
int main(char[][] args)
{
    bit outputIdentifiers = false;
    File idFile;

    version(very_talkative)
        printf("Starting program...\n");

    /* need help? */
    if (args.length < 2)
    {
        printf("SQL to HTML converter\n"
               "Usage: SQL2HTML program.sql [file.html] [-i[file]]\n"
               "\n"
               "program.sql: source file\n"
               "file.html: output file\n"
               "-i: produce list of identifiers (optionally into the named file)\n");
        return 0;
    }

    /* auto-name output files */
    char[] fn = baseFilenameWithPath(args[1]) ~ ".html";
    char[] fnIdent = baseFilenameWithPath(args[1]) ~ ".id";

    version(very_talkative)
        printf("Checking argument list...\n");

    for (int i = 2; i < args.length; i++)
    {
        char[] arg = args[i];
        if (arg.length >= 2 && arg[0 .. 2] == "-i")
        {
            outputIdentifiers = true;
            if (arg.length > 2)
            {
                version(very_talkative)
                    printf("Produce identifier list (with specific filename): true.\n");
                fnIdent = arg[2 .. arg.length];
            }
            else
            {
                version(very_talkative)
                    printf("Produce identifier list: true.\n");
                fnIdent = baseFilenameWithPath(args[1]) ~ ".id";
            }
        }
        else
        {
            version(very_talkative)
                printf("Override default output name.\n");
            fn = arg;           /* override auto-name */
        }
    }

    /* load keywords */
    version(talkative)
        printf("Reading in keywords...\t");
    File kwd = new File("sql2html.kwd");
    while (!kwd.eof())
    {
        /* lookups use the upper-cased token, so store keywords the same
           way; blank lines and stray line-end characters are dropped */
        char[] line = strip(kwd.readLine());
        if (line.length > 0)
            keywords ~= toupper(line);
    }
    kwd.close();
    version(talkative)
    {
        printf("Read in keywords.\n");
        for (int i = 0; i < keywords.length; i++)
            printf("%.*s\t", keywords[i]);
        printf("\n");
    }

    /* open input and output files */
    File src = new File(args[1]);
    File dst = new File;
    dst.create(fn);
    if (outputIdentifiers)
    {
        idFile = new File;
        idFile.create(fnIdent);
    }

    try
    {
        /* write HTML header */
        dst.writeString("<html><head><title>");
        foreach (char ch; args[1])
            writeEscaped(dst, ch);
        dst.writeLine("</title></head>");
        dst.writeLine("<body text=\"#000000\" bgcolor=\"#FFFFFF\"><pre><code>");

        convert(src, new HtmlWriter(dst), idFile);

        /* write HTML footer */
        dst.writeLine("</code></pre></body></html>");
    }
    finally
    {
        src.close();
        dst.close();
        if (idFile)
            idFile.close();
    }

    return 0;
}