'''
A horrible, horrible script which converts a specific subset of C++ to D.
(specifically, the subset used by DMD)

Coded by Andy Friesen
20 July 2004

Distribution / Code rights:
    Use this source code in any fashion you see fit.  Giving me credit where
    credit is due is optional, depending on your own levels of integrity and
    honesty.

Usage: pass one or more base names; for each NAME the script reads NAME.h and
NAME.c (or NAME.cpp) and writes NAME.d.

The code is written so that it runs unchanged on Python 2.6+ and Python 3.
'''

import os
import re
import sys

# When true, log() is silent.  main() toggles this around the header scan.
QUIET = False
# One indentation step for the method bodies collected by scanMethod().
INDENT = ' '
# Counter behind getNumber(); used to name anonymous parameters.
COUNT = 0


def getNumber():
    '''Return the next value of a module-wide counter (1, 2, 3, ...).'''
    global COUNT
    COUNT += 1
    return COUNT


# C/C++ type spelling -> D type spelling.
# Every key must be unique: cTypeDict is built from this table, and a repeated
# key silently overrides the earlier entry.
cTypeMap = (
    # multi-word
    ('unsigned char', 'ubyte'),
    ('unsigned short', 'ushort'),
    ('unsigned int', 'uint'),
    ('unsigned long long', 'ulong'),
    ('unsigned long', 'uint'),
    ('long double', 'real'),
    ('long long', 'long'),
    ('unsigned char*', 'ubyte[]'),
    ('unsigned char *', 'ubyte[]'),
    ('unsigned short*', 'wchar[]'),
    ('unsigned short *', 'wchar[]'),

    ('int', 'int'),
    ('char', 'char'),
    ('short', 'short'),
    # This entry used to be ('int', 'uint'), which overrode the line above and
    # turned every int into a uint.
    ('uint', 'uint'),
    ('unsigned', 'uint'),
    ('wchar_t', 'wchar'),  # dchar?
    ('wchar', 'wchar'),  # dchar?
    ('long', 'int'),
    ('longlong', 'long'),
    ('ulonglong', 'ulong'),
    ('integer_t', 'integer_t'),
    ('size_t', 'size_t'),
    ('complex_t', 'complex_t'),
    ('void', 'void'),
    ('va_list', 'va_list'),
    ('...', '...'),

    # strings
    ('char *', 'char[]'),
    ('const char *', 'char[]'),
    ('wchar_t *', 'wchar[]'),
    ('const wchar_t *', 'wchar[]'),
)

cTypeDict = dict(cTypeMap)

# One alternation over every C type spelling, longest first, so that
# 'unsigned long long' wins over 'unsigned long' and so that the result of one
# replacement is never fed into another ('long long' -> 'long' must not go on
# to become 'int').
matchCType = re.compile(
    '|'.join(
        r'\b%s\b' % re.escape(key)
        for key in sorted(cTypeDict, key=len, reverse=True)
    )
)


def replaceCType(match):
    '''re.sub callback: return the D spelling for a matched C type.'''
    key = match.group(0)
    newType = cTypeDict[key]
    if key.endswith('*'):
        # The trailing \b only matches when an identifier follows the star
        # directly ("char *p"); keep the two apart in the output.
        newType += ' '
    return newType


# Preprocessor condition -> D version identifier.  'None' and 'True' are the
# markers for "#if 0" and "#if 1" that scanMethod() treats specially.
versionMap = {
    '0': 'None',
    '1': 'True',
    'LOGSEMANTIC': 'LogSemantic',
    'M_UNICODE': 'Unicode',
    '_WIN32': 'Windows',
    '_win32': 'Windows',
    'linux': 'linux',
}

matchClassDecl = re.compile(
    r'struct ([a-zA-Z_][a-zA-Z0-9_]*)(?: *\: *([a-zA-Z]+))? *$'  # no semicolon at the end
)

matchAttr = re.compile(
    r' *([A-Za-z0-9_. ]+(?: \**)?)'  # type name, possibly followed by a space and an asterisk
    r' *\b([A-Za-z0-9_]+)'           # attribute name
    r' *;'                           # semicolon.
)

matchMethodDecl = re.compile(
    r' *(?:virtual)?'                # virtual keyword (maybe)
    r' *(?:static)?'                 # static (maybe)
    r' *([A-Za-z0-9_]+ +\**)'        # identifier, possibly followed by a space and asterisk(s)
    r' *(?:[A-Za-z0-9_]*\:\:)?'      # optional: class name (?!)  Toss it.
    r' *([A-Za-z0-9_]+)'             # method name
    r' *\((.*?)\)'                   # argument list
    r' *(?:;|(\{[^\}]*\}))'          # semicolon or stuff in braces
)

matchCtorDecl = re.compile(          # matches destructors too
    r'()'                            # empty group to swallow type
    r' *(?:virtual +)?'              # virtual keyword (maybe), e.g. "virtual ~Foo();"
    r' *(\~?[A-Za-z0-9_]+)'          # optional tilde, name
    r' *\((.*?)\)'                   # argument list
    r' *(?:;|(\{[^\}]*\}))'          # semicolon or body
)

matchMethodDefn = re.compile(
    r' *[A-Za-z0-9_ ]+ +\**'         # return type (identifiers, asterisks, etc)
    r' *([A-Za-z0-9_]+) *\:\:'       # class type
    r' *([A-Za-z0-9_]+)'             # method name
    r' *\((.*)\)'                    # argument list
)

matchCtorDefn = re.compile(          # matches destructors too
    r' *([A-Za-z0-9_]+) *\:\:'       # class type
    r' *(\~?[A-Za-z0-9_]+)'          # optional tilde, name
    r' *\((.*)\)'                    # argument list
)

matchArg = re.compile(
    r' *([A-Za-z0-9_ ]+ *[\[\]\*]*)'  # type name, spaces, asterisks
    r' *(\b[A-Za-z0-9_]+)?'           # argument name (may not be present!)
)

matchClassPointer = re.compile(
    r'(\b[A-Z_][A-Za-z0-9]*) \*'
)

matchClassPointerCast = re.compile(
    r'\(([A-Z_][A-Za-z0-9]*) *\*\)'
)

matchPrimitiveCast = re.compile(
    r'\('                            # opening parenth
    r'('                             # grab
    r'(?:[wd]?char)|'                # char, wchar, dchar
    r'(?:wchar_t)|'                  # wchar_t
    r'(?:u?byte)|'                   # byte/ubyte
    r'(?:u?short)|'                  # short/ushort
    r'(?:u?int)|'                    # int/uint
    r'(?:u?long)|'                   # long/ulong
    r'(?:d_[a-z]+[0-9]+)|'           # d_uns8, etc
    r'(?:[a-z]+_t)'                  # xxxxx_t
    r')'                             # end grab
    r' *'                            # whitespace
    r'('                             # suffixes
    r'(?:\[\])?|'                    # trailing [] to indicate array
    r'(?:\**)'                       # stars
    r')'                             # close grab.  Suffixes are optional
    r' *\)'                          # whitespace.  Close parenth
)

matchLongLong = re.compile(
    r'('                             # grab
    r'(?:0x[0-9A-Fa-f_]+)|'          # hex
    r'[0-9]+'                        # decimal/octal
    r')'                             # end grab
    r'[lL]{2}'                       # trailing L's
)

matchBrace = re.compile(r'([\{\}])')

matchSizeof = re.compile(
    r'\bsizeof\('
    r'([^\)]*)'
    r'\)'
)

matchArray = re.compile(
    r'\bArray\b'
)

# First entry of a constructor initializer list: ": Name(".
matchInitializer = re.compile(r': *([a-zA-Z_][a-zA-Z0-9_]*)\(')


def log(*args):
    '''Write the space-joined arguments to stderr unless QUIET is set.'''
    if not QUIET:
        sys.stderr.write(' '.join(map(str, args)) + '\n')


def chomp(s):
    '''Return s without a single trailing newline, if it has one.'''
    if s and s[-1] == '\n':
        return s[:-1]
    return s


class Class(object):
    '''A scanned class: D name, D base name ('' if none), methods, attributes.'''

    def __init__(self, name, base):
        self.name = name
        self.base = base
        self.methods = []
        self.attributes = []


class Method(object):
    '''A scanned method.

    retType is None for constructors/destructors; args is a list of
    "type name" strings; body is a list of lines, or None for a bare
    declaration.
    '''

    def __init__(self, name, retType, args, body):
        self.name = name
        self.retType = retType
        self.args = args
        self.body = body


class Attribute(object):
    '''A scanned data member: its name and its converted (D) type.'''

    def __init__(self, name, type):
        self.name = name
        self.type = type


def scanHeader(name):
    '''Scan name + '.h' and return a dict {class name: Class}.

    Lines that are not recognised as the start of a struct declaration are
    logged and skipped.  Raises AssertionError if the same class is declared
    twice and IOError if the file cannot be read or ends inside a class.
    '''
    result = {}  # name : Class instance

    with open(name + '.h', 'rt') as header:
        # scanClass() pulls lines from this same iterator, so each class body
        # is consumed before the loop sees the next top-level line.
        lines = iter(header)
        for raw in lines:
            line = raw.strip()
            if line == '' or line.startswith('//'):
                continue

            match = matchClassDecl.match(line)
            if match is None:
                log("?:\t%s" % line)
                continue

            c = scanClass(match.group(1), match.group(2), lines)
            if c.name in result:
                raise AssertionError('class %s declared twice' % c.name)
            log('\t', c.name)
            result[c.name] = c

    return result


def scanClass(name, base, iterator):
    '''Read one class body from iterator, up to and including the "};" line.

    name and base are the C++ class and base-class names (base may be None).
    Returns the populated Class.  Raises IOError if the input ends before the
    closing "};".
    '''
    cls = Class(convertType(name), convertType(base))
    log('Class: %s' % name)

    for raw in iterator:
        line = raw.strip()

        if line in ('', '{') or line.startswith(('//', '#')):
            continue
        if line == '};':
            log('-')
            return cls

        # Attributes are tried first: the attribute pattern cannot get past a
        # '(' and therefore never swallows a method declaration.
        attr = matchAttr.match(line)
        if attr is not None:
            log('\tattr:\t%s' % line)
            attrType, attrName = attr.group(1), attr.group(2)
            cls.attributes.append(Attribute(attrName, convertType(attrType)))
            continue

        decl = matchMethodDecl.match(line) or matchCtorDecl.match(line)
        if decl is not None:
            log('\tmethod:\t%s' % repr(decl.groups()))
            retType, methName, args, body = decl.groups()
            if body:
                log(repr(body))
                body = [body]

            args = convertArgumentList(args)
            if retType:
                retType = convertType(retType)
            else:
                # No return type: constructor or destructor.
                retType = None
                if methName.startswith('~'):
                    methName = '~this'
                else:
                    methName = 'this'

            cls.methods.append(Method(methName, retType, args, body))
            continue

        log('??:%s' % repr(line))

    raise IOError('Unexpected end of file')


def scanSource(name, headerDecls):
    '''Attach method bodies from name + '.c' (or '.cpp') to headerDecls.

    headerDecls is the dict returned by scanHeader(); it is updated in place
    and also returned.  Raises KeyError for a class that the header did not
    declare and AssertionError for a method that cannot be matched to a
    declaration.
    '''
    try:
        source = open(name + '.c', 'rt')
    except IOError:
        source = open(name + '.cpp', 'rt')

    with source:
        # scanMethod() consumes the body lines from this same iterator.
        lines = iter(source)
        for raw in lines:
            line = raw.strip()
            if line == '' or line.startswith('//'):
                continue

            result = matchMethodDefn.match(line) or matchCtorDefn.match(line)
            if result is None:
                continue

            rawClass, methName = result.group(1), result.group(2)
            className = convertType(rawClass)
            # Translate constructor/destructor names so we can search on
            # them.  Compare against the raw C++ class name, and exactly: a
            # substring replace would mangle e.g. Type::toTypeInfo.
            if methName == rawClass:
                methName = 'this'
            elif methName == '~' + rawClass:
                methName = '~this'

            args = convertArgumentList(result.group(3))
            body = scanMethod(methName, lines)

            cls = headerDecls[className]
            methods = [m for m in cls.methods if m.name == methName]
            if not methods:
                raise AssertionError(
                    '%s.%s is not declared in the header' % (className, methName))

            # bad, fake overload resolution:
            # if only one method with the right name exists, use it.
            # Else, find one whose argument signature matches exactly,
            # then one with the same argument count; fail otherwise.
            if len(methods) == 1:
                meth = methods[0]
            else:
                overloads = (
                    [m for m in methods if m.args == args] or
                    # same count (desperate here!!)
                    [m for m in methods if len(m.args) == len(args)]
                )
                if not overloads:
                    raise AssertionError(
                        'no overload of %s.%s takes %d argument(s)'
                        % (className, methName, len(args)))
                meth = overloads[0]

            meth.body = body
            log('\tMethod: %s.%s' % (className, methName))

    return headerDecls


def scanMethod(name, iterator):
    '''Read one method body from iterator and return it as a list of lines.

    The iterator must be positioned just after the definition's signature
    line.  An optional base-class initializer (": Base(args)") becomes a
    "super(args);" call.  Lines are run through the textual C++ -> D
    rewrites, split at braces and indented by nesting depth; the outermost
    pair of braces is not part of the result.  Code under "#if 0" is dropped,
    code under "#if 1" is kept without a wrapper, and every other #if becomes
    a D "version (...)" block.

    Raises IOError if the input ends before the opening brace and
    AssertionError for constructs that are not supported.  If the input ends
    inside the body, the lines collected so far are returned.

    name is accepted for symmetry with the caller and is not used.
    '''
    method = []

    # suppress code in #if None blocks
    # always show code in #if True blocks
    suppressConst = True

    NORMAL = None
    NOTSKIP, SKIP = False, True

    # currently open #if's
    preprocStack = []

    # number of indent levels (for prettifying)
    indent = 1

    def skipState(cond):
        '''Classify a condition as SKIP (#if 0), NOTSKIP (#if 1) or NORMAL.'''
        if not suppressConst:
            return NORMAL
        if cond == 'None':
            return SKIP
        if cond == 'True':
            return NOTSKIP
        return NORMAL

    def suppressed():
        '''True while any enclosing #if is known to be false.'''
        return any(skipState(cond) is SKIP for cond in preprocStack)

    def tokenize():
        '''Helper generator which streams converted tokens in.'''
        # A for loop, not next(): a StopIteration escaping from inside a
        # generator is turned into RuntimeError by Python 3.7+ (PEP 479).
        for raw in iterator:
            line = chomp(raw)

            # quick "preprocess"
            # NOTE(review): #ifdef and #ifndef both start with "#if" and are
            # both emitted as version (X); #elif is passed through untouched.
            if line.startswith('#if'):
                arg = line[line.index(' ') + 1:].strip()
                arg = versionMap.get(arg, arg.capitalize())
                preprocStack.append(arg)
                if skipState(arg) is not NORMAL:
                    line = ''
                else:
                    line = 'version (%s) {' % arg

            elif line.startswith('#else'):
                if not preprocStack:
                    raise AssertionError('#else without #if')
                state = skipState(preprocStack[-1])
                if state is not NORMAL:
                    # flip it around
                    preprocStack[-1] = 'None' if state is NOTSKIP else 'True'
                    line = ''
                else:
                    line = '} else {'

            elif line.startswith('#endif'):
                if not preprocStack:
                    raise AssertionError('#endif without #if')
                if skipState(preprocStack[-1]) is not NORMAL:
                    line = ''
                else:
                    line = '} // end version %s' % preprocStack[-1]
                preprocStack.pop()

            line = (line
                .replace('->', '.')
                .replace('::', '.')
                .replace('NULL', 'null')
                .replace('(void *)', 'cast(void*)')
            )

            line = matchCType.sub(replaceCType, line)

            # match sizeof(x) before matching cast
            line = matchSizeof.sub(r'\1.sizeof', line)
            line = matchPrimitiveCast.sub(r'cast(\1\2)', line)
            line = matchClassPointerCast.sub(r'cast(\1)', line)
            line = matchClassPointer.sub(r'\1 ', line)
            line = matchLongLong.sub(r'\1L', line)
            line = matchArray.sub('_ARRAY_', line)

            if suppressed():
                continue

            # Braces become tokens of their own so the caller can track depth.
            for piece in matchBrace.split(line):
                piece = piece.strip()
                if piece:
                    yield piece

    tokens = tokenize()
    t = next(tokens, None)

    if t is not None and t.startswith(':'):
        # initializer list
        init = matchInitializer.match(t)
        if init is not None and init.group(1)[0].isupper():
            # HACK: assume super(...) if the initializer list starts with
            # something that resembles a type
            method.append(INDENT * indent + 'super%s;' % t[init.end(1):])
            t = next(tokens, None)

        if t == ',':
            log("NYI")
            raise AssertionError('multi-line initializer lists are not supported')

    if t is None:
        raise IOError('Unexpected end of file')

    # no initializer list, or already processed
    if t != '{':
        log(repr(t))
        raise AssertionError('expected "{" at the start of a method body')

    for line in tokens:
        if line == '}':
            indent -= 1
            if indent == 0:
                break

        method.append(INDENT * indent + line)

        if line == '{':
            indent += 1

    return method


def convertType(oldType):
    '''Convert a C++ syntax type to a D syntax type.

    Returns '' for None or an empty/blank string.  Types that cannot be
    converted are logged and returned unchanged (minus "const ").
    '''
    if not oldType:
        return ''

    oldType = oldType.strip().replace('const ', '')
    if not oldType:
        return ''

    if oldType in cTypeDict:
        return cTypeDict[oldType]

    if oldType.startswith('enum '):
        # drop enum prototype
        return oldType.replace('enum ', '')

    if oldType.startswith(('Array *', 'Array*')):
        # Array* gets replaced with something greppable.
        # Need a human to figure out what sort of array it is.
        return '_ARRAY_'

    if oldType[0].isupper():
        # Class types are references in D, so one level of pointer goes away
        # ("Type *" and "Type*" -> "Type", "Type **" -> "Type *").
        if oldType.endswith('*'):
            oldType = oldType[:-1].rstrip()
        if oldType == 'Object':
            # special case: need to convert Object to RootObject so as to not
            # conflict with std.object.Object
            return 'RootObject'
        return oldType

    if oldType.endswith('*'):
        # pointer to C type?  Make the pointer associate to the left.
        pos = oldType.find('*')
        if oldType[pos - 1] == ' ':
            oldType = oldType[:pos - 1] + oldType[pos:]
        return oldType

    log("convertType: Can't convert %s" % repr(oldType))
    return oldType


def convertArgument(arg):
    '''Convert one C++ parameter ("type name") to its D spelling.

    Returns '' for an empty or blank argument.  Parameters without a name
    get a generated one ("unnamedN").  Text that does not look like a
    parameter at all (for example "...") is logged and returned stripped.
    '''
    arg = (arg or '').strip()
    if not arg:
        return ''

    result = matchArg.match(arg)
    if result is None:
        log("Could not convert argument %s" % repr(arg))
        # Hand the text back rather than None, which would break the
        # ', '.join() calls made on argument lists.
        return arg

    argType, argName = result.groups()
    # The type pattern also accepts spaces, so it can end with one (for
    # instance in front of a default value); drop it before the checks below.
    argType = argType.strip()

    # heuristics again.  Multi-word types may in actual fact just be the type
    # and the name all in one.
    if not argName and ' ' in argType and argType[-1] not in '[]*':
        p = argType.rindex(' ')
        argType, argName = argType[:p], argType[p + 1:]

    if not argName:
        argName = 'unnamed%i' % getNumber()

    return '%s %s' % (convertType(argType), argName)


def convertArgumentList(args):
    '''Convert a comma-separated C++ parameter list to a list of D parameters.

    An empty or blank list yields [''], which joins back to an empty
    parameter list.
    '''
    return [convertArgument(a) for a in args.split(',')]


def createHeader(name):
    '''Return the text placed at the top of the generated module `name`.'''
    lines = (
        'module dd.%(name)s;',
        '',
        '// Auto conversion from %(name)s.h and %(name)s.c of the DMD compiler.',
        '// Converter coded by Andy Friesen',
        '',
        '// Copyright (c) 1999-2002 by Digital Mars',
        '// All Rights Reserved',
        '// written by Walter Bright',
        '// www.digitalmars.com',
        '// License for redistribution is by either the Artistic License',
        '// in artistic.txt, or the GNU General Public License in gnu.txt.',
        '// See the included readme.txt for details.',
        '',
        'import dd.mars;',
        '',
    )
    return '\n'.join(lines) % {'name': name}


def dump(name, decls):
    '''Write the classes in decls (name -> Class) as D source to name + '.d'.

    Methods come first, then attributes.  A method whose body is None is
    written as a bare declaration.
    '''
    indent = ' ' * 3

    # The module name must be an identifier, so drop any directory part.
    out = [createHeader(os.path.basename(name))]

    for decl in decls.values():
        if decl.base:
            out.append('class %s : %s {' % (decl.name, decl.base))
        else:
            out.append('class %s {' % decl.name)

        for meth in decl.methods:
            args = ', '.join(meth.args)
            if meth.retType is None:
                # c/dtor
                signature = '%s(%s)' % (meth.name, args)
            else:
                signature = '%s %s(%s)' % (meth.retType, meth.name, args)

            if meth.body is None:
                out.append('%s%s;' % (indent, signature))
            else:
                out.append('%s%s {' % (indent, signature))
                out.extend(indent + line for line in meth.body)
                out.append(indent + '}')
                out.append('')

        if decl.methods:
            out.append('')

        for attr in decl.attributes:
            out.append('%s%s %s;' % (indent, attr.type, attr.name))

        out.append('}')
        out.append('')

    with open(name + '.d', 'wt') as destFile:
        destFile.write('\n'.join(out) + '\n')


def main(*args):
    '''Convert every base name in args[1:]; args[0] is the program name.'''
    # Without this declaration the assignments below only bind a local and
    # log() keeps printing during the header scan.
    global QUIET

    if len(args) < 2:
        raise SystemExit(
            'usage: %s NAME [NAME ...]' % (args[0] if args else 'cpp2d'))

    for name in args[1:]:
        QUIET = True
        try:
            decls = scanHeader(name)
        finally:
            QUIET = False
        scanSource(name, decls)
        dump(name, decls)


if __name__ == '__main__':
    main(*sys.argv)