'''
A horrible, horrible script which converts a specific subset of C++ to D.
(specifically, the subset used by DMD)

Coded by Andy Friesen
20 July 2004

Distribution / Code rights:
 Use this source code in any fashion you see fit.  Giving me credit where
 credit is due is optional, depending on your own levels of integrity and
 honesty.
'''
import sys
import re

# When true, log() discards its messages instead of writing them to stderr.
QUIET = False
# One level of indentation (four spaces) used when scanMethod lays out a body.
INDENT = ' ' * 4

# Last value handed out by getNumber(); 0 means none has been issued yet.
COUNT = 0
def getNumber():
    'Return the next integer in a module-wide sequence: 1, 2, 3, ...'
    global COUNT
    issued = COUNT + 1
    COUNT = issued
    return issued

# Ordered (C type, D type) pairs.
#
# The order matters: scanMethod applies the pairs one after another as
# whole-word substitutions, so longer spellings have to come before any
# shorter spelling they contain.  Every key must also be unique, because
# cTypeDict (used by convertType) keeps only the last value for a key.
#
# NOTE(review): because the substitutions are sequential, the output of one
# pair can still be rewritten by a later pair (e.g. 'long long' -> 'long'
# and then 'long' -> 'int').  Fixing that needs a single-pass substitution
# in scanMethod, not a change to this table.
cTypeMap = (
    # multi-word: must be handled first, longest spelling first.
    ('unsigned char',   'ubyte'),
    ('unsigned short',  'ushort'),
    ('unsigned int',    'uint'),
    ('unsigned long long', 'ulong'),
    ('unsigned long',   'uint'),
    ('long double',     'real'),
    ('long long',       'long'),

    ('unsigned char*',  'ubyte[]'),
    ('unsigned char *',  'ubyte[]'),
    ('unsigned short*', 'wchar[]'),
    ('unsigned short *', 'wchar[]'),

    ('int',             'int'),
    ('char',            'char'),
    ('short',           'short'),
    ('uint',            'uint'),
    ('unsigned',        'uint'),
    ('wchar_t',         'wchar'), # dchar?
    ('wchar',           'wchar'), # dchar?
    ('long',            'int'),
    ('longlong',        'long'),
    ('ulonglong',       'ulong'),
    ('integer_t',       'integer_t'),
    ('size_t',          'size_t'),

    ('complex_t',       'complex_t'),
    ('void',            'void'),
    ('va_list',         'va_list'),
    ('...',             '...'),

    # strings
    ('char *',          'char[]'),
    ('const char *',    'char[]'),
    ('wchar_t *',       'wchar[]'),
    ('const wchar_t *', 'wchar[]'),
)

# Exact-match lookup used by convertType.
cTypeDict = dict(cTypeMap)

# Maps the argument of a C preprocessor '#if' line to the identifier used
# in the generated D 'version (...)' block.  Lookup in scanMethod is
# case-sensitive and falls back to arg.capitalize() for unknown names.
# '0' and '1' map to the sentinel strings 'None' and 'True', which
# scanMethod treats as constant conditions.
versionMap = {
    '0'                 :       'None',
    '1'                 :       'True',
    'LOGSEMANTIC'       :       'LogSemantic',
    'M_UNICODE'         :       'Unicode',
    '_WIN32'            :       'Windows',  # spelling of the predefined compiler macro
    '_win32'            :       'Windows',  # kept for backward compatibility
    'linux'             :       'linux',
}

# Matches the opening line of a struct declaration with an optional single
# base class.  group(1) is the struct name, group(2) the base name (None
# when there is no base).  The trailing '$' means a forward declaration
# such as 'struct Foo;' does not match.
# NOTE(review): the base-name class [a-zA-Z]+ excludes digits and
# underscores, unlike the struct-name pattern -- confirm this is intended.
matchClassDecl = re.compile(
    r'struct ([a-zA-Z_][a-zA-Z0-9_]*)(?: *\: *([a-zA-Z]+))? *$' # no semicolon at the end
)

# Matches a data member declaration.  group(1) is the type text and
# group(2) the member name.
matchAttr = re.compile(
    r' *([A-Za-z0-9_. ]+(?: \**)?)'    # type name, possibly followed by a space and asterisk(s)
    r' *\b([A-Za-z0-9_]+)'        # attribute name
    r' *;'                      # semicolon.
)

# Matches a method declaration inside a struct body.  Groups, in order:
# return type, method name, argument text, and the inline body including
# its braces (None when the declaration ends in a semicolon).  The inline
# body pattern stops at the first '}', so it cannot contain nested braces.
matchMethodDecl = re.compile(
    r' *(?:virtual)?'           # virtual keyword (maybe)
    r' *(?:static)?'            # static (maybe)
    r' *([A-Za-z0-9_]+ +\**)'   # identifier, possibly followed by a space and asterisk(s)
    r' *(?:[A-Za-z0-9_]*\:\:)?' # optional: class name (?!)  Toss it.
    r' *([A-Za-z0-9_]+)'        # method name
    r' *\((.*?)\)'              # argument list
    r' *(?:;|(\{[^\}]*\}))'     # semicolon or stuff in braces
)

# Matches a constructor or destructor declaration.  The leading empty group
# gives this pattern the same four-group layout as matchMethodDecl, so
# scanClass can unpack a match from either pattern the same way; the empty
# "return type" is what marks the match as a constructor/destructor.
matchCtorDecl = re.compile(     # matches destructors too
    r'()'                       # empty group to swallow type
    r' *(\~?[A-Za-z0-9_]+)'     # optional tilde, name
    r' *\((.*?)\)'              # argument list
    r' *(?:;|(\{[^\}]*\}))'     # semicolon or body
)

# Matches the signature line of an out-of-class method definition
# ('RetType Class::method(args)').  The return type is matched but not
# captured.  group(1) is the class name, group(2) the method name and
# group(3) the argument text (greedy: everything up to the last ')').
matchMethodDefn = re.compile(
    r' *[A-Za-z0-9_ ]+ +\**'    # return type (identifiers, asterisks, etc)
    r' *([A-Za-z0-9_]+) *\:\:'  # class type
    r' *([A-Za-z0-9_]+)'        # method name
    r' *\((.*)\)'               # argument list
)

# Matches the signature line of an out-of-class constructor or destructor
# definition ('Class::Class(args)' / 'Class::~Class(args)').  Same three
# groups as matchMethodDefn: class name, name (with optional tilde), args.
matchCtorDefn = re.compile(     # matches destructors too
    r' *([A-Za-z0-9_]+) *\:\:'  # class type
    r' *(\~?[A-Za-z0-9_]+)'     # optional tilde, name
    r' *\((.*)\)'               # argument list
)

# Matches one argument of an argument list.  group(1) is the type text and
# group(2) the argument name, or None when no name was captured.  The type
# pattern accepts spaces, so it can swallow the name as well;
# convertArgument splits the two apart again in that case.
matchArg = re.compile(
    r' *([A-Za-z0-9_ ]+ *[\[\]\*]*)'     # type name, spaces, brackets/asterisks
    r' *(\b[A-Za-z0-9_]+)?'              # argument name (may not be present!)
)

# Matches 'Name *' where Name starts with an upper-case letter or an
# underscore; group(1) is Name.  scanMethod uses it to drop the asterisk.
matchClassPointer = re.compile(
    r'(\b[A-Z_][A-Za-z0-9]*) \*'
)

# Matches a C-style cast to such a pointer, '(Name *)'; group(1) is Name.
# scanMethod rewrites it to 'cast(Name)'.
matchClassPointerCast = re.compile(
    r'\(([A-Z_][A-Za-z0-9]*) *\*\)'
)

# Matches a C-style cast to a primitive type, e.g. '(uint)' or '(char *)'.
# group(1) is the type name and group(2) its suffix ('[]', a run of
# asterisks, or empty).  scanMethod rewrites the match to 'cast(\1\2)'.
# It runs after the cTypeMap substitutions, which is why D spellings such
# as ubyte/ushort/uint/ulong appear in the alternatives.
matchPrimitiveCast = re.compile(
    r'\('               # opening parenth
    r'('                # grab
        r'(?:[wd]?char)|'    # char, wchar, dchar
        r'(?:wchar_t)|' # wchar_t
        r'(?:u?byte)|'  # byte/ubyte
        r'(?:u?short)|' # short/ushort
        r'(?:u?int)|'   # int/uint
        r'(?:u?long)|'  # long/ulong
        r'(?:d_[a-z]+[0-9]+)|'     # d_uns8, etc
        r'(?:[a-z]+_t)' # xxxxx_t
    r')'                # end grab
    r' *'               # whitespace
    r'('                # suffixes
        r'(?:\[\])?|'   # trailing [] to indicate array
        r'(?:\**)'      # stars
    r')'                # close grab.  Suffixes are optional
    r' *\)'             # whitespace.  Close parenth
)

# Matches an integer literal carrying a two-letter 'LL' suffix (any case).
# group(1) is the literal without the suffix; scanMethod rewrites the
# match to '\1L'.
matchLongLong = re.compile(
    r'('                        # grab
    r'(?:0x[0-9A-Fa-f_]+)|'     # hex (underscores are accepted too)
    r'[0-9]+'                   # decimal/octal
    r')'                        # end grab
    r'[lL]{2}'                  # trailing L's
)

# Matches a single opening or closing brace.  The capturing group makes
# matchBrace.split() keep each brace as its own item in the result.
# Written as a raw string; braces need no escaping inside a character class.
matchBrace = re.compile(r'([{}])')

# Matches 'sizeof(...)'; group(1) is the text between the parentheses
# (up to the first closing parenthesis).
matchSizeof = re.compile(r'\bsizeof\(([^\)]*)\)')

# Matches the bare word 'Array'.
matchArray = re.compile(r'\bArray\b')

def log(*args):
    'Write the arguments, str()-converted and space-separated, as one line to stderr unless QUIET is set.'
    if QUIET:
        return
    message = ' '.join([str(item) for item in args])
    sys.stderr.write(message + '\n')

def chomp(s):
    'Return s without its final newline, if it has one; anything else is returned unchanged.'
    if not s:
        return s
    if s[-1] != '\n':
        return s
    return s[:-1]

class Class(object):
    'A parsed struct declaration: its name, its base, and its members.'
    def __init__(self, name, base):
        self.name, self.base = name, base
        # Both lists start empty; scanClass appends Method and Attribute
        # instances to them.
        self.methods, self.attributes = [], []

class Method(object):
    'A method of a parsed class: name, return type, argument list and body.'
    def __init__(self, name, retType, args, body):
        self.name, self.retType = name, retType
        self.args, self.body = args, body

class Attribute(object):
    'A data member of a parsed class: its name and its type.'
    def __init__(self, name, type):
        self.name, self.type = name, type

def scanHeader(name):
    '''
    Scan the header file name + '.h' for struct declarations.

    Blank lines and '//' comment lines are skipped.  Each line matching
    matchClassDecl hands the rest of the declaration to scanClass, which
    reads from the same line iterator.  Any other line is logged with a
    '?:' prefix and ignored.

    Returns a dict mapping each class name to its Class instance.
    Raises IOError if the file cannot be opened, or (from scanClass) if
    the file ends in the middle of a struct.
    '''
    result = {} # name : Class instance

    header = open(name + '.h', 'rt')
    try:
        # scanClass pulls lines from this same iterator, so the loop below
        # resumes right after the struct it consumed.
        iterator = iter(header)
        for raw in iterator:
            line = raw.strip()

            if line == '' or line.startswith('//'):
                continue

            match = matchClassDecl.match(line)
            if match is None:
                log("?:\t%s" % line)
                continue

            c = scanClass(match.group(1), match.group(2), iterator)
            assert c.name not in result, 'duplicate class %s' % c.name
            log('\t', c.name)
            result[c.name] = c
    finally:
        # the original never closed the header
        header.close()

    return result

def scanClass(name, base, iterator):
    '''
    Read the body of one struct declaration from iterator.

    name and base are the C++ struct and base names (base may be None);
    both go through convertType.  iterator yields the lines that follow
    the 'struct ...' line.  Reading stops at the line '};'.

    Blank lines, a lone '{', '//' comments and preprocessor lines are
    skipped.  A line matching matchAttr becomes an Attribute.  A line
    matching matchMethodDecl or matchCtorDecl becomes a Method;
    constructors and destructors get retType None and the names 'this'
    and '~this'.  An inline body is stored as a one-element list.  Any
    other line is logged with a '??:' prefix.

    Returns the populated Class.  Raises IOError if the input ends before
    the closing '};'.
    '''
    cls = Class(convertType(name), convertType(base))
    log('Class: %s' % name)

    for raw in iterator:
        line = chomp(raw.strip())
        if line == '' or line == '{' or line.startswith('//') or line.startswith('#'):
            continue
        if line == '};':
            log('-')
            return cls

        attr = matchAttr.match(line)
        if attr is not None:
            log('\tattr:\t%s' % line)
            attrType, attrName = attr.groups()
            cls.attributes.append(Attribute(attrName, convertType(attrType)))
            continue

        decl = matchMethodDecl.match(line) or matchCtorDecl.match(line)
        if decl is None:
            log('??:%s' % repr(line))
            continue

        log('\tmethod:\t%s' % repr(decl.groups()))
        retType, methName, args, body = decl.groups()

        if body:
            log(repr(body))
            body = [body]

        args = convertArgumentList(args)
        if retType:
            retType = convertType(retType)
        else:
            # matchCtorDecl leaves the type group empty: constructor or
            # destructor, which D spells 'this' / '~this'.
            retType = None
            if methName.startswith('~'):
                methName = '~this'
            else:
                methName = 'this'

        cls.methods.append(Method(methName, retType, args, body))

    # the lines ran out before the closing '};' was seen
    raise IOError('Unexpected end of file')

def scanSource(name, headerDecls):
    '''
    Scan name + '.c' (or name + '.cpp' if the former cannot be opened)
    for out-of-class method definitions and attach each body to the
    matching Method in headerDecls.

    headerDecls is the dict returned by scanHeader; it is modified in
    place and also returned.

    Overloads are resolved crudely: a single method of that name wins
    outright; otherwise the first one with an identical converted
    argument list, or failing that the same number of arguments.

    Raises IOError if neither file can be opened, KeyError if a
    definition names a class missing from headerDecls, and
    AssertionError if no declaration matches a definition.
    '''
    try:
        source = open(name + '.c', 'rt')
    except IOError:
        source = open(name + '.cpp', 'rt')

    try:
        # scanMethod pulls the body lines from this same iterator
        iterator = iter(source)
        for raw in iterator:
            line = raw.strip()

            if line == '' or line.startswith('//'):
                continue

            result = matchMethodDefn.match(line) or matchCtorDefn.match(line)
            if result is None:
                continue

            cppClass, methName, argText = result.groups()
            className = convertType(cppClass)

            # Translate constructor/destructor names so we can search on
            # them.  Compare whole names against the C++ class name: a
            # substring replace would also mangle ordinary methods whose
            # name contains the class name, and would miss classes that
            # convertType renames.
            if methName == cppClass:
                methName = 'this'
            elif methName == '~' + cppClass:
                methName = '~this'

            args = convertArgumentList(argText)
            body = scanMethod(methName, iterator)
            cls = headerDecls[className]
            methods = [m for m in cls.methods if m.name == methName]
            assert methods, 'no declaration for %s.%s' % (className, methName)

            # bad, fake overload resolution:
            # if only one method with the right name exists, use it.
            # Else, find one whose argument signature matches exactly
            # fail otherwise
            if len(methods) == 1:
                meth = methods[0]
            else:
                overloads = (
                    # method with exactly the same sig
                    [m for m in methods if m.args == args] or
                    # same count (desperate here!!)
                    [m for m in methods if len(m.args) == len(args)]
                )
                assert overloads, 'no overload matches %s.%s' % (className, methName)
                meth = overloads[0]

            meth.body = body
            log('\tMethod: %s.%s' % (className, methName))
    finally:
        # the original never closed the source file
        source.close()

    return headerDecls

def scanMethod(name, iterator):
    '''
    Read one method body from iterator and return it as a list of D lines.

    name is currently unused; it is kept for interface compatibility.
    iterator yields the source lines that follow the method's signature
    line.

    Every line gets a quick textual translation ('->' and '::' to '.',
    NULL to null, the cTypeMap substitutions, sizeof, casts, class
    pointers, LL literals, Array) and is then split around braces into
    tokens.  Preprocessor conditionals become D 'version' blocks, except
    for constant conditions ('#if 0' / '#if 1', which versionMap maps to
    'None' / 'True'): those are folded away, keeping only the live branch.

    An initializer list that starts with something resembling a type
    becomes a 'super(...);' call.

    The result holds the lines between the method's outer braces, each
    indented with INDENT; the outer braces themselves are not included.
    Raises IOError if the input ends before the opening brace, and
    AssertionError if something other than '{' is found there.
    '''
    method = []

    # suppress code in #if None blocks
    # always show code in #if True blocks
    suppressConst = True

    # currently open #if's: 'None' or 'True' for a constant condition,
    # otherwise the D version identifier
    preprocStack = []

    # number of indent levels (for prettifying)
    indent = 1

    def isConst(entry):
        'True when entry marks a constant #if that is being folded away.'
        return suppressConst and entry in ('True', 'None')

    def tokenize():
        'Helper function to stream the non-empty tokens in.'
        for raw in iterator:
            line = chomp(raw)

            # quick "preprocess"
            if line.startswith("#if"):
                arg = line[line.index(' ') + 1:].strip()
                arg = versionMap.get(arg, arg.capitalize())
                preprocStack.append(arg)
                if isConst(arg):
                    line = ''
                else:
                    line = 'version (%s) {' % arg

            elif line.startswith('#else'):
                assert len(preprocStack)
                top = preprocStack[-1]
                if isConst(top):
                    # flip it around: the live branch becomes the dead
                    # one and vice versa
                    if top == 'None':
                        preprocStack[-1] = 'True'
                    else:
                        preprocStack[-1] = 'None'
                    line = ''
                else:
                    line = '} else {'

            elif line.startswith('#endif'):
                assert len(preprocStack)
                if isConst(preprocStack[-1]):
                    line = ''
                else:
                    line = '} // end version %s' % preprocStack[-1]
                preprocStack.pop()

            # Drop everything inside a dead branch, including conditionals
            # nested in it.  Checked after the directive handling so the
            # stack already reflects the current line.
            if suppressConst and 'None' in preprocStack:
                continue

            line = (line
                .replace('->', '.')
                .replace('::', '.')
                .replace('NULL', 'null')
                .replace('(void *)', 'cast(void*)')
            )

            for k, v in cTypeMap:
                # potential bottleneck?
                line = re.sub(r'\b%s\b' % re.escape(k), v, line)

            # match sizeof(x) before matching cast
            line = matchSizeof.sub(r'\1.sizeof', line)
            line = matchPrimitiveCast.sub(r'cast(\1\2)', line)
            line = matchClassPointerCast.sub(r'cast(\1)', line)
            line = matchClassPointer.sub(r'\1 ', line)
            line = matchLongLong.sub(r'\1L', line)
            line = matchArray.sub('_ARRAY_', line)

            for piece in matchBrace.split(line):
                token = piece.strip()
                if token:
                    yield token

    tokens = tokenize()

    def nextToken():
        'Return the next token; running out of input here is an error.'
        for token in tokens:
            return token
        raise IOError('Unexpected end of file')

    t = nextToken()
    if t.startswith(':'):
        # initializer list
        baseName = re.match(r'\: *([a-zA-Z_][a-zA-Z0-9_]+)\(', t).group(1)
        pos = t.index(baseName) + len(baseName)
        if baseName[0].isupper(): # HACK: assume super(...) if initializer list starts with something that resembles a type
            method.append(INDENT * indent + 'super%s;' % t[pos:])

        t = nextToken()
        if t == ',':
            log("NYI")
            raise AssertionError('multi-line initializer lists are not implemented')

    # no initializer list, or already processed
    if t != '{':
        log(repr(t))
        raise AssertionError('expected the opening brace of the method body')

    # Running out of input inside the body is tolerated: whatever was
    # collected so far is returned.
    for line in tokens:
        if line == '}':
            indent -= 1
            if indent == 0:
                break
        method.append(INDENT * indent + line)
        if line == '{':
            indent += 1

    return method

def convertType(oldType):
    '''
    Converts a C++ syntax type to a D syntax type.

    Returns '' for None, '' or whitespace-only input.  Otherwise 'const '
    is dropped and the rules are tried in this order: exact lookup in
    cTypeDict; 'enum X' becomes 'X'; anything starting with 'Array *'
    becomes '_ARRAY_'; Object becomes RootObject; a type starting with an
    upper-case letter loses a trailing ' *'; any other pointer type has
    its first asterisk moved next to the type name.  A type that fits
    none of these is logged and returned unchanged.
    '''
    if not oldType:
        return ''

    oldType = oldType.strip().replace('const ', '')
    if not oldType:
        return ''

    if oldType in cTypeDict:
        return cTypeDict[oldType]

    if oldType.startswith('enum '):
        # drop enum prototype
        return oldType.replace('enum ', '')

    if oldType.startswith('Array *'):
        # Array* gets replaced with something greppable.
        # Need a human to figure out what sort of array
        # it is.
        return '_ARRAY_'

    if oldType in ('Object', 'Object *'):
        # special case: need to convert Object to RootObject so as to not conflict with std.object.Object
        return 'RootObject'

    if oldType[0].isupper():
        if oldType.endswith(' *'):
            # pointer to class: drop the pointer notation, if present
            return oldType[:-2]
        return oldType

    if oldType.endswith('*'):
        # pointer to C type?  Make the pointer associate to the left.
        pos = oldType.find('*')
        if pos > 0 and oldType[pos - 1] == ' ':
            oldType = oldType[:pos - 1] + oldType[pos:]
        return oldType

    log("convertType: Can't convert %s" % repr(oldType))
    return oldType

def convertArgument(arg):
    '''
    Convert one C++ argument ('type name' or just 'type') to D syntax.

    Surrounding whitespace is ignored.  Returns '' for an empty argument
    and passes the varargs marker '...' through unchanged.  An argument
    without a name is given a generated one ('unnamedN').  Text that
    matchArg cannot parse is logged and returned as it is, so the result
    is always a string.
    '''
    if not arg:
        return ''

    # Trailing whitespace would otherwise end up inside the type text and
    # defeat the type/name split below.
    arg = arg.strip()
    if not arg:
        return ''

    if arg == '...':
        # varargs: nothing to convert and no name to invent
        return arg

    result = matchArg.match(arg)
    if result is None:
        log("Could not convert argument %s" % repr(arg))
        # hand the text back rather than None so callers can still join it
        return arg

    argType, argName = result.groups()
    # heuristics again.  Multi-word types may in actual fact just be the type and the name all in one.
    if not argName and ' ' in argType and argType[-1] not in '[]*':
        p = argType.rindex(' ')
        argType, argName = argType[:p], argType[p+1:]

    if not argName:
        argName = 'unnamed%i' % getNumber()

    return '%s %s' % (convertType(argType), argName)

def convertArgumentList(args):
    '''
    Convert the text between the parentheses of a C++ signature into a
    list of D argument strings, one per comma-separated argument.

    Empty and whitespace-only entries are skipped, so '' and ' ' both
    give an empty list.
    '''
    pieces = [piece.strip() for piece in args.split(',')]
    return [convertArgument(piece) for piece in pieces if piece]

def createHeader(name):
    'Return the module declaration and boilerplate comment that open the generated D file for name.'
    # The template is indented to sit nicely in this source file; the
    # eight leading spaces of each line are cut off after the blanks are
    # filled in.
    template = '''
        module dd.%(____)s;

        // Auto conversion from %(____)s.h and %(____)s.c of the DMD compiler.
        // Converter coded by Andy Friesen

        // Copyright (c) 1999-2002 by Digital Mars
        // All Rights Reserved
        // written by Walter Bright
        // www.digitalmars.com
        // License for redistribution is by either the Artistic License
        // in artistic.txt, or the GNU General Public License in gnu.txt.
        // See the included readme.txt for details.

        import dd.mars;

        '''
    blanks = {'____': name}
    filled = template % blanks
    return filled.replace('\n        ', '\n')

def dump(name, decls):
    '''
    Write the converted classes to name + '.d'.

    decls is the dict built by scanHeader and filled in by scanSource.
    The file starts with createHeader(name).  Each class follows with its
    methods first and its attributes after them.  A method whose body is
    None is written as a bare declaration ending in ';'.

    The spacing of each line is the spacing the original print statements
    produced (items joined by a single space).
    '''
    INDENT = ' ' * 3

    destFile = open(name + '.d', 'wt')
    try:
        def emit(text):
            'Write one line to the output file.'
            destFile.write(text + '\n')

        emit(createHeader(name))

        for decl in decls.values():
            # The opening brace is needed whether or not there is a base
            # class; the closing one is always written below.
            if decl.base:
                emit('class %s : %s {' % (decl.name, decl.base))
            else:
                emit('class %s {' % decl.name)

            for meth in decl.methods:
                argText = ', '.join(meth.args)
                if meth.retType is None: # c/dtor
                    signature = '%s(%s)' % (meth.name, argText)
                else:
                    signature = '%s %s(%s)' % (meth.retType, meth.name, argText)
                declaration = INDENT + ' ' + signature

                if meth.body is not None:
                    emit(declaration + '  {')

                    for line in meth.body:
                        emit(INDENT + ' ' + line)

                    emit(INDENT + ' ' + '}')
                    emit('')
                else:
                    emit(declaration + ' ;')

            if decl.methods:
                emit('')

            for attr in decl.attributes:
                emit(INDENT + ' ' + '%s %s;' % (attr.type, attr.name))

            emit('}')
            emit('')
    finally:
        # the original never closed the output file
        destFile.close()

def main(*args):
    '''
    Convert each named module.

    args mirrors sys.argv: args[0] is the script name and every further
    item is a base name NAME, for which NAME.h and NAME.c (or NAME.cpp)
    are read and NAME.d is written.  Exits with a usage message when no
    base name is given.
    '''
    # Without this declaration the assignments below would create a local
    # variable and log() would never be silenced.
    global QUIET

    if len(args) < 2:
        sys.exit('usage: <script> NAME [NAME ...]')

    for name in args[1:]:
        # header scanning is chatty; keep it quiet
        QUIET = True
        try:
            decls = scanHeader(name)
        finally:
            QUIET = False
        scanSource(name, decls)

        dump(name, decls)

if __name__ == '__main__':
    main(*sys.argv)