# Convert Tango documentation to XML
# Public domain
#
# Fetches the Tango documentation index, follows every link found under
# "#searchable ul li a", and walks the nested <dl>/<dt>/<dd> structure of each
# page's "#docbody". Output goes to stdout; progress and errors go to stderr.
#
# NOTE(review): every string this script writes to stdout is empty in this
# copy (`%Q{}` and `""`). The XML markup they presumably contained appears to
# have been lost; the literals are reproduced unchanged here -- restore them
# from the original script if it is available.

require 'hpricot'
require 'open-uri'

# Downloads +url+ and parses it with Hpricot.
#
# open-uri's Kernel#open override no longer handles URLs on Ruby 3.0+, so
# URI.open is used when it exists; older Rubies fall back to Kernel#open.
# The block form closes the underlying IO once parsing is done.
#
# Raises OpenURI::HTTPError on HTTP error responses (handled by the caller
# for the per-module pages).
def fetch_document(url)
  if URI.respond_to?(:open)
    URI.open(url) { |io| Hpricot(io) }
  else
    open(url) { |io| Hpricot(io) }
  end
end

# Recursively walks one nesting level of a documentation page.
#
# source - URL of the page being processed. It is only passed down to the
#          recursive calls in the visible code.
# doc    - Hpricot node/element set whose direct "dl > dt" / "dl > dd"
#          children are paired up and visited. May be nil, in which case
#          nothing is done.
def parseLevel(source, doc)
  # A <dt> without a matching <dd> makes zip yield nil for the description;
  # there is nothing to descend into in that case.
  return if doc.nil?

  headers = doc / "> dl > dt"
  descs = doc / "> dl > dd"

  headers.zip(descs).each do |h, d|
    text = h.innerText.gsub(/[\r\n]/, ' ')
    # Drop the inline outline-registration script calls embedded in headers.
    text = text.gsub(/explorer\.outline\.addDecl\([^)]*\);/, '')
    # NOTE(review): this replacement is a no-op as written; it presumably
    # escaped double quotes (e.g. to an XML entity) before the text was
    # embedded in the output -- confirm against the original script.
    text = text.gsub('"', '"')
    text = text.squeeze(" ").strip

    type =
      case text
      when /^class/ then "class"
      when /^struct/ then "struct"
      else "method"
      end

    if type == "method"
      puts %Q{}
      parseLevel(source, d)
    else
      # Aggregates (class/struct) additionally emit a closing string after
      # their nested members.
      puts %Q{}
      parseLevel(source, d)
      puts ""
    end
  end
end

index = "http://dsource.org/projects/tango/docs/current/"
indexDoc = fetch_document(index)

puts ""
(indexDoc / "#searchable ul li a").each do |a|
  href = a.attributes["href"]
  # An anchor without an href cannot be turned into a page URL.
  next if href.nil?

  source = index + href
  $stderr.puts source

  # Skip: there are errors in the html of those files
  if source =~ /tango\.core\.Variant/ || source =~ /tango\.util\.Convert/
    $stderr.puts "Skipping #{source}..."
    next
  end

  begin
    fullDoc = fetch_document(source)
    docbody = fullDoc / "#docbody"
    puts %Q{}
    parseLevel(source, docbody)
    puts ""
  rescue OpenURI::HTTPError => e
    # A page that fails to download is reported and skipped; the run goes on.
    $stderr.puts "Error with #{source}: #{e.message}"
  end
end
puts ""