Subversion

helios_wp3

[/] [trunk/] [python/] [npmdebbugs.py] - Rev 17 Go to most recent revision

Compare with Previous - Blame


#!/usr/bin/python

# Copyright Olivier Berger <olivier.berger@it-sudparis.eu> + Institut TELECOM, 2008-2009
# developed in the frame of the HELIOS project (http://www.helios-platform.org/)
# License : GNU LGPL V3

# Processes debian bugs to produce their RDF representation in turtle
# format, suitable to be consumed as EvoOnt bom ontology
# (http://www.ifi.uzh.ch/ddis/evoont/2008/11/bom/) and others

import sys
import traceback
#import urllib2
from urllib import quote,quote_plus
import re
import email.utils
import string
import time

# import these bits to extend debbugs from btsutils
from debbugsloc import debbugsloc

# Import bits from bts-link
from btslinkutils import BTSLConfig as Cnf
import btslinkbts
bts = btslinkbts

from btslinkremote import RemoteBts


# Number of consecutive bug numbers handled per batch by the main loop.
BUG_STEP=50

# Bug trackers to process.  The main loop unpacks each entry as
# (name, type, url, start, end): bug numbers are visited from start, in
# batches of BUG_STEP, for as long as the batch start is below end.  Only
# name, start and end are actually used by the main loop.
# NOTE: the commented-out Bugzilla entries below have only four fields (no
# type) and would no longer unpack correctly if re-enabled as-is.
bugtrackers = [
#        ("mandriva", "https://qa.mandriva.com/show_bug.cgi?ctype=xml&excludefield=attachment&%s", 0, 46200),
#        ("kde", "http://bugs.kde.org/show_bug.cgi?ctype=xml&excludefield=attachment&%s", 0, 177000),
#        ("kernel", "http://bugzilla.kernel.org/show_bug.cgi?ctype=xml&excludefield=attachment&%s", 0, 12200),
    ("debian", "debbugs", "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s",3170,501010),
#    ("debian", "debbugs", "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s",250000,501010),
#    ("debian", "debbugs", "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s",283060,284000),
        ]

# Needs to be improved
#email_r = re.compile("([^<]*<)?([^@]*)@([a-zA-Z.\-]+).*")

# Directory holding one text file per output template; each file is read
# once at start-up and later filled in with the "%" operator.
TEMPLATE_DIR="TEMPLATES"
templates = {}
for t in ["header-debian", "bug-debian", "reporter", "package", "comment", "resolution", "software-debian", "forwarded-debian"]:
    # try/finally (rather than "with") keeps this usable on Python 2.5 while
    # still closing every template file deterministically.
    _template_file = open("%s/%s" % (TEMPLATE_DIR, t))
    try:
        templates[t] = _template_file.read()
    finally:
        _template_file.close()


# character escaping
def escapechars(s, chars=('"', "'", '\\')):
    """Backslash-escapes selected characters of a string.

    s     -- the text to escape
    chars -- the characters to prefix with a backslash; defaults to the
             double quote, the single quote and the backslash itself.
             Any container supporting "in" (tuple, list, string) works.

    Returns a new string in which every character of s found in chars is
    preceded by a backslash; all other characters are copied unchanged.
    """
    # The default is an immutable tuple so it can never be altered between
    # calls the way a shared list default could.
    return "".join(['\\%s' % c if c in chars else c for c in s])

def parse_bug(spoolbts, id, output=sys.stdout, output_header=False):
    """Writes the turtle (RDF) representation of one Debian bug to output.

    spoolbts      -- bug source; its get() method is called with the bug
                     number converted to a string and is expected to return
                     a bug object, or a false value when there is no such bug
    id            -- number of the bug to process
    output        -- file-like object receiving the generated statements
    output_header -- when true, the "header-debian" template is written
                     before the bug itself

    Returns False, without writing anything, when spoolbts has no such bug,
    and True once the bug has been written.
    """

    bug = spoolbts.get(str(id))
    if not bug:
        return False

    baseurl = "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug="

    if output_header:
        print >>output, templates["header-debian"]

    bug_id = bug.getBug()
    url = "%s%s" % (baseurl, bug_id)

    params = {
        "bug_id": bug_id,
        "url": url,
        # Key inherited from the Bugzilla version of this script; the bug's
        # own URL is supplied for it as well.
        "bugzilla_url": url,
        "short_desc": escapechars(bug.getSummary().encode("utf-8")),
        "creation_ts": time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(bug.getDate())),
        # Not extracted for Debian bugs: emitted empty.
        "product": '',
        "sys": '',
        "bug_status": bug.getStatus(),
        # No priority is read from the bug; a constant is emitted instead.
        "priority": 5,
        "severity": bug.getSeverity(),
        "description": '',
    }
    print >>output, templates["bug-debian"] % params

    # The Debian package name is used both as the "product" and as the
    # "package" of the bug.
    package = bug.getPackage()
    quoted_package = quote_plus(package)
    print >>output, templates["software-debian"] % {
        "url": url,
        "quoted_product": quoted_package,
        "product": package,
    }
    print >>output, templates["package"] % {
        "quoted_product": quoted_package,
        "package": package,
        "quoted_package": quoted_package,
    }

    # reporter
    name, mail = email.utils.parseaddr(bug.getSubmitter())

    # The submitter field may contain non-standard characters that make
    # email.utils.parseaddr fail; the reporter is only emitted when the
    # result at least looks like an address.
    if '@' in mail:
        print >>sys.stderr, "name :", name
        print >>sys.stderr, "email :", mail

        # Percent-encode the address but keep its "@" readable.
        quoted_email = quote(mail).replace("%40", "@")
        print >>output, templates["reporter"] % {
            "url": url,
            "email": escapechars(mail),
            "quoted_email": quoted_email,
        }

    # Link to the upstream bug only when RemoteBts.find() recognises the
    # forwarded location (presumably: it belongs to a configured tracker).
    forwarded = bug.getForwarded()
    if forwarded and RemoteBts.find(forwarded):
        print >>output, templates["forwarded-debian"] % {
            "curbug": url,
            "newbug": forwarded,
        }

    return True

#         # resolution
#         resolution = bug.findtext("./resolution")
#         if resolution:
#             # convert resolution
#             resolution = resolution.upper()
#             if resolution == "WONTFIX":
#                 resolution_s = "WontFix"
#             elif resolution == "WORKSFORME":
#                 resolution_s = "WorksForMe"
#             elif resolution == "THIRDPARTY":
#                 resolution_s = "ThirdParty"
#             else:
#                 resolution_s = resolution.capitalize()

#             print >>output, templates["resolution"] % {
#                             "url": url,
#                             "resolution": resolution_s
#                             }

#         # now let's look at comments
#         comment_id = 0
#         for comment in bug.findall("./long_desc"):
#             who = comment.find("./who")
#             nick = quote_plus(who.get("name").encode('utf-8'))
#             email = who.text
#             quoted_email = email.replace(" ", "+") # remove blank spaces from emails
#             comment_ts = parse_ts(comment.findtext("./bug_when"))
#             description = escapechars(comment.findtext("./thetext").encode("utf-8"))
#             print >>output, templates["comment"] % {
#                             "url": url,
#                             "comment_id": comment_id,
#                             "email": email,
#                             "quoted_email": quoted_email,
#                             "nick": nick,
#                             "description": description,
#                             "comment_ts": comment_ts
#                             }
#             comment_id += 1


if __name__ == "__main__":
    # Walk every configured bug tracker and write the turtle representation
    # of each of its bugs to standard output; progress and errors go to
    # standard error.  The first failure aborts the run with exit status 1.

    RemoteBts.setup(Cnf.resources())
    btsi = bts.BtsInterface(Cnf)

    # Instantiate our btsutils.debbugs compliant interface
    spoolbts = debbugsloc(btsi,False)

    for tracker_name, tracker_type, tracker_url, first_bug, last_bug in bugtrackers:

        # The header is written once per tracker, together with the first
        # bug that is actually found.
        output_header = True

        for batch_start in range(first_bug, last_bug, BUG_STEP):
            # Clamp the final batch so that no bug number at or beyond
            # last_bug is processed when the range is not a multiple of
            # BUG_STEP.
            batch_end = min(batch_start + BUG_STEP, last_bug)
            print >>sys.stderr, "Getting [%s] %d - %d.." % (tracker_name, batch_start, batch_end)

            for bug_number in range(batch_start, batch_end):
                print >>sys.stderr, "Processing bug %d.." % bug_number,
                try:
                    parsed = parse_bug(spoolbts, bug_number, output_header=output_header)
                    print >>sys.stderr, "ok"
                except Exception:
                    # sys.exc_info() is used instead of the deprecated
                    # sys.exc_value and works on every Python 2 release.
                    traceback.print_exc()
                    print >>sys.stderr, "error processing %d: %s" % (bug_number, sys.exc_info()[1])
                    sys.exit(1)

                if parsed:
                    output_header = False

    sys.exit(0)


#print data

Powered by WebSVN v1.61