#!/usr/bin/python
# Copyright Olivier Berger <olivier.berger@it-sudparis.eu> + Institut TELECOM, 2008-2009
# developed in the framework of the HELIOS project (http://www.helios-platform.org/)
# License : GNU LGPL V3
# Processes debian bugs to produce their RDF representation in turtle
# format, suitable to be consumed as EvoOnt bom ontology
# (http://www.ifi.uzh.ch/ddis/evoont/2008/11/bom/) and others
import sys
import traceback
#import urllib2
from urllib import quote,quote_plus
import re
import email.utils
import string
import time
# import these bits to extend debbugs from btsutils
from debbugsloc import debbugsloc
# Import bits from bts-link
from btslinkutils import BTSLConfig as Cnf
import btslinkbts
bts = btslinkbts
from btslinkremote import RemoteBts
# Number of consecutive bug numbers handled per batch by the main loop.
BUG_STEP = 50

# Bug trackers to export. The main loop unpacks each entry as
#   (name, type, bug URL pattern, first bug number, end bug number)
#
# Disabled Bugzilla sources (note they only carry four fields, so they would
# need a "type" field added before being re-enabled):
#   ("mandriva", "https://qa.mandriva.com/show_bug.cgi?ctype=xml&excludefield=attachment&%s", 0, 46200),
#   ("kde", "http://bugs.kde.org/show_bug.cgi?ctype=xml&excludefield=attachment&%s", 0, 177000),
#   ("kernel", "http://bugzilla.kernel.org/show_bug.cgi?ctype=xml&excludefield=attachment&%s", 0, 12200),
#
# Alternative Debian ranges that were used at some point:
#   ("debian", "debbugs", "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s", 250000, 501010),
#   ("debian", "debbugs", "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s", 283060, 284000),
bugtrackers = [
    (
        "debian",
        "debbugs",
        "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s",
        3170,
        501010,
    ),
]
# Needs to be improved
#email_r = re.compile("([^<]*<)?([^@]*)@([a-zA-Z.\-]+).*")
# Directory holding the output templates (relative to the current working
# directory, since the path is not made absolute).
TEMPLATE_DIR = "TEMPLATES"


def _read_template(name):
    """Return the full text of template file *name* found in TEMPLATE_DIR."""
    template_file = open("%s/%s" % (TEMPLATE_DIR, name))
    # try/finally rather than "with" so the file is always closed while the
    # script stays usable on old Python 2 interpreters.
    try:
        return template_file.read()
    finally:
        template_file.close()


# Template name -> template text, loaded once at import time.
# NOTE: "comment" and "resolution" are loaded but not referenced by the
# active code visible in this file (only by commented-out legacy code).
templates = {}
for t in ["header-debian", "bug-debian", "reporter", "package", "comment",
          "resolution", "software-debian", "forwarded-debian"]:
    templates[t] = _read_template(t)
# character escaping
def escapechars(s, chars=('"', "'", '\\')):
    """Return *s* with every character listed in *chars* backslash-escaped.

    Each character of *s* that is found in *chars* is prefixed with a
    backslash; all other characters are copied unchanged.  By default the
    double quote, the single quote and the backslash itself are escaped.

    Arguments:
      s     -- the string to escape
      chars -- container of the characters to escape (an immutable tuple by
               default, so the default can never be modified between calls)
    """
    return "".join(['\\%s' % c if c in chars else c for c in s])
def parse_bug(spoolbts, id, output=sys.stdout, output_header=False):
    """Write the turtle (RDF) representation of one Debian bug to *output*.

    The bug is fetched with ``spoolbts.get(str(id))`` and rendered through
    the module-level ``templates``: the bug itself, its software/package,
    its reporter (when an e-mail address can be extracted) and, when the bug
    is forwarded to a tracker accepted by ``RemoteBts.find``, the link to
    the forwarded bug.

    Arguments:
      spoolbts      -- bug source providing ``get(bug_number_as_string)``
      id            -- number of the bug to export
      output        -- file-like object receiving the turtle text
      output_header -- when true, the "header-debian" template is written
                       before the bug

    Returns False (writing nothing) when ``spoolbts.get`` yields no bug,
    True once the bug has been written.

    Diagnostics are written to sys.stderr only, so that they never end up
    inside the generated data when *output* is sys.stdout.
    """
    bug = spoolbts.get(str(id))
    if not bug:
        return False

    baseurl = "http://bugs.debian.org/cgi-bin/bugreport.cgi?bug="

    if output_header:
        print >>output, templates["header-debian"]

    bug_id = bug.getBug()
    url = "%s%s" % (baseurl, bug_id)

    # Parameters of the "bug-debian" template.  Several of them are fixed
    # placeholders rather than values read from the bug.
    params = {
        "bug_id": bug_id,
        "url": url,
        # same value as "url": there is no separate Bugzilla URL here
        "bugzilla_url": url,
        "short_desc": escapechars(bug.getSummary().encode("utf-8")),
        # assumes getDate() returns seconds since the epoch -- TODO confirm
        "creation_ts": time.strftime('%Y-%m-%dT%H:%M:%S',
                                     time.gmtime(bug.getDate())),
        "product": '',
        "sys": '',
        "bug_status": bug.getStatus(),
        # constant: not derived from the bug
        "priority": 5,
        "severity": bug.getSeverity(),
        "description": '',
    }
    print >>output, templates["bug-debian"] % params

    # The Debian package plays both the "product" and the "package" roles.
    package = bug.getPackage()
    product = package
    quoted_product = quote_plus(product)
    quoted_package = quote_plus(package)

    print >>output, templates["software-debian"] % {
        "url": url,
        "quoted_product": quoted_product,
        "product": product
    }
    print >>output, templates["package"] % {
        "quoted_product": quoted_product,
        "package": package,
        "quoted_package": quoted_package
    }

    # Reporter: only emitted when parseaddr() managed to extract something
    # that looks like an address (the submitter field may contain
    # non-standard characters that make the parsing fail).
    name, mail = email.utils.parseaddr(bug.getSubmitter())
    if '@' in mail:
        print >>sys.stderr, "name :", name
        print >>sys.stderr, "email :", mail
        # URL-quote the address but keep the "@" readable
        quoted_email = quote(mail).replace("%40", "@")
        print >>output, templates["reporter"] % {
            "url": url,
            "email": escapechars(mail),
            "quoted_email": quoted_email
        }

    forwarded = bug.getForwarded()
    # Debug trace: must go to stderr, stdout being the default data output.
    print >>sys.stderr, "forwarded:", forwarded
    # The link is only written when RemoteBts.find() returns a true value
    # for the forwarded-to location (presumably a remote tracker known to
    # bts-link -- verify against btslinkremote).
    if forwarded and RemoteBts.find(forwarded):
        print >>output, templates["forwarded-debian"] % {
            "curbug": url,
            "newbug": forwarded
        }

    return True
# # resolution
# resolution = bug.findtext("./resolution")
# if resolution:
# # convert resolution
# resolution = resolution.upper()
# if resolution == "WONTFIX":
# resolution_s = "WontFix"
# elif resolution == "WORKSFORME":
# resolution_s = "WorksForMe"
# elif resolution == "THIRDPARTY":
# resolution_s = "ThirdParty"
# else:
# resolution_s = resolution.capitalize()
# print >>output, templates["resolution"] % {
# "url": url,
# "resolution": resolution_s
# }
# # now let's look at comments
# comment_id = 0
# for comment in bug.findall("./long_desc"):
# who = comment.find("./who")
# nick = quote_plus(who.get("name").encode('utf-8'))
# email = who.text
# quoted_email = email.replace(" ", "+") # remove blank spaces from emails
# comment_ts = parse_ts(comment.findtext("./bug_when"))
# description = escapechars(comment.findtext("./thetext").encode("utf-8"))
# print >>output, templates["comment"] % {
# "url": url,
# "comment_id": comment_id,
# "email": email,
# "quoted_email": quoted_email,
# "nick": nick,
# "description": description,
# "comment_ts": comment_ts
# }
# comment_id += 1
if __name__ == "__main__":
    # Script entry point: export every bug of every configured tracker to
    # standard output, reporting progress on standard error.
    RemoteBts.setup(Cnf.resources())
    btsi = bts.BtsInterface(Cnf)
    # Instantiate our btsutils.debbugs compliant interface
    spoolbts = debbugsloc(btsi, False)

    # tracker_type and url_pattern are part of the configuration tuples but
    # are not needed here: bugs are read through spoolbts.
    for name, tracker_type, url_pattern, start, end in bugtrackers:
        # The header is requested until one bug has actually been written.
        output_header = True
        for batch_start in range(start, end, BUG_STEP):
            # Clamp the last batch so that no bug at or past "end" is
            # processed.
            batch_end = min(batch_start + BUG_STEP, end)
            print >>sys.stderr, "Getting [%s] %d - %d.." % (name, batch_start, batch_end)
            for bug_number in range(batch_start, batch_end):
                print >>sys.stderr, "Processing bug %d.." % bug_number,
                try:
                    parsed = parse_bug(spoolbts, bug_number,
                                       output_header=output_header)
                    print >>sys.stderr, "ok"
                except Exception:
                    # Any processing error is fatal; SystemExit and
                    # KeyboardInterrupt are deliberately not intercepted.
                    traceback.print_exc()
                    print >>sys.stderr, "error processing %d: %s" % (bug_number, sys.exc_info()[1])
                    sys.exit(1)
                if parsed:
                    # A bug (and thus the header) has been written: do not
                    # emit the header again for this tracker.
                    output_header = False

    sys.exit(0)
|