#!/usr/bin/python

# Copyright (c) 2009 Olivier Berger and Institut TELECOM

# This tool is published under the terms of the GNU General Public
# License version 3 or above.

# This tool is used in order to convert a multi-framed HTML
# documentation of an ontology generated with Protege's OWLDoc plugin
# into a single HTML file, using htmldoc.

# There are 2 steps :
#  - option 'book' helps generate a .book file for batch processing by
#     htmldoc : python ../owldoc2htmldoc.py book ../helios_bt/ontologies/index.html helios_bt >book.book
#  - option 'convert' helps get rid of external links to files not
#     considered in the book : python ../owldoc2htmldoc.py convert ../helios_bt/ontologies/index.html helios_bt book.html >index.html

# This tool was developed as part of the Helios project to help
# document the ontologies designed in the project.

from BeautifulSoup import BeautifulSoup

import sys
import os
import re
# Raw command line: arguments[0] is the script path, arguments[1] the action
# name, and the remaining entries are that action's parameters.
arguments = sys.argv

def print_usage():
    """Print the command-line synopsis of this tool on standard output.

    The synopsis names the real script and lists the extra ``book.html``
    argument that the 'convert' action requires (the argument count checks
    below expect 4 words for 'book' and 5 for 'convert').
    """
    # Single-argument parenthesised prints behave the same with the print
    # statement and with the print function.
    print("USAGE:")
    print("owldoc2htmldoc.py action ontologies/index.html ontology [book.html]")
    print("  with action in : book, convert")
    print("  (book.html, the file generated by htmldoc, is required by 'convert' only)")


###############################################
# PROGRAM STARTS HERE

# Number of command-line words (script path included) each action expects.
expectedArgCounts = {"book": 4, "convert": 5}

# The action name must be present before it can be looked up.
if len(arguments) < 2:
    print_usage()
    sys.exit(1)

action = arguments[1]

# An unknown action maps to None, which never equals an argument count, so
# this single comparison rejects both unknown actions and wrong arities.
if expectedArgCounts.get(action) != len(arguments):
    print_usage()
    sys.exit(1)


# Positional parameters shared by both actions: the OWLDoc index page and
# the name of the ontology that page is expected to describe.
filename = arguments[2]
ontology = arguments[3]

# 'convert' additionally takes the single HTML file to post-process.
if action == "convert":
    bookHtmlFileName = arguments[4]

# The index page sits in <owldoc>/ontologies/; the sibling directories
# classes/, objectproperties/ and dataproperties/ hold the entity pages.
#
# os.path.dirname() returns "" for a path without a directory part, and
# "" + "/classes" would wrongly denote /classes at the filesystem root.
# Falling back to the current directory and climbing with os.pardir keeps
# every derived path relative when the input path is relative.
ontologiesDir = os.path.dirname(filename) or os.curdir
owldocDir = os.path.normpath(os.path.join(ontologiesDir, os.pardir))
classesDir = os.path.normpath(os.path.join(owldocDir, "classes"))
objectPropertiesDir = os.path.normpath(os.path.join(owldocDir, "objectproperties"))
dataPropertiesDir = os.path.normpath(os.path.join(owldocDir, "dataproperties"))


# Read the ontologies index page, closing the file as soon as its content
# has been read (and without shadowing the builtin name 'file').
indexFile = open(filename)
try:
    html = indexFile.read()
finally:
    indexFile.close()
soup = BeautifulSoup(html)
#print soup.prettify()

# The link carrying the class "active-ontology-uri" names the ontology the
# documentation was generated for and points to its main page.
a = soup.find("a", { "class" : "active-ontology-uri"})

# Diagnostics go to stderr: stdout carries the generated output, which the
# documented invocations redirect into a file.
if a is None :
    print >>sys.stderr, "Error : no active ontology link found in", filename
    sys.exit(1)

mainOnto = a.string

if ontology != mainOnto :
    print >>sys.stderr, "Error : found active ontology", mainOnto, "doesn't match wanted", ontology
    sys.exit(1)


# Lists of HTML pages collected for each section of the documentation.
ontoHtmls=[]
classesHtmls=[]
objectPropertiesHtmls=[]
dataPropertiesHtmls=[]

ontoMainHtml = a['href']
#print ontoMainHtml

# os.path.join() copes with an empty directory part, where plain
# concatenation with "/" would produce a path at the filesystem root.
ontoMainFilename = os.path.join(ontologiesDir, ontoMainHtml)
#print ontoMainFilename
ontoHtmls.append(ontoMainFilename)

# Read the main page of the ontology, closing the file once it is read.
ontoMainFile = open(ontoMainFilename)
try:
    html = ontoMainFile.read()
finally:
    ontoMainFile.close()
soup = BeautifulSoup(html)
#print soup.prettify()

# Targets of the links labelled "Classes", "Object Properties" and
# "Data Properties"; they stay None when the page has no such link.
indexClassesOntoFilename = None
indexObjectPropertiesFilename = None
indexDataPropertiesFilename = None

for a in soup.findAll("a") :
    if a.string == "Classes" :
        indexClassesOntoFilename = a["href"]
    elif a.string == "Object Properties":
        indexObjectPropertiesFilename = a["href"]
    elif a.string == "Data Properties":
        indexDataPropertiesFilename = a["href"]

# All three targets are used unconditionally further down; report a missing
# one here (on stderr, since stdout carries the generated output) instead
# of failing later on an undefined name.
for label, href in (("Classes", indexClassesOntoFilename),
                    ("Object Properties", indexObjectPropertiesFilename),
                    ("Data Properties", indexDataPropertiesFilename)) :
    if href is None :
        print >>sys.stderr, "Error : no link named", label, "found in", ontoMainFilename
        sys.exit(1)


def _list_index_pages(entityDir, indexPath) :
    """Return the index page followed by every page it lists.

    entityDir -- directory against which the links of the index resolve
    indexPath -- path of the index page to read

    The listed pages are the targets of the links found inside the first
    <div class="codebox"> of the index, each joined to entityDir and
    normalised. When the index has no such div, a warning is written to
    stderr and only the index page itself is returned.
    """
    indexFile = open(indexPath)
    try:
        indexSoup = BeautifulSoup(indexFile.read())
    finally:
        # Release the handle as soon as the content has been parsed.
        indexFile.close()

    pages = [indexPath]
    codebox = indexSoup.find("div", { "class" : "codebox"})
    if codebox is None :
        print >>sys.stderr, "Warning : no codebox found in", indexPath
        return pages
    for link in codebox.findAll("a") :
        pages.append(os.path.normpath(entityDir + "/" + link["href"]))
    return pages


# Classes: the index page first, then every class page it lists.
indexClassesOntoFilename = os.path.normpath(classesDir + "/" + indexClassesOntoFilename)
classesHtmls.extend(_list_index_pages(classesDir, indexClassesOntoFilename))

# Object properties, handled the same way.
indexObjectPropertiesFilename = os.path.normpath(objectPropertiesDir + "/" + indexObjectPropertiesFilename)
objectPropertiesHtmls.extend(_list_index_pages(objectPropertiesDir, indexObjectPropertiesFilename))

# Data properties, handled the same way.
indexDataPropertiesFilename = os.path.normpath(dataPropertiesDir + "/" + indexDataPropertiesFilename)
dataPropertiesHtmls.extend(_list_index_pages(dataPropertiesDir, indexDataPropertiesFilename))



###############################################
# Action : book

if action == "book" :
    # Header of the generated .book file: a version marker line followed by
    # the line of htmldoc options.
    print('#HTMLDOC 1.8.27')
    print('-t html -f "book.html" --book --toclevels 3 --no-numbered --toctitle "Table of Contents" --title --linkstyle plain --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --browserwidth 680 --no-strict --no-overflow')

    # Then one input page per line: a fixed about page, followed by the
    # collected pages in the order ontology, classes, object properties,
    # data properties.
    print("about.html")
    for htmlList in (ontoHtmls, classesHtmls, objectPropertiesHtmls, dataPropertiesHtmls) :
        for i in htmlList :
            print(i)




###############################################
# Action : convert

####################################
# utility functions for action : convert

def add_anchor(dict,prefix,file,tag) :
    """Record the anchor name carried by the parent element of tag.

    dict   -- mapping updated in place
    prefix -- string prepended to file to build the first key
    file   -- page file name; also used on its own as the second key
    tag    -- node whose parent's attributes are inspected

    The parent's attrs are iterated as (name, value) pairs. For every
    attribute called 'name', its value is stored under both prefix+file
    and file. Nothing is stored when the parent has no such attribute.
    """
    for attrName, attrValue in tag.parent.attrs :
        if attrName != 'name' :
            continue
        # Chained assignment fills the prefixed key first, then the bare one.
        dict[prefix+file] = dict[file] = attrValue

def process_html_page(dict, htmlPage, title, path) :
    """Register in dict the anchor of the book section made from htmlPage.

    dict     -- mapping of page names to anchor names, updated in place
    htmlPage -- path of one of the original OWLDoc pages
    title    -- heading prefix used for entity pages, e.g. "Class"
    path     -- OWLDoc sub-directory of the page, e.g. "classes"

    Reads the module-level variables 'soup' (the parsed document that is
    searched) and 'ontology'. The page whose base name is
    "index-<ontology>" is looked up by text matching
    "<ontology>: <path> ..."; every other page by the exact text
    "<title>: <base name>". Each text node found is handed to add_anchor()
    with the link prefix "../<path>/".
    """
    file = os.path.basename(htmlPage)
    classname = os.path.splitext(file)[0]
    linkPrefix = "../" + path + "/"

    if classname == "index-" + ontology :
        # The ontology name is literal text, not a pattern: escape it so
        # that characters such as '.' or '+' only match themselves.
        heading = re.compile(re.escape(ontology + ": " + path) + " .*")
    else :
        heading = title + ": " + classname

    for t in soup.findAll(text=heading) :
        add_anchor(dict, linkPrefix, file, t)

    
####################################
# now the convert code

if action == "convert" :
    # Read the single HTML file to post-process, closing it once read.
    bookHtmlFile = open(bookHtmlFileName)
    try:
        html = bookHtmlFile.read()
    finally:
        bookHtmlFile.close()
    soup = BeautifulSoup(html)
    #print soup.prettify()

    # remove footers of OWLDoc
    for p in soup.findAll('p', { "class": "footer"}) :
        p.extract()

    # Change style ?

    # Map each original page, under both its "../<dir>/<file>" form and
    # its bare file name, to the anchor of its section in the book.
    anchors = {}

    # Ontology pages go through add_anchor() like all the other pages, so
    # a heading without a 'name' attribute simply yields no entry. Storing
    # None for it would make the "#" + anchor concatenation below fail.
    for pagePath in ontoHtmls :
        pageFile = os.path.basename(pagePath)
        onto = os.path.splitext(pageFile)[0]
        for t in soup.findAll(text="Ontology: "+onto) :
            add_anchor(anchors, "../ontologies/", pageFile, t)

    for pagePath in classesHtmls :
        process_html_page(anchors,pagePath,"Class","classes")

    for pagePath in objectPropertiesHtmls :
        process_html_page(anchors,pagePath,"Object Property","objectproperties")

    for pagePath in dataPropertiesHtmls :
        process_html_page(anchors,pagePath,"Data Property","dataproperties")

    #print anchors


    # substitute links to external pages by internal links to the anchors
    for path, anchor in anchors.items() :
        #print "looking for path", path
        for a in soup.findAll('a', {'href' : path}) :
            a['href'] = "#" + anchor


    # remove href of all links to other classes (other ontologies)
    for a in soup.findAll('a',href=re.compile(r'^\.\./.*')) :
        del a['href']

    # remove href of all links to other pages
    for a in soup.findAll('a',href=re.compile(r'^[^#]')) :
        del a['href']

    # remove the opening of links in other frame
    for a in soup.findAll('a',target="content") :
        del a['target']

    # remove blocks for expansion of long lists (already expanded by htmldoc)
    for a in soup.findAll('a', { "class" : "subsexpand" }, id="showSubs") :
        a.extract()

    # The converted document is written to standard output.
    print(soup.prettify())


