#!/usr/bin/python
# Helper functions for working at Abertay.
# Adam Sampson <ats@offog.org>

import bs4
from cgi import escape
import hashlib
import netrc
import mechanize
import offog
import os
import re
import sys

def wr_temp(s):
    """Debugging aid: write the string s to tmp.html in the current
    directory, replacing any previous contents."""
    with open("tmp.html", "w") as dump:
        dump.write(s)

def status(*s):
    """Print a progress message to stdout.

    All arguments are converted with str() and concatenated (no separator),
    then printed on one line prefixed with ">>> "."""
    message = "".join(str(part) for part in s)
    # Single parenthesised expression: prints identically as a Python 2
    # print statement.
    print(">>> " + message)

# Grade points for each grade code seen on OASIS transcripts. Used both for
# the simple "RankGrade" sum and for the credit-weighted GPA.
GRADE_VALUES = {
    # Letter grades, in half-point steps.
    "A+": 4.5, "A": 4.0,
    "B+": 3.5, "B": 3.0,
    "C+": 2.5, "C": 2.0,
    "D+": 1.5, "D": 1.0,

    # The only non-letter code that earns any points.
    "MF": 0.5,

    # Codes worth no grade points.
    "F": 0.0,
    "CF": 0.0,
    "NS": 0.0,
    "IN": 0.0,
    "LA": 0.0,
    "NC": 0.0,  # "Not Complete"
    "DF": 0.0,  # Defer
    "EX": 0.0,  # Has extension
}

# Directory holding cached OASIS responses (one file per Cached key).
# expanduser("~") yields $HOME when it is set, but unlike
# os.getenv("HOME") it does not return None (and so break the string
# concatenation at import time) when HOME is missing from the environment.
CACHE_DIR = os.path.expanduser("~") + "/.cache/abertay"
offog.ensure_dir(CACHE_DIR)

class Cached:
    """One entry in the on-disk cache under CACHE_DIR.

    The entry is identified by an arbitrary sequence of keys: each key is
    converted with str(), the results are joined with "_", and the SHA-256
    hex digest of that string is used as the file name."""

    def __init__(self, *keys):
        joined = "_".join(str(k) for k in keys)
        digest = hashlib.sha256(joined).hexdigest()
        self.fn = CACHE_DIR + "/" + digest

    def get(self):
        """Return the cached contents, or None if the cache file cannot be
        opened or read (IOError)."""
        try:
            with open(self.fn, "rb") as cache_file:
                data = cache_file.read()
        except IOError:
            return None
        return data

    def put(self, value):
        """Store value in the cache and return it unchanged.

        The data is written to a ".new" sibling file which is then renamed
        over the real file name."""
        tmp_fn = self.fn + ".new"
        with open(tmp_fn, "wb") as cache_file:
            cache_file.write(value)
        os.rename(tmp_fn, self.fn)
        return value

class OASIS:
    """Wrapper around Abertay's web frontend for SITS.

    Holds a single mechanize browser session. The fetch_* methods return the
    raw HTML of a report page (served from the on-disk cache when available);
    the matching get_* methods parse that HTML into lists of dictionaries
    keyed by the table's column headings."""

    def __init__(self):
        # The browser and menu links are only filled in by login().
        self.br = None
        self.portal_link = None
        self.staff_link = None

    def login(self):
        """Log in to OASIS and remember the links to the reports we use.

        Does nothing if a previous call already succeeded (staff_link is
        set). The username and password are read from the user's netrc entry
        for oasis.abertay.ac.uk. After logging in, the links for Student Look
        Up, Module Class List, Programme Class List and Student Academic
        Transcript are stored in slu_link, mcl_link, pcl_link and sat_link.

        Unexpected pages are detected with assert statements."""

        if self.staff_link is not None:
            # Already logged in.
            return

        status("Logging in to OASIS")
        nrc = netrc.netrc()
        # The "account" field of the netrc entry is not used.
        login, account, password = nrc.authenticators("oasis.abertay.ac.uk")

        self.br = mechanize.Browser()
        self.br.set_handle_robots(False)
        #self.br.set_debug_http(True)
        #self.br.set_debug_responses(True)

        self.br.open("https://oasis.abertay.ac.uk/oasis/sits.urd/run/siw_lgn")
        assert self.br.viewing_html()

        self.br.select_form(nr=0)
        # Must be name not id
        self.br["MUA_CODE.DUMMY.MENSYS.1"] = login
        self.br["PASSWORD.DUMMY.MENSYS.1"] = password
        self.br.submit()

        # A successful login is expected to land on a "User Redirect" page
        # containing a link to the portal.
        assert self.br.viewing_html()
        assert self.br.title() == "User Redirect"

        self.portal_link = self.br.find_link(url_regex=r'siw_portal')
        self.br.follow_link(self.portal_link)
        assert self.br.viewing_html()

        self.staff_link = self.br.find_link(text_regex=r'MySITS')
        self.br.follow_link(self.staff_link)
        assert self.br.viewing_html()

        # Remember the report links found on this page; the other methods
        # follow them later.
        self.slu_link = self.br.find_link(text='Student Look Up')
        self.mcl_link = self.br.find_link(text='Module Class List')
        self.pcl_link = self.br.find_link(text='Programme Class List')
        self.sat_link = self.br.find_link(text='Student Academic Transcript')

    def student_lookup(self, student_id):
        """Navigate the browser to the record for student_id.

        Logs in if necessary, runs the Student Look Up search and follows the
        result link whose text equals student_id. Returns nothing; the
        browser is left on the resulting page."""
        self.login()

        status("Student lookup for ", student_id)
        self.br.follow_link(self.slu_link)
        assert self.br.viewing_html()
        # Search form

        self.br.select_form(nr=0)
        self.br["STU_CODE.DUM1.CAMS.1-1"] = student_id
        self.br.submit()
        assert self.br.viewing_html()
        # Search results page

        self.br.follow_link(self.br.find_link(text=student_id))
        assert self.br.viewing_html()

        if self.br.title().startswith("SCJ & SPR"):
            # Student with multiple records 1234567/1, 1234567/2...
            # (although not all students with a /2 do this!)
            # The first is the most recent.
            # The selection you make affects what shows up in the Academic
            # Transcript report, but you can still see all the modules in
            # Progress and Results.
            self.br.select_form(nr=0)
            control = self.br.form.find_control("SPR_LIST.DUMMY.MENSYS.1")
            # NOTE(review): this selects items[1], not items[0] -- presumably
            # items[0] is a placeholder entry; confirm against the live form.
            control.items[1].selected = True
            self.br.submit()

        # "Student View" tab for staff

    def fetch_transcript(self, student_id):
        """Return the raw HTML of the Academic Transcript report.

        If student_id contains no "/", "/*" is appended so that all
        registrations are searched. The result is cached on disk under the
        (possibly extended) student id; on a cache hit no login or request
        is made."""
        if not "/" in student_id:
            # Search for all registrations.
            student_id += "/*"

        c = Cached("transcript", student_id)
        value = c.get()
        if value is not None:
            return value

        status("Transcript for ", student_id)
        self.login()

        self.br.follow_link(self.sat_link)
        assert self.br.viewing_html()

        self.br.select_form(nr=0)
        self.br["POP_UDEF.POP.MENSYS.1-1"] = student_id
        r = self.br.submit("BP103.DUMMY_B.MENSYS.1")
        assert self.br.viewing_html()
        # Academic Transcript page

        return c.put(r.read())

    def get_transcript(self, *args):
        """Return a student's module results as a list of dictionaries.

        Arguments are passed straight to fetch_transcript. Each dictionary
        maps a column heading of the results table to that row's cell text;
        the headings begin with "Year", "Semester", "Module", "Title"."""
        html = bs4.BeautifulSoup(self.fetch_transcript(*args), "lxml")
        items = []
        for table in html.find_all("table", class_="sv-datatable"):
            # Only look at tables that contain module results.
            # There may be several if we're searching for code/*.
            headings = [stringify(th.strings) for th in table.find_all("th")]
            if headings[:4] != ["Year", "Semester", "Module", "Title"]:
                continue
            parse_table(table, "Year", items)

        # Remove the "Total Credits:" line.
        items = [item for item in items
                 if not item["Title"].startswith("Total Credits")]

        return items

    def fetch_module_students(self, module, year, term):
        """Return the raw HTML of the Module Class List report.

        module like AG0700A14, CMP409
        year like 2014/5
        term like S1 (leaving it blank gets only one instance, not all)

        The result is cached on disk, keyed by all three arguments."""
        c = Cached("module_class_list", module, year, term)
        value = c.get()
        if value is not None:
            return value

        self.login()

        status("Class list for ", module, " ", year, " ", term)
        r = self.br.follow_link(self.mcl_link)
        assert self.br.viewing_html()
        # Query form

        self.br.select_form(nr=0)
        self.br["POP_UDEF.POP.MENSYS.1-1"] = module
        # 2-1 is "Module Occurrence" (default is "A")
        self.br["POP_UDEF.POP.MENSYS.3-1"] = year
        self.br["POP_UDEF.POP.MENSYS.4-1"] = term
        r = self.br.submit(name="BP103.DUMMY_B.MENSYS.1")
        assert self.br.viewing_html()

        # Module Student List page

        return c.put(r.read())

    def get_module_students(self, *args):
        """Return the students on a module as a list of dictionaries.

        Arguments are passed straight to fetch_module_students. The rows come
        from the first table on the page, whose first column must be
        "Student Number". Student numbers returned include /"""
        data = self.fetch_module_students(*args)
        html = bs4.BeautifulSoup(data, "lxml")

        items = []
        parse_table(html.find("table"), "Student Number", items)
        return items

    def fetch_programme_students(self, programme, year, stage=""):
        """Return the raw HTML of the Programme Class List report.

        programme like U-COMPUT
        year like 2014/5
        stage like 1, or empty for all stages

        The result is cached on disk, keyed by all three arguments."""
        c = Cached("programme_class_list", programme, year, stage)
        value = c.get()
        if value is not None:
            return value

        self.login()

        status("Programme list for ", programme, " ", year, " ", stage)
        r = self.br.follow_link(self.pcl_link)
        assert self.br.viewing_html()
        # Query form

        self.br.select_form(nr=0)
        self.br["POP_UDEF.POP.MENSYS.1-1"] = year
        self.br["POP_UDEF.POP.MENSYS.2-1"] = programme
        self.br["POP_UDEF.POP.MENSYS.3-1"] = stage
        r = self.br.submit(name="BP103.DUMMY_B.MENSYS.1")
        assert self.br.viewing_html()
        # Programme Student List page

        return c.put(r.read())

    def get_programme_students(self, *args):
        """Return the students on a programme as a list of dictionaries.

        Arguments are passed straight to fetch_programme_students. In
        addition to the table columns (the first of which must be "Student
        Number"), each dictionary gets a "Stage" entry taken from the panel
        preceding its table, and a "Surname" entry which is simply the last
        whitespace-separated word of "Name"."""
        data = self.fetch_programme_students(*args)
        html = bs4.BeautifulSoup(data, "lxml")

        all_items = []

        # For each stage, there's a <div class="sv-panel ..."> with the header
        # in, then a <div class="sv-table-container ..."> with the student
        # table.
        stage = None
        for div in html.find_all(class_=["sv-panel", "sv-table-container"]):
            if "sv-panel" in div["class"]:
                # The stage text is in the second "sv-col-md-9" element of
                # the panel.
                stage = list(div.find_all(class_="sv-col-md-9"))[1].string.strip()
            else:
                # A table container must have been preceded by its panel.
                assert stage is not None

                items = []
                parse_table(div.find("table"), "Student Number", items)
                for item in items:
                    item[u"Stage"] = stage
                    # XXX Obviously wrong but good enough for display!
                    item[u"Surname"] = item["Name"].split()[-1]
                    all_items.append(item)

                # Each panel header applies to exactly one table.
                stage = None

        return all_items

def stringify(ss):
    """Concatenate an iterable of strings into one tidy string.

    Every run of whitespace is collapsed to a single space and leading and
    trailing whitespace is removed. An empty iterable gives ""."""
    pieces = list(ss)
    if not pieces:
        return ""
    joined = u"".join(pieces)
    return re.sub(r'\s+', ' ', joined).strip()

def parse_table(table, expect0, items):
    """Extract the rows of an HTML table produced by OASIS.

    table is the parsed <table> element; its first <thead> heading must
    equal expect0 (checked with an assert). One dictionary per direct <tr>
    child of <tbody>, mapping heading text to cell text, is appended to the
    list items. Nothing is returned."""

    header_cells = table.find("thead").find_all("th")
    headings = [stringify(cell.strings) for cell in header_cells]
    assert headings[0] == expect0

    # Drop trailing columns that have no real heading.
    while headings[-1] in ("", "&nbsp"):
        headings.pop()

    for row in table.find("tbody").find_all("tr", recursive=False):
        cells = row.find_all("td", recursive=False)
        fields = [stringify(cell.strings) for cell in cells]
        record = {}
        for pos, heading in enumerate(headings):
            record[heading] = fields[pos]
        items.append(record)

def write_html_top(title, f):
    """Write the opening of a report page to the file object f.

    Emits the <html>/<head> section with the report stylesheet, then opens
    <body> and writes the (HTML-escaped) title both as the page <title> and
    as the <h1> heading."""
    safe_title = escape(title)
    pieces = [
        """<html>
<head>
<style type="text/css">
body, h1, h2, h3, p, ul { padding: 0; }
body {
    background: white;
    color: black;
    font: 12pt/1.2 "Liberation Serif", serif;
    margin: 12pt;
}
h1 {
    margin: 12pt 0;
    font-size: 24pt;
}
h2 {
    margin: 12pt 0;
    font-size: 16pt;
}
h3 { font-size: 14pt; }
h1, h2, h3 { font-weight: bold; }
.module { font-weight: normal; }
.author, h2, h3 { font-family: "Liberation Sans", sans-serif; }
.author {
    margin: 12pt 0;
    font-size: 16pt;
}
.affil { font-size: 12pt; }
p, ul {
    margin: 6pt 0;
}
li {
    margin: 6pt 0 6pt 20pt;
}

.grade, .modhead, .gpa, .gpahead { text-align: center; }
.grade { font-size: 18pt; }
.gradeAplus { background: #7f7; color: #000; }
.gradeA     { background: #7f7; color: #000; }
.gradeBplus { background: #ff7; color: #000; }
.gradeB     { background: #ff7; color: #000; }
.gradeCplus { background: #7ff; color: #000; }
.gradeC     { background: #7ff; color: #000; }
.gradeDplus { background: #77f; color: #000; }
.gradeD     { background: #77f; color: #000; }
.gradeMF    { background: #900; color: #fff; }
.gradeF     { background: #600; color: #fff; }
.gradeNS    { background: #000; color: #fff; }
.gradeIN    { background: #000; color: #fff; }
.gradeunknown { background: #777; color: #000; }
</style>
<title>""",
        safe_title,
        """</title>
</head>
<body>
<h1>""",
        safe_title,
        """</h1>
""",
    ]
    f.write("".join(pieces))

def write_html_bottom(f):
    """Write the closing </body> and </html> tags to the file object f."""
    f.write("</body>\n</html>\n")

def write_distribution_report(routes, prefixes, year, filename):
    """Write an HTML table of the grades awarded in each module.

    routes   -- programme codes whose students should be examined
    prefixes -- module code prefixes to include (e.g. ("CMP", "MAT"))
    year     -- academic year, like "2016/7"; only results whose "Year"
                equals this are counted
    filename -- path of the HTML file to write

    Uses the module-level `oasis` session. Each transcript entry examined
    is tagged with a "Student Number" key as a side effect."""
    status("Writing distribution report to ", filename)

    prefixes = tuple(prefixes)

    # Find all the students who may have taken these modules.
    seen = set()
    modules = {}
    for route in routes:
        items = oasis.get_programme_students(route, year)
        print("%s has %d students" % (route, len(items)))
        for item in items:
            sid = item["Student Number"]
            if sid in seen:
                # Listed on more than one route (or more than once); don't
                # count their grades twice.
                continue
            seen.add(sid)

            tr = oasis.get_transcript(sid)
            item["Transcript"] = tr

            for tri in tr:
                tri["Student Number"] = sid
                if tri["Year"] != year:
                    continue
                module = tri["Module"]
                if not module.startswith(prefixes):
                    continue
                modules.setdefault(module, []).append(tri)

    # "with" makes sure the file is closed even if writing fails part-way.
    with open(filename, "w") as f:
        write_html_top("%s modules in %s" % ("/".join(prefixes), year), f)

        f.write("<table>\n")

        # XXX This is incomplete.

        # One row per module: the module code, then every grade awarded,
        # sorted by the grade string.
        for module, tris in sorted(modules.items()):
            f.write("<tr>\n")
            f.write("<td>%s</td>\n" % escape(module))
            f.write("<td>")
            for tri in sorted(tris, key=lambda tri: tri["Grade"]):
                f.write(escape(tri["Grade"]) + " ")
            f.write("</td>\n")
            f.write("</tr>\n")

        f.write("</table>\n")

        write_html_bottom(f)

def get_students(routes, years):
    """Collect the students registered on any of routes in any of years.

    Students are keyed by "Student Number"; when the same student turns up
    more than once, the entry seen last (years are the outer loop, routes
    the inner) replaces the earlier ones, so Session/Stage reflect the last
    time we saw them. Returns the values of that mapping."""

    latest = {}

    for year in years:
        for route in routes:
            listing = oasis.get_programme_students(route, year)
            latest.update((entry["Student Number"], entry)
                          for entry in listing)

    return latest.values()

def credit_value(module):
    """Return the credit value of a module given the code.

    Two modules have special values; otherwise six-character codes are
    worth 20 credits and all other codes 15."""

    special_cases = (
        ("CMP402", 40),  # XXX BA project?
        ("CMP504", 60),
    )
    for code, credits in special_cases:
        if module == code:
            return credits

    return 20 if len(module) == 6 else 15

def get_student_info(item):
    """Get transcript and compute summary information for a student.

    item is a student dictionary containing at least "Student Number". The
    following keys are added to it:

      "Transcript" -- the full transcript from oasis.get_transcript
      "RankGrade"  -- sum of grade points over all graded modules
      "GPA"        -- credit-weighted mean grade points (0.0 if there is
                      nothing to average)

    Returns the transcript with entries whose "Year" sorts before "2005"
    removed. The entries are the same dictionaries as in
    item["Transcript"], and the "Level" of elective (EL*) entries may be
    rewritten in place.

    Raises KeyError if a non-empty grade is not in GRADE_VALUES."""

    tr = oasis.get_transcript(item["Student Number"])
    item["Transcript"] = tr

    # Remove very old modules that have numeric grades.
    tr = [tri for tri in tr if tri["Year"] >= "2005"]

    # There used to be a check here that printed any grade missing from
    # GRADE_VALUES and exited. It was disabled because there are some grades
    # on OASIS whose meaning isn't known when doing large analyses.

    # Figure out the total number of grade points they've achieved.
    item["RankGrade"] = sum(GRADE_VALUES[tri["Grade"]]
                            for tri in tr
                            if tri["Grade"] != "")

    # All elective modules are level 2 on OASIS. But for GPA purposes, we need
    # to know which year they actually did it in... which we can only do
    # approximately, because a student might (for example) fail the elective in
    # Y1, and carry it into Y2 along with another elective, at which point we
    # can't tell which elective is which.
    for tri in tr:
        if not tri["Module"].startswith("EL"):
            continue

        # Count the levels of the non-elective modules they did in the same
        # year...
        counts = {}
        for other in tr:
            if (other["Year"] == tri["Year"]
                    and not other["Module"].startswith("EL")):
                level = other["Level"]
                counts[level] = counts.get(level, 0) + 1

        # ... and use the most common level. If the elective was the only
        # thing they took that year there is nothing to go on, so keep the
        # level OASIS reported rather than failing on an empty list.
        # XXX This is wrong when an elective is carried.
        if counts:
            levels = sorted(counts.items(), key=lambda t: -t[1])
            tri["Level"] = levels[0][0]

    if len(tr) == 0:
        item["GPA"] = 0.0
    else:
        # Abertay's GPA algorithm, approximately. See academic regs #70.
        # XXX This appears OK for Y3/4 but not for Y1/2...

        # XXX Cut off previous results when a student has gone down in level
        # (e.g. switched from Basketweaving Y3 to Computing Y2).

        # Only the highest level counts, except that level 4 is combined
        # with level 3.
        highest_level = max(tri["Level"] for tri in tr)
        if highest_level == "4":
            gpa_levels = ["3", "4"]
        else:
            gpa_levels = [highest_level]

        # GPA is weighted by module credit values.
        total_credits = 0
        grade_sum = 0.0
        for tri in tr:
            if tri["Level"] in gpa_levels and tri["Grade"] != "":
                credits = credit_value(tri["Module"])
                total_credits += credits
                grade_sum += GRADE_VALUES[tri["Grade"]] * credits
        if total_credits > 0:
            gpa = grade_sum / total_credits
        else:
            # No graded modules at the relevant level(s).
            gpa = 0.0
        item["GPA"] = gpa

    return tr

def key_performance(item):
    """Sort key that puts the student with the highest GPA first.

    (The negated "RankGrade" total could be used here instead.)"""
    gpa = item["GPA"]
    return -gpa

def write_career_report(routes, first_year, filename,
                        years=("2013/4", "2014/5", "2015/6",
                               "2016/7", "2017/8", "2018/9")):
    """Write a report on the students who started in a given year.

    routes     -- programme codes to search
    first_year -- academic year, like "2015/6"; only students whose
                  earliest transcript entry (after get_student_info's
                  filtering) is in this year are included
    filename   -- path of the HTML file to write
    years      -- academic years in which to look for registrations on
                  routes

    Students are grouped by the lowest level found on their transcript
    ("Entered at stage N") and ordered by key_performance within a group.
    Students with an empty transcript are skipped."""
    status("Writing career report to ", filename)

    students = []
    for item in get_students(routes, years):
        tr = get_student_info(item)

        # Without any modules we can tell neither when nor at what stage
        # they started.
        if not tr:
            continue

        # We want students who first took a module in first_year.
        if min(tri["Year"] for tri in tr) != first_year:
            continue

        # Figure out what stage they entered at.
        item["FirstStage"] = min(tri["Level"] for tri in tr)

        students.append(item)

    student_groups = {}
    for item in sorted(students, key=key_performance):
        label = "Entered at stage %s" % item["FirstStage"]
        student_groups.setdefault(label, []).append(item)

    title = ("Students on %s who first took a module in %s"
             % ("/".join(routes), first_year))
    write_groups_report(title, student_groups, filename)

def write_current_report(routes, current_year, filename):
    """Write a report on the students registered on routes in current_year.

    Students are grouped by their current "Stage" and ordered by
    key_performance within each group. Transcript entries from before the
    year in which a student would have started stage 1 are anonymised in
    place before the report is written."""
    status("Writing current report to ", filename)

    # What a collapsed (pre-course) transcript entry is overwritten with.
    placeholder = {"Module": "xxxxxx", "Semester": "Sx", "Grade": ""}

    students = []
    for item in get_students(routes, (current_year,)):
        transcript = get_student_info(item)

        # Collapse modules in previous years to simplify the display.
        # (Don't remove them entirely, because we do want to show that students
        # had *something* prior to what we're displaying.)
        years_back = int(item["Stage"]) - 1
        earliest = str(int(current_year[:4]) - years_back)
        for entry in transcript:
            if tri_is_old(entry, earliest):
                entry.update(placeholder)

        students.append(item)

    student_groups = {}
    for item in sorted(students, key=key_performance):
        label = "Currently in stage %s" % item["Stage"]
        if label not in student_groups:
            student_groups[label] = []
        student_groups[label].append(item)

    title = "Students on %s in %s" % ("/".join(routes), current_year)
    write_groups_report(title, student_groups, filename)


def tri_is_old(tri, earliest):
    """Return True if transcript entry tri is from a year before earliest
    (compared as strings)."""
    return tri["Year"] < earliest

def write_groups_report(title, student_groups, filename):
    """Write an HTML report with one grade table per group of students.

    title          -- page title and heading
    student_groups -- dictionary mapping a group label to a list of student
                      dictionaries; each needs "Student Number", "Surname",
                      "GPA" and "Transcript"
    filename       -- path of the HTML file to write

    Groups are written in label order. Each table has one row per student
    (in the order given) and one column per distinct module/year/semester
    taken by anyone in the group, plus a GPA column.

    Elective (EL*) module codes in the transcripts are rewritten in place so
    that all electives share one notional module."""

    def module_key(tri):
        # Identifies a table column.
        return tri["Module"] + "-" + tri["Year"] + "-" + tri["Semester"]

    # "with" makes sure the file is closed even if writing fails part-way.
    with open(filename, "w") as f:
        write_html_top(title, f)

        for label, these_students in sorted(student_groups.items()):
            f.write("<h2>%s (%d students)</h2>\n"
                    % (escape(label), len(these_students)))

            # Collect the list of modules that students took.
            modules = {}
            for item in these_students:
                for tri in item["Transcript"]:
                    # Collapse all the electives into one notional module.
                    if tri["Module"].startswith("EL"):
                        tri["Module"] = tri["Module"][:4] + u"xx"

                    modules[module_key(tri)] = {
                        "Module": tri["Module"],
                        "Year": tri["Year"],
                        "Semester": tri["Semester"],
                        }

            # Columns run in chronological order, then by module code.
            def sort_module(code):
                info = modules[code]
                return info["Year"] + "," + info["Semester"] + "," + code
            module_codes = sorted(modules.keys(), key=sort_module)

            f.write("<table><tr>\n")
            f.write("<th>Student</th>")
            for i, code in enumerate(module_codes):
                heading = escape(code[:6])
                heading += "<br>" + escape(modules[code]["Year"][:4])
                heading += "&nbsp;" + escape(modules[code]["Semester"])
                f.write('<th class="modhead">%s</th>' % heading)
                # Remember which column this module is in.
                modules[code]["Pos"] = i
            f.write('<th class="gpahead">GPA</th>')
            f.write("</tr>\n")

            for item in these_students:
                f.write("<tr>")
                f.write("<td>%s" % escape(item["Student Number"]))
                f.write("<br>%s" % escape(item["Surname"]))
                f.write("</td>")

                # Slot this student's results into the right columns. If
                # two entries share a column, the later one wins.
                mods = [None] * len(module_codes)
                for tri in item["Transcript"]:
                    mods[modules[module_key(tri)]["Pos"]] = tri

                for tri in mods:
                    if tri is None:
                        f.write("<td></td>")
                        continue

                    grade = tri["Grade"]
                    if grade == "":
                        f.write('<td class="grade gradeunknown">?</td>')
                    else:
                        # The grade comes from scraped HTML, so escape it
                        # (including quotes) before putting it inside the
                        # class attribute.
                        css_grade = escape(grade.replace("+", "plus"), True)
                        f.write('<td class="grade grade%s">%s</td>'
                                % (css_grade, escape(grade)))

                f.write('<td class="gpa">%.2f</td>' % item["GPA"])

                f.write("</tr>\n")
            f.write("</table>\n")

        write_html_bottom(f)

if __name__ == "__main__":
    # The report functions above reach the OASIS session through this
    # module-level name.
    oasis = OASIS()

    if False:
        # Disabled: grade distribution report.
        # Routes that contain CMP/MAT modules.
        routes = [
            "D-COMPUT",
            "U-COMPUT",
            "U-ETHCOU", #??
            "U-ETHHAC", "U-ETHHAC-A",
            "U-CGTECH",
            "U-CGADEV", "U-CGADEV-A",
            "U-GDPMGT", "U-GDPMGT-A",
            "U-COMNET",
            "U-DIGFOS",
            "U-WEBDCO",
            "U-AMGMOD",
            #"U-UGVAMG",  # none in 2016/7

            # These don't have any students registered on OASIS...
            #"P-ETHHAC",
            #"P-CGTECH",
            #"P-GAMEDV",
        ]

        for year in ("2016/7",):
            fn = "reports/distribution-%s.html" % year[:4]
            write_distribution_report(routes, ("CMP", "MAT"), year, fn)
        sys.exit(0)

    # Each tuple lists the codes of one programme; the first code containing
    # a "-" is used to name that group's report files.
    route_groups = [
        ("COMPS", "U-COMPUT"),
        ("ETHOS", "U-ETHCOU", "ETHAS", "U-ETHHAC", "ETHAA", "U-ETHHAC-A"),
        ("CGTCS", "U-CGTECH"),
        ("CGADS", "U-CGADEV", "CGADA", "U-CGADEV-A"),
        #("DACOM", "D-COMPUT"),
        #("U-WEBDCO",), # old?
        #("U-COMNET",), # old?
        #("U-DIGFOS",), # old?

        #("CSPCO", "U-SOUNDG"),
        #("COGAS", "U-COMART", "COGAA", "U-COMART-A"),
        #("GDPMS", "U-GDPMGT", "GDPMA", "U-GDPMGT-A"),
        #("U-SOUNDG",), # old?

        # These ones don't work correctly...
        #("ETHOP", "P-ETHHAC"),
        #("SWEGP", "P-CGTECH"),
        #("GAMEP", "P-GAMEDV"),
        ]
    #first_years = ["2013/4", "2014/5", "2015/6", "2016/7", "2017/8"]
    first_years = ["2015/6", "2016/7", "2017/8"]

    # The last cohort year doubles as the "current" year.
    latest_year = first_years[-1]

    for route_group in route_groups:
        hyphenated = [route for route in route_group if "-" in route]
        cur_route = hyphenated[0]

        # One report on everybody currently on the programme...
        write_current_report(route_group, latest_year,
                             "reports/current-%s.html" % cur_route)

        # ... and one per starting cohort.
        for first_year in first_years:
            career_fn = ("reports/career-%s-%s.html"
                         % (cur_route, first_year[:4]))
            write_career_report(route_group, first_year, career_fn)

# XXX tool to show transcript for a student
# XXX use this to generate class lists