In [2]:
#We need a mapping of same master titles to different master acronyms at the different universities
master_acronym_mapping = {
    'Physics Engineering': ['EMPHYS'],
    'Fire Safety Engineering': ['EMFSEN','EMFIRE'],
    'Industrial Engineering and Operations Research': ['EMIEOR'],
    'Textile Engineering': ['EMTEXT'],
    'Civil Engineering': ['EMCIVI','VUBCivil'],
    'Biomedical Engineering': ['CQ_51360389','EMBIEN','EMBIME','VUBBioMed'],
    'Chemical Engineering': ['CQ_51370025','EMCHEM','VUBChem'],
    'Computer Science Engineering': ['CQ_52364384','EMCOSC','VUBCs'],
    'Electrical Engineering': ['CQ_50657365','EMELECCI','EMELECEC','VUBElec'],
    'Master of Engineering: Energy': ['CQ_51384404'],
    'Materials Engineering': ['CQ_50545818','EMMAEN','VUBChem'],
    'Logistics and Traffic Engineering': ['CQ_53845475'],
    '(Electro)Mechanical Engineering': ['CQ_52354207','EMELMEEE','EMELMEMC','EMELMEME','EMELMEMT','EMELMERA','VUBElecMech'],
    'Mathematical Engineering': ['CQ_52357094'],
    'Nanoscience, Nanotechnology and Nanoengineering': ['CQ_50269006','CQ_51228258'],
    'Bioinformatics': ['CQ_50269018','CMBIOISB','CMBIOIEN','CMBIOIBE'],
    'Master of Statistics': ['CQ_50550147'],
    'European Master of Photonics': ['EMEUPH','VUBPho'],
    'Architectural Engineering': ['VUBArch'],
    'Master of Physical Land Resources': ['VUBLand'],
    'Nuclear Engineering': ['EMEFUS','VUBNucl']
}


In [3]:
import math
from ortools.algorithms import pywrapknapsack_solver

from whoosh.qparser import QueryParser, MultifieldParser, OrGroup
from whoosh import scoring
from whoosh.index import open_dir
from whoosh.query import Term, Or
from whoosh.analysis import RegexTokenizer

def _expand_search_terms(query_str):
    """Turn a ', '-separated keyword string into weighted query snippets.

    Every phrase between the separators becomes a full-weight snippet
    '(phrase)'. Each individual token of a phrase additionally becomes a
    low-weight snippet '(token)^0.1', unless the token already occurs as a
    full-weight phrase of its own or was already added for an earlier phrase.
    """
    phrases = query_str.split(', ')
    full_terms = ['(%s)' % phrase for phrase in phrases]
    search_terms = list(full_terms)

    tokenizer = RegexTokenizer()
    seen_tokens = set()
    for phrase in phrases:
        for token in tokenizer(phrase):
            #Read the text inside the loop; only the current token is needed
            text = token.text
            if ('(%s)' % text) in full_terms or text in seen_tokens:
                continue
            seen_tokens.add(text)
            search_terms.append('(%s)^0.1' % text)

    return search_terms


def _pack_curriculum(courses, max_credits):
    """Select the best-scoring subset of courses that fits in max_credits.

    Returns [curriculum, computed_value, total_credits, considered_courses]:
    the selected courses keyed by their 'tag', the solver's objective value,
    the sum of the selected credits, and the courses that were not selected.
    """
    weights = [int(course['credits']) for course in courses]
    #The solver is given integer values, so scores are rounded up
    scores = [int(math.ceil(course.score)) for course in courses]

    if courses:
        # Create the solver.
        solver = pywrapknapsack_solver.KnapsackSolver(
            pywrapknapsack_solver.KnapsackSolver.
            KNAPSACK_MULTIDIMENSION_BRANCH_AND_BOUND_SOLVER,
            'curriculum')

        #Single capacity dimension: the ECTS budget of the semester
        solver.Init(scores, [weights], [int(max_credits)])
        computed_value = solver.Solve()
        packed_items = [idx for idx in range(len(courses))
                        if solver.BestSolutionContains(idx)]
    else:
        #Nothing to choose from: skip the solver and report an empty curriculum
        computed_value = 0
        packed_items = []

    total_credits = sum(weights[idx] for idx in packed_items)

    curriculum = {}
    for idx in packed_items:
        course = courses[idx]
        curriculum[course['tag']] = {
            'name': course['name'],
            'lecturer': course['lecturer'],
            'contact': course['contact'],
            'study': course['study'],
            'credits': course['credits'],
            'link': course['link'],
            'match': course.score,
        }

    #Set membership keeps the leftover scan linear in the number of courses
    packed_lookup = set(packed_items)
    considered_courses = [
        {'name': course['name'], 'credits': course['credits'], 'score': course.score, 'link': course['link']}
        for idx, course in enumerate(courses)
        if idx not in packed_lookup
    ]

    return [curriculum, computed_value, total_credits, considered_courses]


def curriculum_composer(query_str, lecturer_str, filter_sem, filter_lang, filter_master, max_credits=30):
    """Compose one candidate curriculum per university from the course index.

    Parameters:
        query_str: keywords/phrases separated by ', '.
        lecturer_str: lecturer names separated by ', '.
        filter_sem: value that the "semester" field of a course must match.
        filter_lang: value that the "language" field of a course must match.
        filter_master: iterable of master acronyms; a course must match at
            least one of them on the "masters" field.
        max_credits: credit budget of the curriculum (default 30 ECTS).

    Returns:
        A list with three entries, in the order UGENT, KUL, VUB. Each entry
        is [curriculum, computed_value, total_credits, considered_courses]
        as produced by _pack_curriculum.
    """
    #Get index from folder
    ix = open_dir("indexdir")

    #Keyword snippets (full phrases plus low-weight single tokens)
    search_terms = _expand_search_terms(query_str)

    #Split input string for lecturer search terms
    lecturers = lecturer_str.split(', ')

    #Open a searcher on the index
    with ix.searcher(weighting=scoring.TF_IDF) as searcher:

        #Different acronyms of the same master (one per university) are OR-ed
        or_group_masters = [Term("masters", master) for master in filter_master]

        #Filter query: courses that do not match it are never returned
        filter_query = Term("semester", filter_sem) & Term("language", filter_lang) & Or(or_group_masters)

        #Parse every keyword snippet against the text fields and OR them together
        keyword_parser = MultifieldParser(["name", "keywords", "situating", "content"], schema=ix.schema)
        full_query = Or([keyword_parser.parse(term) for term in search_terms])

        #Search with given query (no limit)
        results = searcher.search(full_query, filter=filter_query, limit=None, terms=True)

        #Same for the lecturer names, searched on the lecturer field only
        lecturer_parser = QueryParser("lecturer", schema=ix.schema)
        full_lector_query = Or([lecturer_parser.parse(name) for name in lecturers])
        lector_results = searcher.search(full_lector_query, filter=filter_query, limit=None, terms=True)

        #Upgrade common queries in both results and add new courses not covered by keywords, but only by lecturer name
        results.upgrade_and_extend(lector_results)

        #Iterate over a copy, because the results object is extended inside the loop
        for result in results.copy():
            #Select the top 10 similar courses (conditioned on filter query) by comparing contents
            similar_courses = result.more_like_this("content", top=10, filter=filter_query, normalize=False)
            #Add non-common courses to results object
            results.extend(similar_courses)

        #Split the candidates per university, based on the first stored master acronym
        #NOTE(review): assumes result['masters'] is a sequence of acronyms -- confirm against the index schema
        ugent_courses = []
        kul_courses = []
        vub_courses = []
        for result in results:
            first_master = result['masters'][0]
            if 'VUB' in first_master:
                vub_courses.append(result)
            elif 'CQ_' in first_master:
                kul_courses.append(result)
            else:
                ugent_courses.append(result)

        #Knapsack once per university; done while the searcher is still open,
        #because the stored course fields are read from the hits here
        return [_pack_curriculum(courses, max_credits)
                for courses in (ugent_courses, kul_courses, vub_courses)]

In [7]:
import ipywidgets as widgets

# Erasmus logo, loaded from Wikimedia; it is placed next to the introduction
# banner when the page is assembled.
img = widgets.HTML(
    value="<img src='https://upload.wikimedia.org/wikipedia/en/thumb/9/90/Erasmus_logo.svg/220px-Erasmus_logo.svg.png' style='max-width:none;'>",
    placeholder='(EU) Student Exchange Programme',
)

# Introduction banner: title, subtitle and a short description of the demo.
# The embedded <style> block also sets the shared look of <h4> and <p>
# elements (margins, left border) that the other HTML widgets build on.
header = widgets.HTML(
    value=" \
    <style> \
        div#introduction {font-family: Candara; font-size:16px} \
        h4 {margin:4px 0px;} \
        p {margin:4px 0px; padding: 6px 8px; border-left: 10px solid;} \
        div#introduction p {border-color: rgba(35,31,32,1); background-color: rgba(35,31,32,0.05);}\
    </style> \
    <div id='introduction'> \
        <h3 style='margin:4px 0px;'> \
            <u>(EU) Student Exchange Programme: </u> \
        </h3> \
        <h4> \
            Curriculum Composer \
        </h4> \
        <p style='line-height:1.2; margin-top:10px;'> \
            Hi! This is a <u>demo</u> web application created for the course <strong>The Information Society and ICT</strong> at Ghent University. <br> \
            <br> \
            Our aim is to investigate the role of ICT in facilitating the selection procedure of an Erasmus programme considered by exchange students, \
            and more specifically in drafting a valid curriculum at a foreign university. <br> \
            <br> \
            <b>REMARK:</b> this webapp's styling is not responsive, please adjust browser for a correct view. :-)<br> \
            <b>REMARK 2:</b> do not forget to fill in the Google Form, thank you!<br> \
        </p> \
    </div> \
    "
)

# Collapsible "INFO" panel implemented in pure HTML/CSS: a hidden checkbox
# (#btnControl) is toggled by clicking its label, and the :checked selectors
# swap the visible part from the short header span to the full body span.
info = widgets.HTML(
    value=" \
    <style> \
        div#info {font-family: Candara; max-width: 100%;} \
        div#info p {border-color: rgba(31,79,163,1); background-color: rgba(31,79,163,0.15);} \
        #btnControl, span#infoBody {display: none;} \
        #btnControl + label.btn {padding: 0px; text-align: left; display: inline; white-space: normal; border:0px;} \
        #btnControl:checked + label > div#info span#infoHeader{display: none;} \
        #btnControl:checked + label > div#info span#infoBody{display: inline;} \
    </style> \
    <input type='checkbox' id='btnControl'/> \
    <label class='btn' for='btnControl'> \
        <div id='info'> \
            <span id='infoHeader'> \
                <p style='font-size: 22px; font-weight: bold;'> \
                    INFO \
                </p> \
            </span> \
            <span id='infoBody'> \
                <p style='line-height:1.3; font-size: 15px;'> \
                    Nowadays most universities handle different formats in presenting their study offer, making the process of drafting an optimal curriculum very cumbersome. \
                    Preferably, a <strong>standardized platform or search engine</strong> specialized in <u>uniformally</u> representing different study offers could streamline this process. <br> \
                    <br> \
                    The benefits are <strong>twofold</strong>: an <u>exploration tool</u> to discover potential offers that students would otherwise miss and \
                    reducing the effort needed in <u>bootstrapping and comparing</u> different curriculum candidates. <br> \
                    <br> \
                    Potentially, such a platform could increase the <strong>in- and outflux of exchange students</strong> in universities by providing benefits for both parties involved, \
                    enhancing the attractiveness of an Erasmus programme among students. <br> \
                </p> \
            </span> \
        </div> \
    </label> \
    "
)

# Collapsible "USE CASE" panel, built the same way as a checkbox/label toggle:
# the hidden checkbox #btnControl2 switches between the caseHeader span and
# the caseBody span through the :checked CSS selectors below.
useCase = widgets.HTML(
    value=" \
    <style> \
        div#case {font-family: Candara; max-width: 100%;} \
        div#case p {border-color: rgba(255,199,7,1); background-color: rgba(255,199,7,0.15);} \
        #btnControl2, span#caseBody {display: none;} \
        #btnControl2 + label.btn {padding: 0px; text-align: left; display: inline; white-space: normal; border:0px;} \
        #btnControl2:checked + label > div#case span#caseHeader{display: none;} \
        #btnControl2:checked + label > div#case span#caseBody{display: inline;} \
    </style> \
    <input type='checkbox' id='btnControl2'/> \
    <label class='btn' for='btnControl2'> \
        <div id='case'> \
            <span id='caseHeader'> \
                <p style='font-size: 22px; font-weight: bold;'> \
                    USE CASE [Read Me!] \
                </p> \
            </span> \
            <span id='caseBody'> \
                <p id='usecase' style='line-height:1.3; font-size: 15px;'> \
                    Since this webapp validates as a <strong>proof of concept (PoC)</strong>, we'll only investigate a specific use case with <u>limiting features</u>. <br> \
                    <br> \
                    Let's assume <strong>YOU</strong> are a foreign <strong>engineering</strong> student looking for an exchange program in <strong>Belgium</strong>. <br> \
                    You plan to spend 1 semester abroad for an appropriate <strong>30 ECTS-credits</strong>. <br> \
                    Your university, in which you are enrolled, only offers exchange programs at 3 Belgian universities: <br> \
                    <strong>Ghent University, Katholieke Universiteit Leuven and Vrije Universiteit Brussel</strong>.<br> \
                    <br> \
                    Additionally, we assume that there aren't any <u>restrictions</u> in choosing courses except for your <u>field of study</u>. <br> \
                    In order to choose your optimal curriculum at one of the universities, you assess some <u>personal criteria</u> that guide your decision <br> \
                    <em>(e.g. course content, specific professor, sports ..)</em>.<br> \
                    <br> \
                    The <strong>search engine</strong> returns all possible curriculums based on your input and tries to make <strong>suggestions</strong> to personalize them. <br> \
                    <br> \
                    Based on the output, you can pick the curriculum that is most alligned with your <strong>student profile</strong>. <br> \
                    Alternatively, you could try <strong>relaxing your criteria</strong> to explore new combinations that you might not have considered. <br> \
                    <br> \
                    We'd like to <u>invite you in validating</u> the tool-proposed curriculum and giving some additional remarks. :-) <br> \
                </p> \
            </span> \
        </div> \
    </label> \
    "
)

# Style-only widget: it renders no visible content, it just injects a CSS rule
# for any div.widget-hbox that directly follows a div.widget-html
# (width, margin, padding, left border and background colour).
vBoxStyling = widgets.HTML(
     value=" \
        <style> \
            div.widget-html + div.widget-hbox {max-width: 100%; margin:4px 2px; padding: 6px 8px; border-left: 10px solid; border-color: rgba(35,31,32,1); background-color: rgba(35,31,32,0.05);} \
        </style> \
    "
)

def _field_layout():
    """Return a fresh Layout with the vertical margin used by the form fields."""
    return widgets.Layout(margin='8px 0px 8px 0px')


# Study field selector; the choices are the master titles of the acronym mapping.
studyField = widgets.Dropdown(
    description='Study Field: ',
    options=master_acronym_mapping.keys(),
    value='Computer Science Engineering',
    layout=_field_layout(),
    disabled=False
)

# Credit budget, shown for information only (read-only field).
credits = widgets.IntText(
    description='ECTS: ',
    value=30,
    layout=_field_layout(),
    disabled=True
)

# Course language, shown for information only (read-only field).
language = widgets.Text(
    description='Language: ',
    value='EN',
    layout=_field_layout(),
    disabled=True
)

# Hint explaining the separator expected in the two free-text fields below.
format_info = widgets.HTML(
    value="<i>Please split search terms in following using comma-space ', '</i>"
)

# Free-text keywords describing the desired course content.
keywords = widgets.Text(
    description='Keywords: ',
    placeholder='e.g. Lean Six Sigma, Big Data ..',
    layout=_field_layout(),
    disabled=False
)

# Free-text names of preferred lecturers.
lecturer = widgets.Text(
    description='Lecturers: ',
    placeholder='e.g. I. Newton or Isaac Newton',
    layout=_field_layout(),
    disabled=False
)


# Semester selector.
semester = widgets.ToggleButtons(
    description='Semester: ',
    options=['1', '2'],
    layout=widgets.Layout(width='35%'),
    disabled=False
)

# Feedback form: embeds a Google Form in an iframe below the application.
# The iframe's fallback text "Bezig met laden..." is Dutch for "Loading...".
google_form = widgets.HTML(
     value=' \
        <p> \
            <iframe src="https://docs.google.com/forms/d/e/1FAIpQLSfqKTd08qhCjgr9EauBF2e0DOOUbJAZVUmfo4XAJeHbcWsbVg/viewform?embedded=true" width="840" height="519" frameborder="0" marginheight="0" marginwidth="0">Bezig met laden...</iframe> \
        </p> \
    '
)

def on_button_clicked(b):
    """Run the curriculum composer with the form values and fill the result tabs.

    Reads the current values of the keywords, lecturer, semester, language and
    study-field widgets, calls curriculum_composer, and renders each returned
    curriculum as an HTML widget holding two tables: the composed curriculum
    and the additional courses that were considered. The widgets replace the
    children of the module-level `tab`, in the order the composer returns them.

    Parameters:
        b: the clicked button, as passed by the on_click callback (unused).
    """
    # Local import keeps this cell self-contained.
    from html import escape

    def cell(value):
        # Course data comes from the index; escape it so characters such as
        # '<', '&' or quotes cannot break the generated markup or the href.
        return escape(str(value))

    curriculums = curriculum_composer(keywords.value,lecturer.value,semester.value,language.value,master_acronym_mapping[studyField.value])
    tables = []
    for curriculum, computed_value, total_credits, considered_courses in curriculums:

        information_header = "<h4>Composed Curriculum:</h4><p style='font-size:16px;'><strong>ECTS: </strong>"+str(total_credits)+" <strong>Matched value: </strong>"+str(computed_value)+"</p>"

        table = "<table style='width:100%;'> \
                            <tr> \
                                <th>ECTS</th> \
                                <th>Title</th> \
                                <th>Lecturer</th> \
                                <th>Match</th> \
                            </tr>"

        table += "".join(
            "<tr><td>%s</td><td><a href='%s'>%s</a></td><td>%s</td><td>%.2f</td></tr>"
            % (cell(course['credits']), cell(course['link']),
               cell(course['name']), cell(course['lecturer']),
               course['match'])
            for course in curriculum.values()
        )

        table +=  "</table>"

        # The style attribute used to end in two quotes, which is malformed HTML.
        table_considered = "<h4>Additional considered courses:</h4><table style='width:100%;'> \
                            <tr> \
                                <th>ECTS</th> \
                                <th>Title</th> \
                                <th>Match</th> \
                            </tr>"

        table_considered += "".join(
            "<tr><td>%s</td><td><a href='%s'>%s</a></td><td>%.2f</td></tr>"
            % (cell(course['credits']), cell(course['link']),
               cell(course['name']), course['score'])
            for course in considered_courses
        )

        table_considered +=  "</table>"

        HTML_content = information_header + table + table_considered

        tables.append(widgets.HTML(value=HTML_content))

    tab.children = tuple(tables)

# Search button; a click runs the composer and refreshes the result tabs.
button = widgets.Button(
    description="Search!",
    style=widgets.ButtonStyle(button_color='#bdd0db'),
    layout=widgets.Layout(width='35%', height='40px', margin='16px 0px 0px 132px'),
)
button.on_click(on_button_clicked)

# Left column: the styling helper, the input fields and the search button.
formItems = [
    vBoxStyling,
    studyField,
    credits,
    language,
    semester,
    format_info,
    keywords,
    lecturer,
    button,
]
form = widgets.VBox(formItems, layout=widgets.Layout(border=''))

# Right column: one tab per university, filled with disabled placeholders
# until the first search replaces them.
children = [widgets.Text(disabled=True) for i in range(3)]
tab = widgets.Tab(layout=widgets.Layout(width='75%',height='100%'))
tab.children = children
for i, uni in enumerate(('UGENT', 'KUL', 'VUB')):
    tab.set_title(i, uni)

# Page: banner row, the two collapsible panels, form + results row, feedback form.
itemsV = [
    widgets.HBox([img,header]),
    info,
    useCase,
    widgets.HBox([form,tab]),
    google_form,
]
webApp = widgets.VBox(itemsV, layout=widgets.Layout(width='85%', margin='0px 0px 0px 120px'))

display(webApp)

VBox(children=(HBox(children=(HTML(value="<img src='https://upload.wikimedia.org/wikipedia/en/thumb/9/90/Erasm…