In [None]:
import pandas as pd
import docx
from docx.shared import Inches
from docx import Document

In [None]:
# Base file name without extension: the notebook reads course_file + '.xlsx'
# as input and saves the generated document as course_file + '.docx'.
course_file = 'datafication_courses_23-24-validated-jouni-2023-09-08'

In [None]:
# Load the course listing from the Excel workbook named after course_file.
df = pd.read_excel(f'{course_file}.xlsx')

In [None]:
# Convert both date columns to pandas datetimes so they can be sorted and
# compared against the period boundaries.
for date_col in ('start', 'end'):
    df[date_col] = pd.to_datetime(df[date_col])

In [None]:
# Order courses by start date and strip the ', Luento-opetus' suffix from
# the course names, as one chained expression.
df = (
    df
    .sort_values('start')
    .assign(name=lambda frame: frame['name'].str.replace(', Luento-opetus', ''))
)

In [None]:
# Period number -> {'start', 'end'} timestamps, built from a compact table of
# (first day, last day) ISO dates listed in period order (numbering starts at 1).
periods = {
    number: {'start': pd.Timestamp(first_day), 'end': pd.Timestamp(last_day)}
    for number, (first_day, last_day) in enumerate(
        [
            ('2023-09-04', '2023-10-22'),
            ('2023-10-30', '2023-12-17'),
            ('2024-01-15', '2024-03-03'),
            ('2024-03-11', '2024-05-05'),
            ('2024-05-06', '2024-05-31'),
        ],
        start=1,
    )
}

In [None]:
def add_hyperlink(paragraph, url, text):
    """Append a blue external hyperlink to a python-docx paragraph.

    Args:
        paragraph: Paragraph to extend; its ``part`` registers the link
            relationship and its ``_p`` element receives the new XML.
        url: Link target, registered as an external relationship.
        text: Visible text of the link.

    Returns:
        The ``w:hyperlink`` element that was appended to the paragraph.
    """
    # This gets access to the document.xml.rels file and gets a new relation id value
    part = paragraph.part
    r_id = part.relate_to(url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)

    # Create the w:hyperlink tag and point it at the relationship
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
    hyperlink.set(docx.oxml.shared.qn('r:id'), r_id)

    # Create a w:r element with its own w:rPr (run properties) element
    new_run = docx.oxml.shared.OxmlElement('w:r')
    rPr = docx.oxml.shared.OxmlElement('w:rPr')

    # Add blue color. w:color/@w:val takes a bare six-digit hex RGB value
    # (or "auto"); a leading '#' is not valid there.
    c = docx.oxml.shared.OxmlElement('w:color')
    c.set(docx.oxml.shared.qn('w:val'), "0070E0")
    rPr.append(c)

    # Join all the xml elements together and add the required text to the w:r element
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    paragraph._p.append(hyperlink)
    return hyperlink

In [None]:
def write_to_docx(course):
    """Append one course entry to the module-level ``document``.

    The entry is a single paragraph: the course code, the course name in
    bold followed by a line break, then the course URL as a hyperlink.

    Args:
        course: Row-like object indexable by 'code', 'name' and 'url'.
            'code' and 'name' are concatenated with strings, so they must
            be strings themselves.
    """
    entry = document.add_paragraph()

    # Negative indents make the paragraph extend beyond both margins.
    layout = entry.paragraph_format
    layout.left_indent = Inches(-0.5)
    layout.right_indent = Inches(-2)

    entry.add_run(course['code'] + ' ')
    title_run = entry.add_run(course['name'] + '\n')
    title_run.bold = True

    # The URL is used as both the link target and the visible link text.
    link = course['url']
    add_hyperlink(entry, link, link)

In [None]:
years = '2023-2024'
document = Document()

# Number of the period whose heading reads "Intensive" instead of "N. teaching".
intensive_period = 5

# Compare on calendar days: the period bounds are midnight timestamps, so a
# course starting later in the day on a period's last day would otherwise
# fall outside the period.
start_days = df['start'].dt.normalize()

# Write courses to docx file, organized by period and alphabetically within periods
for period_no, bounds in sorted(periods.items()):

    # Filter by period (both bounds inclusive)
    df_write = df[start_days.between(bounds['start'], bounds['end'])]

    if df_write.empty:
        continue

    # Build D.M.YYYY without zero padding from the date fields. The "%-d" and
    # "%-m" strftime flags are a platform extension and fail on Windows.
    p_start = f"{bounds['start'].day}.{bounds['start'].month}.{bounds['start'].year}"
    p_end = f"{bounds['end'].day}.{bounds['end'].month}.{bounds['end'].year}"

    h = document.add_heading(level=1)
    h.paragraph_format.left_indent = Inches(-0.5)

    if period_no == intensive_period:
        prefix = 'Intensive '
    else:
        prefix = str(period_no) + '. teaching '
    heading = "Courses starting in " + prefix + 'period of ' + years + ' – ' + p_start + '–' + p_end

    h.add_run( heading )

    # Sort alphabetically and write one entry per course. An explicit loop is
    # used because write_to_docx only has side effects: DataFrame.apply builds
    # an unused result, and older pandas releases could call the function
    # twice for the first row, duplicating that course in the document.
    for _, course in df_write.sort_values(by='name').iterrows():
        write_to_docx( course )

document.save(course_file + '.docx')