In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('da.csv', header=None)

In [3]:
def parse_time(time_required:str):

    time_required = time_required.strip(' \t')
    tmp = time_required.split(' ')
   
    assert len(tmp) %2 == 0, print('Expected an even number of elements in the list ',tmp)
    
    time_spec = {'weeks':0, 'days':0, 'hours':0, 'mins':0}
    
    for i in range(0,len(tmp),2):
        time = int(tmp[i])
        unit = tmp[i+1]
        
        assert time >=0, print('Invalid value for time',time) 
        
        if unit in ['mins', 'minutes', 'minutes']:
            time_spec['mins'] += time
        elif unit in ['hour', 'hours']:
            time_spec['hours'] += time
        elif unit in ['day', 'days']:
            time_spec['days'] += time
        elif unit in ['week', 'weeks']:
            time_spec['weeks'] += time
        else:
            raise ValueError('Invalid unit %s when trying to parse %s'%(unit,tmp))
            
    return time_spec 

In [4]:
def to_hours(time_needed, daily_commitment):
    commitment_hours = [0]*len(time_needed)
    for i in range(len(time_needed)):
        commitment_hours[i] = time_needed[i]['weeks'] * 7 * daily_commitment +\
                              time_needed[i]['days'] * daily_commitment +\
                              time_needed[i]['hours'] +\
                              time_needed[i]['mins']/60
    return commitment_hours
        

In [5]:
time_requirements = list(map(parse_time, data.iloc[:,2]))

In [6]:
daily_commitment = 4

In [7]:
time_requirements

[{'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 0, 'mins': 15, 'weeks': 0},
 {'days': 0, 'hours': 0, 'mins': 0, 'weeks': 1},
 {'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 4, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 5, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 5, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 0, 'mins': 0, 'weeks': 1},
 {'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 2, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 0, 'mins': 30, 'weeks': 0},
 {'days': 0, 'hours': 0, 'mins': 30, 'weeks': 0},
 {'days': 0, 'hours': 0, 'mins': 30, 'weeks': 0},
 {'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 4, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 1, 'mins': 0, 'weeks': 0},
 {'days': 0, 'hours': 2, 'mins': 0, 'weeks': 0},
 {'days': 0, 'ho

In [8]:
hours_required = to_hours(time_requirements, daily_commitment)

In [9]:
import datetime
start_date = datetime.date(2018,9,11)

In [10]:
lesson_timeline = []
day_counter = 0
cumulative_commit = 0

for lesson_id in range(len(hours_required)):
    ## Tackle lessons with duration less than one commitment-day.
    if hours_required[lesson_id] < daily_commitment:
        start_day_offset = day_counter
        spillover = cumulative_commit + hours_required[lesson_id] - daily_commitment
        
        if spillover > 0:
            end_day_offset = start_day_offset + 1
            cumulative_commit = cumulative_commit + hours_required[lesson_id] - daily_commitment
            day_counter += 1
        else:
            end_day_offset = start_day_offset
            cumulative_commit = cumulative_commit + hours_required[lesson_id]
    
    ## Tackle lessons with duration exceeding one commitment-day
    else:
        ## If we have non-zero accumulated commitment-hours, skip the day
        if cumulative_commit > 0:
            cumulative_commit = 0
            day_counter += 1
        
        start_day_offset = day_counter
        ## Round up commitment-days
        end_day_offset   = start_day_offset + int(np.ceil(hours_required[lesson_id] / daily_commitment)) - 1
        
        ## Ensure next lesson starts on next day.
        day_counter += end_day_offset-start_day_offset+1
        
        
    ## Install day info for current lesson.
    start_absolute_date = (start_date+datetime.timedelta(days=start_day_offset)).strftime('%b %d %Y')
    end_absolute_date   = (start_date+datetime.timedelta(days=end_day_offset)).strftime('%b %d %Y')
    lesson_timeline.append((data.iloc[lesson_id,1], start_absolute_date, end_absolute_date))

In [11]:
lesson_timeline

[('Welcome to the ND', 'Sep 11 2018', 'Sep 11 2018'),
 ('Life of a DA', 'Sep 11 2018', 'Sep 11 2018'),
 ('Project: Weather Trends', 'Sep 12 2018', 'Sep 18 2018'),
 ('Anaconda', 'Sep 19 2018', 'Sep 19 2018'),
 ('Jupyter Notebook', 'Sep 19 2018', 'Sep 19 2018'),
 ('Data Analysis Process', 'Sep 20 2018', 'Sep 20 2018'),
 ('DA Process : Case 1', 'Sep 21 2018', 'Sep 22 2018'),
 ('DA Process : Case 2', 'Sep 23 2018', 'Sep 24 2018'),
 ('Programming Workflow for DA', 'Sep 25 2018', 'Sep 25 2018'),
 ('Project: Investigate a dataset', 'Sep 26 2018', 'Oct 02 2018'),
 ('Descriptive stats - I', 'Oct 03 2018', 'Oct 03 2018'),
 ('Descriptive stats - II', 'Oct 03 2018', 'Oct 03 2018'),
 ('Admissions case study', 'Oct 03 2018', 'Oct 03 2018'),
 ('Probability', 'Oct 03 2018', 'Oct 04 2018'),
 ('Binomial Distribution', 'Oct 04 2018', 'Oct 04 2018'),
 ('Conditional Probability', 'Oct 04 2018', 'Oct 04 2018'),
 ('Bayes Rule', 'Oct 04 2018', 'Oct 04 2018'),
 ('Python Probability Practice', 'Oct 05 2018', 'O

In [12]:
def date_to_lessons(timeline):
    mapping = dict()
    for i in range(len(timeline)):
        lesson, start_date, end_date = timeline[i]
        if mapping.get(start_date):
            mapping[start_date].append(lesson)
        else:
            mapping[start_date] = [lesson]
            
        if end_date != start_date:
            if mapping.get(end_date):
                mapping[end_date].append(lesson)
            else:
                mapping[end_date] = [lesson]
    
    ## Convert list of lessons to comma separated elements in a string
    dates = []
    lessons = []
    for key in mapping:
        dates.append(key)
        lessons.append(', '.join(mapping[key]).rstrip())
    
    date2lesson = pd.DataFrame({'Date':dates, 'Lesson':lessons})
    date2lesson.Date = pd.to_datetime(date2lesson.Date)
    date2lesson.sort_values('Date', inplace=True)
    date2lesson.Date = date2lesson.Date.dt.strftime('%b %d %Y')
    return date2lesson

In [13]:
dl = date_to_lessons(lesson_timeline)

In [14]:
def compact_date_ranges(timeline):
    dates = timeline.Date
    lessons = timeline.Lesson
    
    date_ranges = []
    lessons_ = []
    
    i = 0
    while i < len(timeline):
        if i+1 < len(timeline):
            if lessons[i] == lessons[i+1]:
                lessons_.append(lessons[i])
                date_ranges.append('-'.join([dates[i],dates[i+1]]))
                i += 2
            else:
                lessons_.append(lessons[i])
                date_ranges.append(dates[i])
                i += 1
                
    return pd.DataFrame({'Dates':date_ranges, 'Lessons':lessons_})
                

In [15]:
compact_date_ranges(dl)

Unnamed: 0,Dates,Lessons
0,Sep 11 2018,"Welcome to the ND, Life of a DA"
1,Sep 12 2018-Sep 18 2018,Project: Weather Trends
2,Sep 19 2018,"Anaconda, Jupyter Notebook"
3,Sep 20 2018,Data Analysis Process
4,Sep 21 2018-Sep 22 2018,DA Process : Case 1
5,Sep 23 2018-Sep 24 2018,DA Process : Case 2
6,Sep 25 2018,Programming Workflow for DA
7,Sep 26 2018-Oct 02 2018,Project: Investigate a dataset
8,Oct 03 2018,"Descriptive stats - I, Descriptive stats - II,..."
9,Oct 04 2018,"Probability, Binomial Distribution, Conditiona..."
