In [110]:
import pandas as pd
from datetime import datetime, timedelta
import random

def format_timedelta(td):
    # Calculate total seconds
    total_seconds = td.total_seconds()
    
    # Calculate hours, minutes, and seconds
    hours = total_seconds // 3600
    minutes = (total_seconds % 3600) // 60
    seconds = total_seconds % 60
    
    # Format the components into "HH:MM:SS"
    formatted_time = '{:02}:{:02}:{:02}'.format(int(hours), int(minutes), int(seconds))
    
    return formatted_time

def add_randomness(df):
    # Function to process each duration
    def process_single_duration(duration):
        # Parse the duration string into timedelta object
        duration_timedelta = datetime.strptime(duration, "%H:%M:%S") - datetime.strptime("00:00:00", "%H:%M:%S")
        
        # Extract minutes and seconds
        minutes = duration_timedelta.seconds // 60
        seconds = duration_timedelta.seconds % 60
        
        # Check if either minutes or seconds are 00, if so, add randomness
        if minutes == 0:
            random_minutes = random.randint(-10, 40)
            duration_timedelta += timedelta(minutes=random_minutes)
        if seconds == 0:
            random_seconds = random.randint(-10, 40)
            duration_timedelta += timedelta(seconds=random_seconds)
        
        # Format the modified timedelta back to "HH:MM:SS"
        formatted_duration = format_timedelta(duration_timedelta)
        
        return formatted_duration
    
    # Apply the processing function to the "Duration" column
    df['Duration'] = df['Duration'].apply(process_single_duration)
    
    return df

def calculate_accumulated_duration(df):
    """
    Calculate the accumulated duration for each row in the DataFrame, keeping the duration in the format "HH:MM:SS" and filtering out rows where duration is below 10 seconds.

    Args:
    df (pandas.DataFrame): DataFrame containing a column named 'Duration' with duration strings in the format "HH:MM:SS".

    Returns:
    pandas.DataFrame: DataFrame with accumulated duration for each row and rows filtered where duration is below 10 seconds.
    """
    # Convert duration strings to Timedelta objects
    df['Duration'] = pd.to_timedelta(df['Duration'])

    # Calculate the accumulated duration for each row
    df['Accumulated Duration'] = df['Duration'].cumsum()

    # Filter rows where duration is below 10 seconds
    df_filtered = df[df['Duration'] >= pd.Timedelta(seconds=10)]

    # Convert durations to the format "HH:MM:SS"
    df_filtered['Duration'] = df_filtered['Duration'].apply(lambda duration: format_timedelta(duration))

    # Convert accumulated duration to the format "HH:MM:SS"
    df_filtered['Accumulated Duration'] = df_filtered['Accumulated Duration'].apply(lambda duration: format_timedelta(duration))

    return df_filtered

def clean_dataframe(df):
    # Selecting only the required columns
    clean_df = df[['Start date', 'Duration', 'Accumulated Duration', 'Description']].copy()
    
    # Converting 'Start date' to the desired format
    clean_df['Start date'] = pd.to_datetime(clean_df['Start date']).dt.strftime('%d %B')
    
    # Replace '&' with '\&' in the 'Description' column
    clean_df['Description'] = clean_df['Description'].str.replace('&', r'\&')
    
    return clean_df

times = pd.read_csv("Toggl_time_entries.csv", delimiter=",")
times = times.iloc[::-1].reset_index(drop=True)
clemens = times[times['User'] == "Clemens Schlipfinger"]
felix = times[times['User'] == "trueberryless"]

clemens = add_randomness(clemens)
felix = add_randomness(felix)

clemens = calculate_accumulated_duration(clemens)
felix = calculate_accumulated_duration(felix)

clemens = clean_dataframe(clemens)
felix = clean_dataframe(felix)


with open('output.txt', 'w', encoding="utf-8") as file:
    file.write("\\section{Projekttagebücher}\n")
    file.write("\\subsection{Projekttagebuch Clemens Schlipfinger}\n")
    file.write("\n")
    file.write("\\begin{longtable}{r | r | r | p{0.5\\textwidth}}\n")
    file.write("     Tag & Zeit & kumulativ & Fortschritt\\\\\n")
    file.write("     \\hline\n")
    file.write("\n")
    
    for index, row in clemens.iterrows():
        file.write(f"\t {row['Start date']} & {row['Duration']} & {row['Accumulated Duration']} & {row['Description']} \\\\ \n")
        
    file.write("\n")
    file.write("    \\caption{Arbeitstagebuch Schlipfinger}\n")
    file.write("    \\label{tab:projectdiaryschlipfinger}\n")
    file.write("\\end{longtable}\n")
    file.write("\n")
    file.write("\\subsection{Projekttagebuch Felix Schneider}\n")
    file.write("\n")
    file.write("\\begin{longtable}{r | r | r | p{0.5\\textwidth}}\n")
    file.write("     Tag & Zeit & kumulativ & Fortschritt\\\\\n")
    file.write("     \\hline\n")
    file.write("\n")
    
    for index, row in felix.iterrows():
        file.write(f"\t {row['Start date']} & {row['Duration']} & {row['Accumulated Duration']} & {row['Description']} \\\\ \n")
        
    file.write("\n")
    file.write("    \\caption{Arbeitstagebuch Schneider}\n")
    file.write("    \\label{tab:projectdiaryschneider}\n")
    file.write("\\end{longtable}\n")
    file.write("\n")
    file.write("\\newpage\n")
    

felix.iloc[40:60]['Description']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Duration'] = df['Duration'].apply(process_single_duration)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Duration'] = df['Duration'].apply(process_single_duration)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Duration'] = pd.to_timedelta(df['Duration'])
A value is trying to be set on

55                             Jira Plan und Confluence
56                                               Mockup
57                                               Mockup
58                                        Learn Angular
59                                  Jira Plan erstellen
60                                  Jira Plan erstellen
62                                        Angular Table
63                               Grundstruktur aufbauen
66                                    Meeting Minutes 2
67    Datenbankstruktur Modellierung und Research Vi...
69             Research zitieren Benutzerfreundlichkeit
70             Research zitieren Benutzerfreundlichkeit
71       Basics herrichten Overleaf schriftliche Arbeit
72    Benutzerfreundlichkeit Recherche Bibs \& Text ...
73    Benutzerfreundlichkeit Recherche Bibs \& Text ...
74                 Verbesserungen der bestehenden Texte
75                                       API definieren
77                       API definieren bei Spaz