In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [9]:
# Directory holding the scraped files, relative to the notebook's working directory.
folder_path = "scraping"

# Regular files directly inside the folder (sub-directories are skipped).
# Sorted so the row order of the combined frame does not depend on the
# arbitrary order in which the OS lists directory entries.
files = sorted(
    f for f in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, f))
)

# Read every file (all are assumed to be CSV) into its own frame first, then
# concatenate once: calling pd.concat inside the loop re-copies all rows
# accumulated so far on every iteration.
frames = [pd.read_csv(os.path.join(folder_path, file)) for file in files]

# pd.concat raises on an empty list, so keep the previous behaviour of
# producing an empty DataFrame when the folder contains no files.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [10]:
# Join the textual 'date' and 'heure' columns with a space and parse the
# result into a single 'datetime' column stored on df.
# NOTE(review): the parse format is inferred by pandas; confirm it matches
# the scraped files (e.g. day-first dates).
timestamp_text = df['date'] + ' ' + df['heure']
df['datetime'] = pd.to_datetime(timestamp_text)

# For every ('ligne', 'id', 'arret') combination, keep the earliest and the
# latest 'datetime' value; the result has columns ligne, id, arret, min, max
# and a fresh 0..n-1 index.
group_keys = ['ligne', 'id', 'arret']
datetime_by_group = df.groupby(group_keys)['datetime']
grouped_df = datetime_by_group.agg(['min', 'max']).reset_index()

In [None]:
# Line colour for each known 'ligne' value; any other value falls back to black.
custom_colors = {
    "A": "red",
    "B": "blue",
    "C": "orange",
    "D": "green",
    "F": "lightgreen",
    "T1": "purple",
    "T4": "darkviolet",
    "P+R": "pink",
    "Non spécifié": "gray",
}

# One "ligne - id - arret" label per row of grouped_df, built once and reused
# both as the trace name and as the y-axis tick text.
row_labels = [
    f"{ligne} - {id_} - {arret}"
    for ligne, id_, arret in zip(
        grouped_df['ligne'], grouped_df['id'], grouped_df['arret']
    )
]

# One horizontal segment per row, running from the row's 'min' to its 'max'
# datetime. The y position is the row's ordinal (via enumerate) so traces,
# tick values and labels always line up, independent of the frame's index.
# Only the line colour is set here, exactly as before; marker colour is left
# untouched.
traces = [
    go.Scatter(
        x=[start, end],
        y=[position, position],
        mode='lines+markers',
        marker=dict(size=10),
        line=dict(color=custom_colors.get(ligne, "black")),
        name=label,
    )
    for position, (ligne, start, end, label) in enumerate(
        zip(grouped_df['ligne'], grouped_df['min'], grouped_df['max'], row_labels)
    )
]

fig = go.Figure(data=traces)

# Single layout update. height=1000 is the value that was effectively applied
# before (an earlier height=800 was immediately overridden).
fig.update_layout(
    title='Dates de Remise en Service Prévue Min et Max par Ligne, ID et Station',
    xaxis=dict(
        title=dict(text='Date de Remise en Service Prévue'),
        tickangle=45,
        nticks=40,
    ),
    yaxis=dict(
        tickmode='array',
        tickvals=list(range(len(row_labels))),
        ticktext=row_labels,
    ),
    height=1000,
    width=1200,
    showlegend=False,
)

fig.show()
