In [1]:
import pandas as pd
import numpy as np

# Load transformed dataset
df = pd.read_csv("fraunhofer_dashboard_data.csv")

# Tạo monthly timeline 12 tháng gần nhất
months = pd.date_range(end=pd.Timestamp.today(), periods=12, freq="M")

synthetic_rows = []

for _, row in df.iterrows():
    course_id = row["course_id"]
    course_name = row["course_name"]
    total_participants = row["participants"]
    total_revenue = row["revenue"]
    base_rating = row["satisfaction"]
    price = row["price"]

    # Phân bổ participants theo seasonal pattern
    distribution = np.abs(np.random.normal(1, 0.3, 12))
    distribution = distribution / distribution.sum()  # normalize
    monthly_participants = (distribution * total_participants).astype(int)

    # Revenue tính theo participants * price
    monthly_revenue = monthly_participants * price

    # Satisfaction có thêm noise nhẹ
    monthly_satisfaction = np.clip(
        base_rating + np.random.normal(0, 0.1, 12),
        0, 5
    )

    for i in range(12):
        synthetic_rows.append({
            "course_id": course_id,
            "course_name": course_name,
            "month": months[i],
            "participants_monthly": monthly_participants[i],
            "revenue_monthly": monthly_revenue[i],
            "satisfaction_monthly": monthly_satisfaction[i],
        })

# Save
ts_df = pd.DataFrame(synthetic_rows)
ts_df.to_csv("courses_timeseries.csv", index=False)

print("Synthetic time series generated!")
print(ts_df.head())


  months = pd.date_range(end=pd.Timestamp.today(), periods=12, freq="M")


Synthetic time series generated!
   course_id                                        course_name  \
0     762616  The Complete SQL Bootcamp 2020: Go from Zero t...   
1     762616  The Complete SQL Bootcamp 2020: Go from Zero t...   
2     762616  The Complete SQL Bootcamp 2020: Go from Zero t...   
3     762616  The Complete SQL Bootcamp 2020: Go from Zero t...   
4     762616  The Complete SQL Bootcamp 2020: Go from Zero t...   

                       month  participants_monthly  revenue_monthly  \
0 2024-11-30 10:49:08.489660                 25655       11673025.0   
1 2024-12-31 10:49:08.489660                 16446        7482930.0   
2 2025-01-31 10:49:08.489660                 19226        8747830.0   
3 2025-02-28 10:49:08.489660                 27555       12537525.0   
4 2025-03-31 10:49:08.489660                 36565       16637075.0   

   satisfaction_monthly  
0              4.910124  
1              4.749609  
2              4.624753  
3              4.545657  
4      