# 00 — Data Generation
This notebook creates required folders and a sample dataset for the demo.

In [5]:
# --- Project root & folders 
from pathlib import Path

project_base_dir = Path.cwd()
if project_base_dir.name == "notebooks":
    project_base_dir = project_base_dir.parent

data_dir   = project_base_dir / "data"
images_dir = project_base_dir / "images"

data_dir.mkdir(parents=True, exist_ok=True)
images_dir.mkdir(parents=True, exist_ok=True)

print(f"Base: {project_base_dir}")


Base: C:\Users\Jovane\cartflow-cancellation-analysis


In [6]:
# Core libraries for data analysis and manipulation
import pandas as pd
import numpy as np

# Libraries for creating professional and insightful data visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# Library for handling dates and times
import datetime

# Libraries for machine learning models and metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score
from pathlib import Path

# Create required folders if they don't exist
# handled by setup
# Set the visualization style for a clean, professional look
sns.set(style="whitegrid")
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8), 'axes.titlesize': 16, 'axes.labelsize': 12, 'axes.titlepad': 20})

print("✅ Environment successfully configured! All libraries are ready.")

✅ Environment successfully configured! All libraries are ready.


In [3]:
import pandas as pd

sample_csv_path = Path(data_dir / r"sample_data")

if not sample_csv_path.exists():
    df_sample = pd.DataFrame({
        "order_id":[1,2,3,4,5,6,7,8,9,10],
        "order_value":[58,41,72,63,55,60,49,80,66,52],
        "cancelled":[1,0,0,1,0,0,1,0,0,1],
        "payment_method":["credit_card","debit","credit_card","paypal","credit_card","debit","paypal","credit_card","debit","credit_card"],
        "shipping_eta_days":[8,3,2,9,4,5,10,3,6,7],
        "device":["mobile","desktop","desktop","mobile","mobile","desktop","mobile","desktop","mobile","mobile"]
    })
    df_sample.to_csv(sample_csv_path, index=False)
    print(f"Sample CSV created at {sample_csv_path}")
else:
    print(f"Sample CSV already exists at {sample_csv_path}")


Sample CSV already exists at C:\Users\Jovane\cartflow-cancellation-analysis\data\sample_data


In [4]:
from pathlib import Path
import pandas as pd

# Assuming 'data_dir' has already been defined and the folder created
data_dir = Path("./data")
data_dir.mkdir(exist_ok=True)

# Define the source (src) and destination (dst) paths using
# clear and descriptive file names.
src = Path(data_dir / "sample_data.csv")  # Path to the original file
dst = Path(data_dir / "orders.csv")      # Path to the new file

# Check if the source file exists and the destination file does NOT exist
if src.exists() and not dst.exists():
    try:
        # Read the source file and save it to the destination
        pd.read_csv(src).to_csv(dst, index=False)
        print(f"File '{dst.name}' created from '{src.name}'")
    except FileNotFoundError:
        print(f"Error: The source file '{src.name}' was not found.")
else:
    print("Condition not met. Check if the source file exists and the destination does not.")

Condition not met. Check if the source file exists and the destination does not.
