In [5]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Set the size of the data set
n = 365 # number of days in the year
m = 6 # number of metrics
SEED = 42 # fixed seed so a fresh kernel regenerates the exact same data set

# Set the start and end dates for the data set
start_date = datetime(2022, 1, 1)
end_date = start_date + timedelta(days=n-1)

# Set the headers for the data set
headers = ['datetime', 'Sales', 'direct', 'retail sales', 'unknown sales', 'applications', 'net applications']

# How often (in days) an anomaly is injected; day 0 is always an anomaly day
ANOMALY_PERIOD = 30

# Per-metric parameters, listed in the same order as headers[1:]:
# (mean, standard deviation, multiplier applied on anomaly days)
METRIC_PARAMS = [
    (50000, 10000, 1.5),  # Sales
    (10000, 2000, 1.2),   # direct
    (30000, 5000, 0.8),   # retail sales
    (10000, 2000, 1.5),   # unknown sales
    (500, 100, 0.5),      # applications
    (300, 50, 0.2),       # net applications
]

# Guard against the parameter table drifting out of sync with the headers
assert len(METRIC_PARAMS) == m == len(headers) - 1, "metric parameters do not match headers"


def generate_credit_card_data(n_days, start, seed=None):
    """Build a synthetic daily time series of credit card metrics.

    Each metric is drawn independently from a normal distribution described
    by METRIC_PARAMS. On every ANOMALY_PERIOD-th day (starting with day 0)
    each metric is multiplied by its anomaly factor.

    Parameters
    ----------
    n_days : int
        Number of consecutive days (rows) to generate; 0 gives an empty frame.
    start : datetime
        Date of the first row.
    seed : int or None
        Seed for the random generator. None draws fresh entropy, so results
        differ between calls.

    Returns
    -------
    pandas.DataFrame
        One row per day with the columns listed in ``headers``.
    """
    rng = np.random.default_rng(seed)

    # Boolean mask of the days that receive an anomaly
    is_anomaly = np.arange(n_days) % ANOMALY_PERIOD == 0

    # Build whole columns at once instead of appending rows one by one,
    # which keeps proper dtypes and avoids repeated reallocation of the frame
    data = {headers[0]: pd.date_range(start=start, periods=n_days, freq='D')}
    for name, (mean, std, factor) in zip(headers[1:], METRIC_PARAMS):
        values = rng.normal(loc=mean, scale=std, size=n_days)
        values[is_anomaly] *= factor
        data[name] = values

    return pd.DataFrame(data, columns=headers)


# Generate the time series data
df = generate_credit_card_data(n, start_date, seed=SEED)

# Save the data frame to a CSV file
df.to_csv('credit_card_data.csv', index=False)


In [6]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Read the generated credit card data back in, parsing the date column
df = pd.read_csv('credit_card_data.csv', parse_dates=['datetime'])

# Every column other than the timestamp is a metric to be rescaled
numeric_columns = df.columns[df.columns != 'datetime'].tolist()

# Fit the min-max scaler on the metric columns, then apply it to the same
# columns (MinMaxScaler's default target range is [0, 1])
scaler = MinMaxScaler()
scaler.fit(df[numeric_columns])
df[numeric_columns] = scaler.transform(df[numeric_columns])

# Write the rescaled frame out alongside the raw file
df.to_csv('scaled_credit_card_data.csv', index=False)

# Show the first 5 rows of the scaled data as plain text
print(df.head(5))


    datetime     Sales    direct  retail sales  unknown sales  applications   
0 2022-01-01  0.658613  0.801161      0.105829       0.570499      0.049397  \
1 2022-01-02  0.342444  0.359833      0.284472       0.314665      0.796042   
2 2022-01-03  0.482440  0.354412      0.391036       0.262713      0.607675   
3 2022-01-04  0.539262  0.477958      0.703904       0.121639      0.490272   
4 2022-01-05  0.244572  0.344976      0.567132       0.411777      0.389030   

   net applications  
0          0.015137  
1          0.857881  
2          0.713657  
3          0.696971  
4          0.668931  
