In [97]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load data
sheet1 = pd.read_excel('online_retail_II.xlsx', sheet_name='Year 2010-2011')
sheet2 = pd.read_excel('online_retail_II.xlsx', sheet_name='Year 2009-2010')
combined_data = pd.concat([sheet1, sheet2], ignore_index=True)
combined_data

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
1067366,538171,22271,FELTCRAFT DOLL ROSIE,2,2010-12-09 20:01:00,2.95,17530.0,United Kingdom
1067367,538171,22750,FELTCRAFT PRINCESS LOLA DOLL,1,2010-12-09 20:01:00,3.75,17530.0,United Kingdom
1067368,538171,22751,FELTCRAFT PRINCESS OLIVIA DOLL,1,2010-12-09 20:01:00,3.75,17530.0,United Kingdom
1067369,538171,20970,PINK FLORAL FELTCRAFT SHOULDER BAG,2,2010-12-09 20:01:00,3.75,17530.0,United Kingdom


In [99]:
# Data Cleaning
combined_data = combined_data.drop_duplicates()
data_cleaned = combined_data.dropna(subset=['Customer ID'])
data_cleaned

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
1044843,536364,21175,GIN + TONIC DIET METAL SIGN,12,2010-11-30 19:35:00,2.10,14441.0,United Kingdom
1044844,536364,21524,DOORMAT SPOTTY HOME SWEET HOME,2,2010-11-30 19:35:00,7.95,14441.0,United Kingdom
1044845,536364,21098,CHRISTMAS TOILET ROLL,12,2010-11-30 19:35:00,1.25,14441.0,United Kingdom
1044846,536364,21181,PLEASE ONE PERSON METAL SIGN,24,2010-11-30 19:35:00,2.10,14441.0,United Kingdom


In [101]:
data_cleaned = data_cleaned.copy()
data_cleaned['Description'].fillna('Unknown')
data_cleaned = data_cleaned[(data_cleaned['Quantity'] >= 0) & (data_cleaned['Price'] >= 0)]
data_cleaned = data_cleaned[(data_cleaned['InvoiceDate'] <= '2011-12-09') & (data_cleaned['InvoiceDate'] >= '2009-12-01')]
data_cleaned

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
1044843,536364,21175,GIN + TONIC DIET METAL SIGN,12,2010-11-30 19:35:00,2.10,14441.0,United Kingdom
1044844,536364,21524,DOORMAT SPOTTY HOME SWEET HOME,2,2010-11-30 19:35:00,7.95,14441.0,United Kingdom
1044845,536364,21098,CHRISTMAS TOILET ROLL,12,2010-11-30 19:35:00,1.25,14441.0,United Kingdom
1044846,536364,21181,PLEASE ONE PERSON METAL SIGN,24,2010-11-30 19:35:00,2.10,14441.0,United Kingdom


In [103]:
# Feature Extraction
data_cleaned['InvoiceDate'] = pd.to_datetime(data_cleaned['InvoiceDate'])
data_cleaned['Revenue'] = data_cleaned['Quantity'] * data_cleaned['Price']
data_cleaned['InvoiceYear'] = data_cleaned['InvoiceDate'].dt.year
data_cleaned['InvoiceMonth'] = data_cleaned['InvoiceDate'].dt.month
data_cleaned['InvoiceDay'] = data_cleaned['InvoiceDate'].dt.day
data_cleaned['InvoiceWeek'] = data_cleaned['InvoiceDate'].dt.isocalendar().week
data_cleaned['InvoiceWeekday'] = data_cleaned['InvoiceDate'].dt.weekday
data_cleaned['Date'] = data_cleaned['InvoiceDate'].dt.date
data_cleaned['Month'] = data_cleaned['InvoiceDate'].dt.to_period('M')
data_cleaned['Hour'] = data_cleaned['InvoiceDate'].dt.hour
data_cleaned

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country,Revenue,InvoiceYear,InvoiceMonth,InvoiceDay,InvoiceWeek,InvoiceWeekday,Date,Month,Hour
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,15.30,2010,12,1,48,2,2010-12-01,2010-12,8
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,2010,12,1,48,2,2010-12-01,2010-12,8
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,22.00,2010,12,1,48,2,2010-12-01,2010-12,8
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,2010,12,1,48,2,2010-12-01,2010-12,8
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,2010,12,1,48,2,2010-12-01,2010-12,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1044843,536364,21175,GIN + TONIC DIET METAL SIGN,12,2010-11-30 19:35:00,2.10,14441.0,United Kingdom,25.20,2010,11,30,48,1,2010-11-30,2010-11,19
1044844,536364,21524,DOORMAT SPOTTY HOME SWEET HOME,2,2010-11-30 19:35:00,7.95,14441.0,United Kingdom,15.90,2010,11,30,48,1,2010-11-30,2010-11,19
1044845,536364,21098,CHRISTMAS TOILET ROLL,12,2010-11-30 19:35:00,1.25,14441.0,United Kingdom,15.00,2010,11,30,48,1,2010-11-30,2010-11,19
1044846,536364,21181,PLEASE ONE PERSON METAL SIGN,24,2010-11-30 19:35:00,2.10,14441.0,United Kingdom,50.40,2010,11,30,48,1,2010-11-30,2010-11,19


In [105]:
# Create daily data
new_df = data_cleaned[["Invoice", "StockCode", "Description", "Quantity", "Price", "Customer ID", "InvoiceYear", "InvoiceMonth", "InvoiceDay", "InvoiceWeek", "InvoiceWeekday", "Revenue", "Date", "Hour"]]
daily_data = new_df.groupby(['Date', 'StockCode']).agg({'Quantity': 'sum'}).reset_index()

# Convert Date to datetime
daily_data['Date'] = pd.to_datetime(daily_data['Date'])
daily_data.sort_values(by='Date', inplace=True)
daily_data

Unnamed: 0,Date,StockCode,Quantity
0,2009-12-01,10002,12
687,2009-12-01,82567,3
688,2009-12-01,82578,1
689,2009-12-01,82580,13
690,2009-12-01,82581,1
...,...,...,...
437966,2011-12-08,22402,2
437965,2011-12-08,22398,50
437964,2011-12-08,22395,48
437970,2011-12-08,22411,37


In [107]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [109]:
data = daily_data
df = pd.DataFrame(data)
df

Unnamed: 0,Date,StockCode,Quantity
0,2009-12-01,10002,12
687,2009-12-01,82567,3
688,2009-12-01,82578,1
689,2009-12-01,82580,13
690,2009-12-01,82581,1
...,...,...,...
437966,2011-12-08,22402,2
437965,2011-12-08,22398,50
437964,2011-12-08,22395,48
437970,2011-12-08,22411,37


In [113]:
# Convert Date to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Sort by Date and StockCode
df = df.sort_values(by=['StockCode', 'Date'])
df

Unnamed: 0,Date,StockCode,Quantity
0,2009-12-01,10002,12
2129,2009-12-03,10002,6
3211,2009-12-04,10002,73
4502,2009-12-06,10002,49
5463,2009-12-07,10002,2
...,...,...,...
21590,2010-01-14,TEST001,5
25704,2010-01-22,TEST001,5
36625,2010-02-12,TEST001,10
69166,2010-04-12,TEST001,5


In [121]:
# Create lag features (e.g., lag_1, lag_2)
for lag in range(1, 4):  # Create lag_1, lag_2, lag_3
    df[f'lag_{lag}'] = df.groupby('StockCode')['Quantity'].shift(lag)
df

Unnamed: 0,Date,StockCode,Quantity,lag_1,lag_2,lag_3
0,2009-12-01,10002,12,,,
2129,2009-12-03,10002,6,12.0,,
3211,2009-12-04,10002,73,6.0,12.0,
4502,2009-12-06,10002,49,73.0,6.0,12.0
5463,2009-12-07,10002,2,49.0,73.0,6.0
...,...,...,...,...,...,...
21590,2010-01-14,TEST001,5,10.0,5.0,20.0
25704,2010-01-22,TEST001,5,5.0,10.0,5.0
36625,2010-02-12,TEST001,10,5.0,5.0,10.0
69166,2010-04-12,TEST001,5,10.0,5.0,5.0


In [123]:
# Create rolling mean and standard deviation features
df['rolling_mean_7'] = df.groupby('StockCode')['Quantity'].transform(lambda x: x.rolling(7, min_periods=1).mean())
df['rolling_std_7'] = df.groupby('StockCode')['Quantity'].transform(lambda x: x.rolling(7, min_periods=1).std())
df

Unnamed: 0,Date,StockCode,Quantity,lag_1,lag_2,lag_3,rolling_mean_7,rolling_std_7
0,2009-12-01,10002,12,,,,12.000000,
2129,2009-12-03,10002,6,12.0,,,9.000000,4.242641
3211,2009-12-04,10002,73,6.0,12.0,,30.333333,37.072002
4502,2009-12-06,10002,49,73.0,6.0,12.0,35.000000,31.675437
5463,2009-12-07,10002,2,49.0,73.0,6.0,28.400000,31.149639
...,...,...,...,...,...,...,...,...
21590,2010-01-14,TEST001,5,10.0,5.0,20.0,10.000000,7.071068
25704,2010-01-22,TEST001,5,5.0,10.0,5.0,9.000000,6.519202
36625,2010-02-12,TEST001,10,5.0,5.0,10.0,9.166667,5.845226
69166,2010-04-12,TEST001,5,10.0,5.0,5.0,8.571429,5.563486


In [127]:
# Impute missing values caused by lag features with 0
df = df.fillna(0)
df

Unnamed: 0,Date,StockCode,Quantity,lag_1,lag_2,lag_3,rolling_mean_7,rolling_std_7
0,2009-12-01,10002,12,0.0,0.0,0.0,12.000000,0.000000
2129,2009-12-03,10002,6,12.0,0.0,0.0,9.000000,4.242641
3211,2009-12-04,10002,73,6.0,12.0,0.0,30.333333,37.072002
4502,2009-12-06,10002,49,73.0,6.0,12.0,35.000000,31.675437
5463,2009-12-07,10002,2,49.0,73.0,6.0,28.400000,31.149639
...,...,...,...,...,...,...,...,...
21590,2010-01-14,TEST001,5,10.0,5.0,20.0,10.000000,7.071068
25704,2010-01-22,TEST001,5,5.0,10.0,5.0,9.000000,6.519202
36625,2010-02-12,TEST001,10,5.0,5.0,10.0,9.166667,5.845226
69166,2010-04-12,TEST001,5,10.0,5.0,5.0,8.571429,5.563486


In [129]:
# Prepare training and testing datasets
X = df[['lag_1', 'lag_2', 'lag_3', 'rolling_mean_7', 'rolling_std_7']]
y = df['Quantity']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train

Unnamed: 0,lag_1,lag_2,lag_3,rolling_mean_7,rolling_std_7
430385,1.0,2.0,5.0,3.857143,3.891382
14803,28.0,25.0,53.0,25.714286,20.295437
208908,42.0,3.0,6.0,9.857143,14.264508
379620,1.0,2.0,6.0,4.000000,2.516611
282262,1.0,2.0,2.0,5.571429,4.117327
...,...,...,...,...,...
344546,10.0,9.0,10.0,9.285714,5.736267
391985,12.0,4.0,12.0,17.714286,10.796825
9487,33.0,17.0,1.0,23.000000,19.891372
48590,12.0,4.0,12.0,10.428571,3.823486


In [131]:
X_test

Unnamed: 0,lag_1,lag_2,lag_3,rolling_mean_7,rolling_std_7
194097,33.0,31.0,1.0,21.428571,17.193576
35845,4.0,2.0,4.0,4.142857,2.193063
296817,1.0,13.0,6.0,4.428571,4.353433
338792,20.0,1.0,40.0,20.142857,11.260973
114057,1.0,0.0,0.0,2.500000,2.121320
...,...,...,...,...,...
111550,24.0,7.0,1.0,13.142857,18.470053
57263,16.0,3.0,12.0,12.000000,9.036961
267903,2.0,30.0,4.0,16.857143,13.606721
144929,3.0,12.0,1.0,4.428571,5.223573


In [133]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [135]:
# Train an XGBoost Regressor
xgb = XGBRegressor(random_state=42, n_estimators=100, learning_rate=0.1, max_depth=5)
xgb.fit(X_train_scaled, y_train)

In [137]:
# Forecast future quantities and calculate forecasted quantities for the test set
forecasted_rows = []
actual_forecast_data = []

# Ensure test set is properly separated for forecasting
test_data = df.iloc[X_test.index]
test_data

Unnamed: 0,Date,StockCode,Quantity,lag_1,lag_2,lag_3,rolling_mean_7,rolling_std_7
25486,2010-01-22,22348,36,15.0,288.0,34.0,98.142857,130.051821
31096,2010-02-02,21077,6,4.0,24.0,384.0,63.571429,141.475389
229004,2010-12-17,22984,12,12.0,12.0,12.0,17.142857,9.441550
170943,2010-10-07,37467,13,1.0,12.0,1.0,6.000000,5.944185
132521,2010-08-04,21826,12,2.0,6.0,24.0,11.142857,6.817345
...,...,...,...,...,...,...,...,...
293770,2011-05-11,21793,8,2.0,10.0,8.0,5.571429,3.735289
90526,2010-05-20,21241,4,8.0,8.0,96.0,20.000000,33.545988
229334,2010-12-19,22736,5,10.0,1.0,10.0,8.285714,6.921327
299884,2011-05-22,22055,16,9.0,8.0,1.0,11.571429,5.855400


In [139]:
for stockcode in test_data['StockCode'].unique():
    stock_data = test_data[test_data['StockCode'] == stockcode]
    
    # Iterate over the test data for this stockcode
    for index, row in stock_data.iterrows():
        # Extract lag features for the current row
        lag_features = row[['lag_1', 'lag_2', 'lag_3', 'rolling_mean_7', 'rolling_std_7']].to_frame().T
        lag_features_scaled = scaler.transform(lag_features)
        
        # Forecast using the trained XGBoost model
        forecasted_quantity = xgb.predict(lag_features_scaled)[0]
        
        # Store the actual vs. forecasted values
        actual_forecast_data.append({
            'Date': row['Date'],
            'StockCode': stockcode,
            'Actual Quantity': row['Quantity'],
            'Forecasted Quantity': forecasted_quantity
        })

# Combine into a DataFrame
forecasted_df = pd.DataFrame(actual_forecast_data)
forecasted_df

Unnamed: 0,Date,StockCode,Actual Quantity,Forecasted Quantity
0,2010-01-22,22348,36,68.516823
1,2010-09-29,22348,15,56.924820
2,2010-11-29,22348,20,22.319950
3,2010-05-17,22348,1,9.762235
4,2010-09-26,22348,8,49.247059
...,...,...,...,...
87742,2011-06-13,90001B,2,2.431317
87743,2010-03-05,35096A,537,156.482300
87744,2009-12-13,90182C,2,2.673605
87745,2011-05-04,90125A,48,17.955582


In [140]:
# Evaluate the model
r2 = r2_score(forecasted_df['Actual Quantity'], forecasted_df['Forecasted Quantity'])
mse = mean_squared_error(forecasted_df['Actual Quantity'], forecasted_df['Forecasted Quantity'])
rmse = np.sqrt(mse)
mae = mean_absolute_error(forecasted_df['Actual Quantity'], forecasted_df['Forecasted Quantity'])

# Print metrics
print(f"R2 Score: {r2:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")

R2 Score: 0.3471
MSE: 9988.5732
RMSE: 99.9428
MAE: 18.2582


In [93]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor

# Seasonality function
def calculate_seasonality(df, sku_column='StockCode', sales_column='Quantity', time_column='week'):
    """
    Calculate seasonality index for each SKU based on its weekly sales pattern.
    
    Parameters:
        df (pd.DataFrame): Input DataFrame containing SKU sales data.
        sku_column (str): Name of the column representing SKU IDs (e.g., 'StockCode').
        sales_column (str): Name of the column representing sales (e.g., 'Quantity').
        time_column (str): Name of the column representing time (e.g., 'week').
        
    Returns:
        pd.DataFrame: DataFrame with a new 'seasonality' feature added.
    """
    # Calculate total yearly sales mean for each SKU
    yearly_mean = df.groupby(sku_column)[sales_column].mean().rename("total_sales")
    input_data = pd.merge(df, yearly_mean, on=sku_column, how='left')
    
    # Calculate seasonality index for the year (sales / total_sales)
    input_data["si_year"] = np.where(input_data["total_sales"] != 0, 
                                     input_data[sales_column] / input_data["total_sales"], 
                                     0)
    
    # Calculate weekly seasonality mean for each SKU
    weekly_mean = input_data.groupby([sku_column, time_column])["si_year"].mean().rename("seasonality")
    input_data = pd.merge(input_data, weekly_mean, on=[sku_column, time_column], how='left')
    
    return input_data


df = pd.DataFrame(daily_data)
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values(by=['StockCode', 'Date'])

# Create week column
df['week'] = df['Date'].dt.isocalendar().week

# Generate Lag and Rolling Features
for lag in range(1, 4):
    df[f'lag_{lag}'] = df.groupby('StockCode')['Quantity'].shift(lag)

df['rolling_mean_7'] = df.groupby('StockCode')['Quantity'].transform(lambda x: x.rolling(7, min_periods=1).mean())
df['rolling_std_7'] = df.groupby('StockCode')['Quantity'].transform(lambda x: x.rolling(7, min_periods=1).std())

# Fill Missing Values
df = df.fillna(0)

# Add Seasonality Feature
df_with_seasonality = calculate_seasonality(df, sku_column='StockCode', sales_column='Quantity', time_column='week')

# Prepare Features and Target
X = df_with_seasonality[['lag_1', 'lag_2', 'lag_3', 'rolling_mean_7', 'rolling_std_7', 'seasonality']]
y = df_with_seasonality['Quantity']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the Features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train XGBoost Regressor
xgb = XGBRegressor(random_state=42, n_estimators=100, learning_rate=0.1, max_depth=5)
xgb.fit(X_train_scaled, y_train)

# Forecast Future Quantities
forecasted_rows = []
actual_forecast_data = []

test_data = df_with_seasonality.iloc[X_test.index]

for stockcode in test_data['StockCode'].unique():
    stock_data = test_data[test_data['StockCode'] == stockcode]
    
    for index, row in stock_data.iterrows():
        lag_features = row[['lag_1', 'lag_2', 'lag_3', 'rolling_mean_7', 'rolling_std_7', 'seasonality']].to_frame().T
        lag_features_scaled = scaler.transform(lag_features)
        forecasted_quantity = xgb.predict(lag_features_scaled)[0]
        
        actual_forecast_data.append({
            'Date': row['Date'],
            'StockCode': stockcode,
            'Actual Quantity': row['Quantity'],
            'Forecasted Quantity': forecasted_quantity
        })

forecasted_df = pd.DataFrame(actual_forecast_data)

# Evaluate Model Performance
r2 = r2_score(forecasted_df['Actual Quantity'], forecasted_df['Forecasted Quantity'])
mse = mean_squared_error(forecasted_df['Actual Quantity'], forecasted_df['Forecasted Quantity'])
rmse = np.sqrt(mse)
mae = mean_absolute_error(forecasted_df['Actual Quantity'], forecasted_df['Forecasted Quantity'])

print(f"R2 Score: {r2:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
forecasted_df

R2 Score: 0.0653
MSE: 66196.9476
RMSE: 257.2877
MAE: 15.4276


Unnamed: 0,Date,StockCode,Actual Quantity,Forecasted Quantity
0,2010-11-05,22356,1,19.407393
1,2010-06-09,22356,126,68.944237
2,2010-12-10,22356,20,14.705484
3,2010-04-22,22356,30,25.673262
4,2011-03-14,22356,124,35.807503
...,...,...,...,...
87742,2010-09-23,72821,2,6.816642
87743,2009-12-02,78056,2,3.743295
87744,2010-06-01,79140,3,5.570665
87745,2009-12-02,21838,1,3.743295
