In [1]:
import pandas as pd
from xgboost import XGBRegressor
from joblib import dump, load
from sklearn.preprocessing import LabelEncoder

In [2]:
train = pd.read_csv("../../Data/Kaggle/StoreSales/processed_train_v2.csv")

In [3]:
train = train.drop(columns=['id','date'])

In [4]:
train.head()

Unnamed: 0,store_nbr,family,sales,onpromotion,typeholiday,dcoilwtico,city,state,typestores,cluster,day_of_week,day,month,year
0,1,0,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
1,1,1,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
2,1,2,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
3,1,3,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
4,1,4,0.0,0,3,93.14,18,12,3,13,2,1,1,2013


In [5]:
train.tail()

Unnamed: 0,store_nbr,family,sales,onpromotion,typeholiday,dcoilwtico,city,state,typestores,cluster,day_of_week,day,month,year
3000883,9,28,438.133,0,3,47.57,18,12,1,6,2,15,8,2017
3000884,9,29,154.553,1,3,47.57,18,12,1,6,2,15,8,2017
3000885,9,30,2419.729,148,3,47.57,18,12,1,6,2,15,8,2017
3000886,9,31,121.0,8,3,47.57,18,12,1,6,2,15,8,2017
3000887,9,32,16.0,0,3,47.57,18,12,1,6,2,15,8,2017


In [6]:
X = train.drop(['sales'], axis=1)
y = train['sales']

In [7]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000888 entries, 0 to 3000887
Data columns (total 13 columns):
 #   Column       Dtype  
---  ------       -----  
 0   store_nbr    int64  
 1   family       int64  
 2   onpromotion  int64  
 3   typeholiday  int64  
 4   dcoilwtico   float64
 5   city         int64  
 6   state        int64  
 7   typestores   int64  
 8   cluster      int64  
 9   day_of_week  int64  
 10  day          int64  
 11  month        int64  
 12  year         int64  
dtypes: float64(1), int64(12)
memory usage: 297.6 MB


In [8]:
family_encoder = LabelEncoder()
typeholiday_encoder = LabelEncoder()
city_encoder = LabelEncoder()
state_encoder = LabelEncoder()
typestores_encoder = LabelEncoder()

In [9]:
X['family'] = family_encoder.fit_transform(X['family'])
X['typeholiday'] = typeholiday_encoder.fit_transform(X['typeholiday'])
X['city'] = city_encoder.fit_transform(X['city'])
X['state'] = state_encoder.fit_transform(X['state'])
X['typestores'] = typestores_encoder.fit_transform(X['typestores'])

In [10]:
with open('joblib/family_encoder.pkl', 'wb') as file:
    dump(family_encoder, file)
    
with open('joblib/typeholiday_encoder.pkl', 'wb') as file:
    dump(typeholiday_encoder, file)

with open('joblib/city_encoder.pkl', 'wb') as file:
    dump(city_encoder, file)
    
with open('joblib/state_encoder.pkl', 'wb') as file:
    dump(state_encoder, file)
    
with open('joblib/typestores_encoder.pkl', 'wb') as file:
    dump(typestores_encoder, file)

In [11]:
X.shape

(3000888, 13)

In [12]:
X.head()

Unnamed: 0,store_nbr,family,onpromotion,typeholiday,dcoilwtico,city,state,typestores,cluster,day_of_week,day,month,year
0,1,0,0,3,93.14,18,12,3,13,2,1,1,2013
1,1,1,0,3,93.14,18,12,3,13,2,1,1,2013
2,1,2,0,3,93.14,18,12,3,13,2,1,1,2013
3,1,3,0,3,93.14,18,12,3,13,2,1,1,2013
4,1,4,0,3,93.14,18,12,3,13,2,1,1,2013


In [13]:
batch_size = 1000

In [14]:
num_batches = X.shape[0] // batch_size + 1

In [15]:
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

In [16]:
for i in range(num_batches):
    start_idx = i * batch_size
    end_idx = (i + 1) * batch_size

    X_batch = X.iloc[start_idx:end_idx]
    y_batch = y.iloc[start_idx:end_idx]

    model.fit(X_batch, y_batch)

    print(f"Batch {i + 1}/{num_batches} completed")

Batch 1/3001 completed
Batch 2/3001 completed
Batch 3/3001 completed
Batch 4/3001 completed
Batch 5/3001 completed
Batch 6/3001 completed
Batch 7/3001 completed
Batch 8/3001 completed
Batch 9/3001 completed
Batch 10/3001 completed
Batch 11/3001 completed
Batch 12/3001 completed
Batch 13/3001 completed
Batch 14/3001 completed
Batch 15/3001 completed
Batch 16/3001 completed
Batch 17/3001 completed
Batch 18/3001 completed
Batch 19/3001 completed
Batch 20/3001 completed
Batch 21/3001 completed
Batch 22/3001 completed
Batch 23/3001 completed
Batch 24/3001 completed
Batch 25/3001 completed
Batch 26/3001 completed
Batch 27/3001 completed
Batch 28/3001 completed
Batch 29/3001 completed
Batch 30/3001 completed
Batch 31/3001 completed
Batch 32/3001 completed
Batch 33/3001 completed
Batch 34/3001 completed
Batch 35/3001 completed
Batch 36/3001 completed
Batch 37/3001 completed
Batch 38/3001 completed
Batch 39/3001 completed
Batch 40/3001 completed
Batch 41/3001 completed
Batch 42/3001 completed
B

Batch 338/3001 completed
Batch 339/3001 completed
Batch 340/3001 completed
Batch 341/3001 completed
Batch 342/3001 completed
Batch 343/3001 completed
Batch 344/3001 completed
Batch 345/3001 completed
Batch 346/3001 completed
Batch 347/3001 completed
Batch 348/3001 completed
Batch 349/3001 completed
Batch 350/3001 completed
Batch 351/3001 completed
Batch 352/3001 completed
Batch 353/3001 completed
Batch 354/3001 completed
Batch 355/3001 completed
Batch 356/3001 completed
Batch 357/3001 completed
Batch 358/3001 completed
Batch 359/3001 completed
Batch 360/3001 completed
Batch 361/3001 completed
Batch 362/3001 completed
Batch 363/3001 completed
Batch 364/3001 completed
Batch 365/3001 completed
Batch 366/3001 completed
Batch 367/3001 completed
Batch 368/3001 completed
Batch 369/3001 completed
Batch 370/3001 completed
Batch 371/3001 completed
Batch 372/3001 completed
Batch 373/3001 completed
Batch 374/3001 completed
Batch 375/3001 completed
Batch 376/3001 completed
Batch 377/3001 completed


Batch 667/3001 completed
Batch 668/3001 completed
Batch 669/3001 completed
Batch 670/3001 completed
Batch 671/3001 completed
Batch 672/3001 completed
Batch 673/3001 completed
Batch 674/3001 completed
Batch 675/3001 completed
Batch 676/3001 completed
Batch 677/3001 completed
Batch 678/3001 completed
Batch 679/3001 completed
Batch 680/3001 completed
Batch 681/3001 completed
Batch 682/3001 completed
Batch 683/3001 completed
Batch 684/3001 completed
Batch 685/3001 completed
Batch 686/3001 completed
Batch 687/3001 completed
Batch 688/3001 completed
Batch 689/3001 completed
Batch 690/3001 completed
Batch 691/3001 completed
Batch 692/3001 completed
Batch 693/3001 completed
Batch 694/3001 completed
Batch 695/3001 completed
Batch 696/3001 completed
Batch 697/3001 completed
Batch 698/3001 completed
Batch 699/3001 completed
Batch 700/3001 completed
Batch 701/3001 completed
Batch 702/3001 completed
Batch 703/3001 completed
Batch 704/3001 completed
Batch 705/3001 completed
Batch 706/3001 completed


Batch 1000/3001 completed
Batch 1001/3001 completed
Batch 1002/3001 completed
Batch 1003/3001 completed
Batch 1004/3001 completed
Batch 1005/3001 completed
Batch 1006/3001 completed
Batch 1007/3001 completed
Batch 1008/3001 completed
Batch 1009/3001 completed
Batch 1010/3001 completed
Batch 1011/3001 completed
Batch 1012/3001 completed
Batch 1013/3001 completed
Batch 1014/3001 completed
Batch 1015/3001 completed
Batch 1016/3001 completed
Batch 1017/3001 completed
Batch 1018/3001 completed
Batch 1019/3001 completed
Batch 1020/3001 completed
Batch 1021/3001 completed
Batch 1022/3001 completed
Batch 1023/3001 completed
Batch 1024/3001 completed
Batch 1025/3001 completed
Batch 1026/3001 completed
Batch 1027/3001 completed
Batch 1028/3001 completed
Batch 1029/3001 completed
Batch 1030/3001 completed
Batch 1031/3001 completed
Batch 1032/3001 completed
Batch 1033/3001 completed
Batch 1034/3001 completed
Batch 1035/3001 completed
Batch 1036/3001 completed
Batch 1037/3001 completed
Batch 1038/3

Batch 1318/3001 completed
Batch 1319/3001 completed
Batch 1320/3001 completed
Batch 1321/3001 completed
Batch 1322/3001 completed
Batch 1323/3001 completed
Batch 1324/3001 completed
Batch 1325/3001 completed
Batch 1326/3001 completed
Batch 1327/3001 completed
Batch 1328/3001 completed
Batch 1329/3001 completed
Batch 1330/3001 completed
Batch 1331/3001 completed
Batch 1332/3001 completed
Batch 1333/3001 completed
Batch 1334/3001 completed
Batch 1335/3001 completed
Batch 1336/3001 completed
Batch 1337/3001 completed
Batch 1338/3001 completed
Batch 1339/3001 completed
Batch 1340/3001 completed
Batch 1341/3001 completed
Batch 1342/3001 completed
Batch 1343/3001 completed
Batch 1344/3001 completed
Batch 1345/3001 completed
Batch 1346/3001 completed
Batch 1347/3001 completed
Batch 1348/3001 completed
Batch 1349/3001 completed
Batch 1350/3001 completed
Batch 1351/3001 completed
Batch 1352/3001 completed
Batch 1353/3001 completed
Batch 1354/3001 completed
Batch 1355/3001 completed
Batch 1356/3

Batch 1635/3001 completed
Batch 1636/3001 completed
Batch 1637/3001 completed
Batch 1638/3001 completed
Batch 1639/3001 completed
Batch 1640/3001 completed
Batch 1641/3001 completed
Batch 1642/3001 completed
Batch 1643/3001 completed
Batch 1644/3001 completed
Batch 1645/3001 completed
Batch 1646/3001 completed
Batch 1647/3001 completed
Batch 1648/3001 completed
Batch 1649/3001 completed
Batch 1650/3001 completed
Batch 1651/3001 completed
Batch 1652/3001 completed
Batch 1653/3001 completed
Batch 1654/3001 completed
Batch 1655/3001 completed
Batch 1656/3001 completed
Batch 1657/3001 completed
Batch 1658/3001 completed
Batch 1659/3001 completed
Batch 1660/3001 completed
Batch 1661/3001 completed
Batch 1662/3001 completed
Batch 1663/3001 completed
Batch 1664/3001 completed
Batch 1665/3001 completed
Batch 1666/3001 completed
Batch 1667/3001 completed
Batch 1668/3001 completed
Batch 1669/3001 completed
Batch 1670/3001 completed
Batch 1671/3001 completed
Batch 1672/3001 completed
Batch 1673/3

Batch 1951/3001 completed
Batch 1952/3001 completed
Batch 1953/3001 completed
Batch 1954/3001 completed
Batch 1955/3001 completed
Batch 1956/3001 completed
Batch 1957/3001 completed
Batch 1958/3001 completed
Batch 1959/3001 completed
Batch 1960/3001 completed
Batch 1961/3001 completed
Batch 1962/3001 completed
Batch 1963/3001 completed
Batch 1964/3001 completed
Batch 1965/3001 completed
Batch 1966/3001 completed
Batch 1967/3001 completed
Batch 1968/3001 completed
Batch 1969/3001 completed
Batch 1970/3001 completed
Batch 1971/3001 completed
Batch 1972/3001 completed
Batch 1973/3001 completed
Batch 1974/3001 completed
Batch 1975/3001 completed
Batch 1976/3001 completed
Batch 1977/3001 completed
Batch 1978/3001 completed
Batch 1979/3001 completed
Batch 1980/3001 completed
Batch 1981/3001 completed
Batch 1982/3001 completed
Batch 1983/3001 completed
Batch 1984/3001 completed
Batch 1985/3001 completed
Batch 1986/3001 completed
Batch 1987/3001 completed
Batch 1988/3001 completed
Batch 1989/3

Batch 2268/3001 completed
Batch 2269/3001 completed
Batch 2270/3001 completed
Batch 2271/3001 completed
Batch 2272/3001 completed
Batch 2273/3001 completed
Batch 2274/3001 completed
Batch 2275/3001 completed
Batch 2276/3001 completed
Batch 2277/3001 completed
Batch 2278/3001 completed
Batch 2279/3001 completed
Batch 2280/3001 completed
Batch 2281/3001 completed
Batch 2282/3001 completed
Batch 2283/3001 completed
Batch 2284/3001 completed
Batch 2285/3001 completed
Batch 2286/3001 completed
Batch 2287/3001 completed
Batch 2288/3001 completed
Batch 2289/3001 completed
Batch 2290/3001 completed
Batch 2291/3001 completed
Batch 2292/3001 completed
Batch 2293/3001 completed
Batch 2294/3001 completed
Batch 2295/3001 completed
Batch 2296/3001 completed
Batch 2297/3001 completed
Batch 2298/3001 completed
Batch 2299/3001 completed
Batch 2300/3001 completed
Batch 2301/3001 completed
Batch 2302/3001 completed
Batch 2303/3001 completed
Batch 2304/3001 completed
Batch 2305/3001 completed
Batch 2306/3

Batch 2589/3001 completed
Batch 2590/3001 completed
Batch 2591/3001 completed
Batch 2592/3001 completed
Batch 2593/3001 completed
Batch 2594/3001 completed
Batch 2595/3001 completed
Batch 2596/3001 completed
Batch 2597/3001 completed
Batch 2598/3001 completed
Batch 2599/3001 completed
Batch 2600/3001 completed
Batch 2601/3001 completed
Batch 2602/3001 completed
Batch 2603/3001 completed
Batch 2604/3001 completed
Batch 2605/3001 completed
Batch 2606/3001 completed
Batch 2607/3001 completed
Batch 2608/3001 completed
Batch 2609/3001 completed
Batch 2610/3001 completed
Batch 2611/3001 completed
Batch 2612/3001 completed
Batch 2613/3001 completed
Batch 2614/3001 completed
Batch 2615/3001 completed
Batch 2616/3001 completed
Batch 2617/3001 completed
Batch 2618/3001 completed
Batch 2619/3001 completed
Batch 2620/3001 completed
Batch 2621/3001 completed
Batch 2622/3001 completed
Batch 2623/3001 completed
Batch 2624/3001 completed
Batch 2625/3001 completed
Batch 2626/3001 completed
Batch 2627/3

Batch 2907/3001 completed
Batch 2908/3001 completed
Batch 2909/3001 completed
Batch 2910/3001 completed
Batch 2911/3001 completed
Batch 2912/3001 completed
Batch 2913/3001 completed
Batch 2914/3001 completed
Batch 2915/3001 completed
Batch 2916/3001 completed
Batch 2917/3001 completed
Batch 2918/3001 completed
Batch 2919/3001 completed
Batch 2920/3001 completed
Batch 2921/3001 completed
Batch 2922/3001 completed
Batch 2923/3001 completed
Batch 2924/3001 completed
Batch 2925/3001 completed
Batch 2926/3001 completed
Batch 2927/3001 completed
Batch 2928/3001 completed
Batch 2929/3001 completed
Batch 2930/3001 completed
Batch 2931/3001 completed
Batch 2932/3001 completed
Batch 2933/3001 completed
Batch 2934/3001 completed
Batch 2935/3001 completed
Batch 2936/3001 completed
Batch 2937/3001 completed
Batch 2938/3001 completed
Batch 2939/3001 completed
Batch 2940/3001 completed
Batch 2941/3001 completed
Batch 2942/3001 completed
Batch 2943/3001 completed
Batch 2944/3001 completed
Batch 2945/3

In [17]:
dump(model, 'joblib/M100.joblib')

['joblib/M100.joblib']