# Data Cleaning

In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import pyarrow.feather as feather
import warnings
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore', category=RuntimeWarning)

In [2]:
"""load dataset"""
# Path of the raw training set; the save cell later derives the output file
# name from this same variable.
file_path = 'train_data.feather'
data = feather.read_feather(file_path)
# Bare last expression: the notebook displays the shape of the raw frame.
data.shape

(567614, 881)

In [3]:
"""Sample"""
# Continue with a 20% random sample of the rows; the fixed seed makes the
# draw repeatable.
df = data.sample(frac=0.2, random_state=42)
print(df.shape)

(113523, 881)


In [4]:
"""Transform target ot log scale"""
# Overwrite the target column with its natural logarithm.
# NOTE(review): the column is replaced by its own log, so re-running this cell
# applies the log a second time. np.log also returns -inf / NaN for zero or
# negative inputs -- presumably the target is strictly positive; confirm.
df['TLJYWBE'] = np.log(df['TLJYWBE'])
print("Target Transformed to log scale")

Target Transformed to log scale


In [5]:
"""remove empty columns"""
# A column counts as "empty" when every single entry in it is missing.
all_missing = df.isna().all(axis=0)
empty = df.columns[all_missing]
df.drop(empty, axis=1, inplace=True)
print(f"Empty columns dropped: {len(empty)}")

Empty columns dropped: 44


In [6]:
"""Handeling object columns"""
# Literal 'nan' strings are missing values in disguise.
df.replace('nan', np.nan, inplace=True)

# Only low-cardinality text columns (<= 10 distinct values) are treated as
# categorical; they are lower-cased so that case variants collapse together.
obj_cols_to_lower = df.select_dtypes(include='object').nunique().loc[lambda x: x <= 10].index
df[obj_cols_to_lower] = df[obj_cols_to_lower].apply(lambda x: x.str.lower())

binary_col, cat_col = [], []
binary_map = {'true': 1, 'false': 0}
dummy_frames = []
for c in obj_cols_to_lower:
    unique_values = set(df[c].dropna().unique())
    if unique_values == {'true', 'false'}:
        # Genuine true/false flag -> nullable 0/1 integer; missing stays <NA>.
        df[c] = pd.to_numeric(df[c].map(binary_map), errors='coerce').astype('Int64')
        binary_col.append(c)
    else:
        # Everything else is one-hot encoded with an extra indicator for
        # missing values. This deliberately includes two-valued columns whose
        # values are not 'true'/'false': sending those through binary_map
        # would replace every entry with NaN and silently destroy the column.
        dummy_frames.append(pd.get_dummies(df[c], dummy_na=True, prefix=c))
        cat_col.append(c)

# Swap the encoded source columns for their indicator columns with a single
# concat instead of growing the frame once per loop iteration.
df = pd.concat([df.drop(columns=cat_col)] + dummy_frames, axis=1)

"""convert other object columns to numerical"""
# The object columns still left have more than 10 distinct values; entries
# that do not parse as numbers become NaN.
for c in df.select_dtypes(include=[object]).columns:
    df[c] = pd.to_numeric(df[c], errors='coerce')

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 113523 entries, 658196 to 28028
Columns: 857 entries, HKTLMYY to ZHPRSBN_nan
dtypes: bool(40), float64(774), int64(43)
memory usage: 712.8 MB


In [7]:
"""Remove columns with more than 30% missing values"""
# Fraction of missing entries per column; columns above 30% are discarded.
missing_share = df.isna().mean()
too_sparse = missing_share.gt(0.3)
missing_cols = list(df.columns[too_sparse])
df.drop(missing_cols, axis=1, inplace=True)
print(f"Columns with more than 30% missing values dropped: {len(missing_cols)}")
df.shape

Columns with more than 30% missing values dropped: 90


(113523, 767)

In [8]:
"""zero Variance"""
# Classify constant columns by whether they are fully populated.
cols = df.columns
zero_std_mask = df[cols].std() == 0
count_mask = df[cols].count() == df.shape[0]
# Constant and complete: no information at all -> dropped below.
zero_std_all = cols[zero_std_mask & count_mask].tolist()
# Constant where observed but with gaps: the only signal left is
# "observed vs. missing", so the column is turned into that flag.
zero_std = cols[zero_std_mask & ~count_mask].tolist()
for c in zero_std:
    # Encode presence directly. Filling the gaps with 0 and then comparing
    # against the constant gives the same flag for a non-zero constant, but
    # collapses to an all-True column whenever the constant itself is 0.
    df[c] = df[c].notna()
df.drop(columns=zero_std_all, inplace=True)
print(f"Zero Variance columns dropped: {len(zero_std_all)}")
df.shape

Zero Variance columns dropped: 37


(113523, 730)

In [9]:
"""Boolean columns"""
# Columns with exactly two distinct observed values become boolean flags.
bool_cols = df.columns[df.nunique(dropna=True) == 2]
for col in bool_cols:
    if df[col].dtype == bool:
        # Already a proper flag; re-mapping it by order of appearance could
        # only leave it unchanged or invert it.
        continue
    # Sort so the encoding is deterministic: smaller value -> False, larger
    # value -> True, independent of which one happens to appear first.
    low, high = sorted(df[col].dropna().unique())
    missing = df[col].isna()
    is_high = df[col].eq(high).fillna(False).astype(bool)
    # Casting NaN to bool yields True, so missing entries have to be resolved
    # explicitly. They take the majority class (False on a tie), which is the
    # value a median fill of a two-valued column would pick.
    majority_is_high = bool(2 * is_high.sum() > (~missing).sum())
    df[col] = is_high | (missing & majority_is_high)
print("Boolean cols: ", len(bool_cols))

Boolean cols:  53


In [10]:
"""Outliers in Normal distribution"""
# Step 1: find numeric columns that pass D'Agostino-Pearson's normality test.
normal_cols = []
non_bool_cols = df.select_dtypes(include=['float64', 'int64']).columns
for column in non_bool_cols:
    # Named `observed`, not `data`: `data` is the raw dataset loaded at the
    # top of the notebook and must not be overwritten by a loop variable.
    observed = df[column].dropna()
    if len(observed) > 7:
        stat, p = stats.normaltest(observed)
        if p > 0.05:
            normal_cols.append(column)

# Step 2: keep only rows that lie within mean +/- 3 std on every such column.
mask = pd.Series(True, index=df.index)
means = df[normal_cols].mean()
std_devs = df[normal_cols].std()
for col in normal_cols:
    lower = means[col] - 3 * std_devs[col]
    upper = means[col] + 3 * std_devs[col]
    # A missing value is not an outlier. NaN fails both bound comparisons, so
    # without the isna() term every row with a gap would be dropped here.
    mask &= df[col].between(lower, upper) | df[col].isna()

print("Outliers dropped:", len(df.index[~mask]))
df.drop(df.index[~mask], inplace=True)

Outliers dropped: 871


In [11]:
"""standardizing numeric data"""
# Scale a copy so the unscaled frame `df` stays available to later cells.
df_scaled = df.copy()
scaler = StandardScaler()
# Boolean flags are left untouched; every other column is standardized.
non_bool_columns = df_scaled.select_dtypes(exclude=['bool']).columns
scaled_values = scaler.fit_transform(df_scaled[non_bool_columns])
df_scaled[non_bool_columns] = scaled_values

In [12]:
"""Impute missing values"""
from sklearn.impute import SimpleImputer

# Report how many gaps exist before filling them.
print("number of missing values:", df.isnull().sum().sum())

# Fill every gap with its column median; the result is rebuilt as a frame
# carrying the scaled frame's column labels.
imputer = SimpleImputer(strategy='median')
imputed_values = imputer.fit_transform(df_scaled)
df_imputed = pd.DataFrame(imputed_values, columns=df_scaled.columns)

# Sanity check: the worst column should now report zero missing entries.
print("remaining missing value:", df_imputed.isnull().sum().max())

number of missing values: 510775
remaining missing value: 0


In [13]:
# Collect the label of every column that is an exact copy of a column to its
# left, then drop those labels.
duplicates = set()
column_labels = df_imputed.columns
n_columns = df_imputed.shape[1]
for left in range(n_columns):
    reference = df_imputed.iloc[:, left]
    duplicates.update(
        column_labels[right]
        for right in range(left + 1, n_columns)
        if reference.equals(df_imputed.iloc[:, right])
    )

df_reduced = df_imputed.drop(columns=list(duplicates))
df_reduced.shape

(112652, 672)

# Final features (selection process below)

In [25]:
"""final feature selected"""
# Hard-coded column subset that the statement after this list pulls out of
# df_reduced; the last entry, 'TLJYWBE', is the target column.
# Per the section heading, the names come from the selection process further
# down in the notebook.
selected_cols = ['MSAHYEA', 'DJANPLY', 'LJWUXOV', 'VZOZKWX', 'MRUWBZT', 'DVSZBLN', 'DMTXDGF',
                 'SJZKEVZ', 'PRVIFOQ', 'VMWDQGF', 'UNFEABR', 'HKTLMYY', 'XIRNYHK', 'OOASVXJ',
                 'FXACWUA', 'KDQUJOB', 'KPQSPBC', 'XEUMIIT', 'DUOKWOI', 'STMKCSJ', 'WLULXFN',
                 'AVSMOFQ', 'CNGIUEP', 'LKFNGDB', 'LLZRQRY', 'NYWUAUO', 'LGVRVXB', 'CMSGVTL',
                 'EULURHL', 'NSERJIK', 'WGQRBHJ', 'SBUETBP', 'DDGIRQA', 'JIOWSAX', 'EYILHOU',
                 'FFJOGRA', 'FPBFOEB', 'JEKQIKE', 'VYTOISY', 'AIKOJYC', 'AHVIXII', 'RTVQHPO',
                 'HFYZKSZ', 'YLHQTEA', 'LCAOHWW', 'AWMGPPL', 'IIYLALO', 'SRQKQTX', 'JNEVNAR',
                 'HWKCDPO', 'UTGKBXG', 'NIVQSOG', 'TPUASCQ', 'FCMPHJR', 'BYEQVGG', 'LLHXSPF',
                 'PDWQQDP', 'LQEHWDR', 'VFXSABJ', 'DTJITAO', 'DAEPCOR', 'OFEEPAC', 'RNKJCEN',
                 'AYAJVQL', 'KPZEEJR', 'MOSECGI', 'XLVHGLO', 'DXWRFDA', 'JTIJWNL', 'AKUNFFN',
                 'KPXMEBJ', 'GBGZJZO', 'GRBGIZR', 'KNFIDTO', 'UOGNHSF', 'PSXOLCG', 'FAIOOOV',
                 'UHMRZHI', 'FBBFYZM', 'SQODRRP', 'NAWMDDT', 'MDBYIRV', 'SPKCFPP', 'BGDAMPC',
                 'DANYGJI', 'OXRCIYW', 'IKMWIOV', 'MQXCIBE', 'OOVNYOI', 'LTCKGYN', 'HBQPQTD',
                 'SDEUDHY', 'BHSHCHU', 'DDPBVDN', 'JIWJHVI', 'ZRYDRXT', 'GOBJALH', 'LOGULZS',
                 'ARVLGNZ', 'MWHYOSB', 'JMFGDPB', 'IHCEXCN', 'GQHLWWM', 'AWJPBPO', 'AGTCLZR',
                 'AEXRRBM', 'OGYQNUB', 'APJFLOK', 'GKACPXS', 'JWXNCNT', 'FUTFIRO', 'ENIRWLT',
                 'NVBZJEU', 'BGHDMAS', 'TKQUNLP', 'RIEVFEX', 'QASZGHA', 'MRBOALK', 'DNHHKRL',
                 'ZKXMWHB', 'SMHHBFH', 'EDASGHM', 'RCJGZLT', 'UZGUYFK', 'LCFCVCB', 'VEUJYWN',
                 'IJRFPEK', 'UMJYMGD', 'XWQPLHB', 'ITWTNIT', 'VTMSCPQ', 'DPPXTGF', 'BGPVFMN',
                 'KEJOIIS', 'HOVFLAR', 'HIFPGGQ', 'ZYMFJDH', 'WLFAYHX', 'NGVHWDO', 'HCSXZKW',
                 'JYSKSPX', 'OKIKPOJ', 'KGJACPV', 'XJJAKPY', 'CWLCCPL', 'HHSFOPV', 'ONKWSSO',
                 'GVDVKJH', 'KXISVUL', 'MUEKGHC', 'YCJYNVQ', 'HEBFRRA', 'KYRXYOZ', 'OADGFBJ',
                 'MKHJGMF', 'JIMYAME', 'ULYIQYO', 'VPAZKWG', 'BEUAZOI', 'TPYJDFW', 'UXZNNNB',
                 'QODSZMV', 'GUEUYTS', 'HSCCVTR', 'LBMUJNZ', 'BQLPPBV', 'RLHGEVY', 'ZCRFMLI',
                 'JOJRNMZ', 'IHLMFTI', 'OPBTWFJ', 'NQQYONU', 'ZPBDTHO', 'QWVFRRZ', 'GYFZSXY',
                 'POUQMMG', 'FWILIBF', 'ALKYTAY', 'IWHIYNA', 'HPCWOKU', 'NYRKOCF', 'JUMKXBC',
                 'LVMHJCI', 'VCPMKTP', 'KUXSPYJ', 'OULTOYT', 'ACVSDTK', 'HCBDBBS', 'RQTEIMY',
                 'DTSZUFG', 'JORKKSF', 'CQZHXYY', 'RAGOFAC', 'RIEGYBR', 'NTYHQRF', 'NGIJPET',
                 'ZMEJKZD', 'UEIYMWS', 'RSQGZAF', 'TTQVFLL', 'DDZYPGM', 'WBCMBKW', 'VOHXBZJ',
                 'UZPPCUZ', 'WTMETCB', 'VRNGUZU', 'PAIOUXI', 'PZYGETW', 'OYSQKAH', 'NMWUWJL',
                 'ACEFRZA', 'VXWAKJT', 'CWWUCQG', 'MHLQVAB', 'NDWFVEZ', 'MIIVLBT', 'PPNLAEY',
                 'XLHIYUD', 'KBNVGJL', 'UJTXHTS', 'UJYQCMY', 'TLJYWBE']

# Independent copy of the chosen columns, so the interaction columns added
# below do not touch df_reduced.
df_selected = df_reduced.loc[:, selected_cols].copy()

# Each entry names two existing columns joined by '_'; the new column with
# that same name holds their element-wise product.
significant_interactions = ['IKMWIOV_APJFLOK', 'IKMWIOV_KNFIDTO', 'IKMWIOV_ACEFRZA',
                            'OOASVXJ_AIKOJYC', 'AKUNFFN_AIKOJYC',
                            'IKMWIOV_QASZGHA', 'ALKYTAY_AIKOJYC', 'IKMWIOV_LLZRQRY',
                            'FFJOGRA_AIKOJYC', 'AIKOJYC_ACEFRZA', 'AVSMOFQ_AKUNFFN']

for pair in significant_interactions:
    left, right = pair.split('_')
    df_selected[pair] = df_selected[left] * df_selected[right]
df_selected.shape

(112652, 232)

In [19]:
"""save results"""
# Output name is the input path with '.feather' replaced by
# '_processed.feather'.
new_name = file_path.replace('.feather', '_processed.feather')
df_selected.to_feather(new_name)
print(f"file saved  - {new_name}")

file saved  - train_data_processed.feather


# Feature selection process

In [None]:
# Checkpoint: write the frame left after duplicate-column removal to disk.
df_reduced.to_feather("after_duplicates_01.feather")

In [None]:
# Resume from the checkpoint written after duplicate-column removal.
# Reading the raw 'train_data.feather' into df_reduced here would discard all
# of the cleaning above and hand unprocessed data to the feature-selection
# cells that follow.
df_reduced = feather.read_feather("after_duplicates_01.feather")
df_reduced.shape

In [None]:
def _fit_and_report(model, df, target='TLJYWBE', test_size=0.2, random_state=42):
    """Fit ``model`` on a train split of ``df`` and print validation MSE and R^2.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    df : DataFrame holding the feature columns plus the ``target`` column.
    target : name of the column to predict; every other column is a feature.
    test_size, random_state : forwarded to ``train_test_split``.
    """
    X = df.drop(columns=[target])
    y = df[target]
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    model.fit(X_train, y_train)
    y_val_pred = model.predict(X_val)

    # evaluation
    mse = mean_squared_error(y_val, y_val_pred)
    r2 = r2_score(y_val, y_val_pred)

    print(mse, r2)


def test_baseline(df, target='TLJYWBE'):
    """Print validation MSE and R^2 of a plain linear regression on ``df``."""
    _fit_and_report(LinearRegression(), df, target)


def test_xgb(df, target='TLJYWBE'):
    """Print validation MSE and R^2 of a default XGBoost regressor on ``df``.

    Relies on ``xgb`` (xgboost) having been imported before the call.
    """
    _fit_and_report(xgb.XGBRegressor(random_state=42), df, target)

In [14]:
"""Feature selection by correlation - filter method"""
# Only each column's correlation with the target is needed, so compute that
# single vector instead of the full column-by-column correlation matrix.
correlations = df_reduced.corrwith(df_reduced['TLJYWBE']).sort_values()
# Keep columns whose absolute correlation exceeds 0.01; NaN correlations
# fail the comparison and are excluded.
selected_features = correlations[correlations.abs() > 0.01].index.tolist()
# The target correlates perfectly with itself, so it is always in the list.
selected_features.remove('TLJYWBE')

print("num selected features:", len(selected_features))
df_ft_corr = df_reduced[selected_features + ['TLJYWBE']]
print(df_ft_corr.shape)

num selected features: 459
(112652, 460)


In [21]:
"""Further feature selection - model-based method"""
import xgboost as xgb

# Features and target taken from the correlation-filtered frame.
y = df_ft_corr['TLJYWBE']
X = df_ft_corr.drop(columns=['TLJYWBE'])

# Fit a boosted-tree regressor on all rows just to read its importances.
xg_reg = xgb.XGBRegressor(n_estimators=100, random_state=42)
xg_reg.fit(X, y)
xg_feature_importances = xg_reg.feature_importances_

# Rank features by importance and keep those above the 0.0015 cut-off.
xg_important_features = (
    pd.Series(xg_feature_importances, index=X.columns)
    .sort_values(ascending=False)
)
above_cutoff = xg_important_features > 0.0015
xg_features = list(xg_important_features[above_cutoff].index)

df_xg = df_ft_corr[xg_features + ['TLJYWBE']]
print("num selected features:", len(xg_features))
df_xg.shape

num selected features: 221


(112652, 222)

In [18]:
# """Feature engineering"""
# from sklearn.preprocessing import PolynomialFeatures
# X = df_xg.drop(columns=['TLJYWBE'])
# y = df_xg['TLJYWBE']
# 
# # Create polynomial features up to the second degree
# poly = PolynomialFeatures(interaction_only=True, include_bias=False)
# X_train_poly = poly.fit_transform(X)
# feature_names = poly.get_feature_names_out(X.columns)
# 
# df_poly = pd.DataFrame(X_train_poly, columns=feature_names)
# df_poly['TLJYWBE'] = y
# df_poly.shape


(112652, 56)

In [None]:
"""obtained by playing around a bit with feature engineering"""
# Each entry names two columns separated by a space; the pair string itself
# becomes the name of the product column created below.
significant_interactions = ['IKMWIOV APJFLOK', 'IKMWIOV KNFIDTO', 'IKMWIOV ACEFRZA',
                            'OOASVXJ AIKOJYC', 'AKUNFFN AIKOJYC',
                            'IKMWIOV QASZGHA', 'ALKYTAY AIKOJYC', 'IKMWIOV LLZRQRY',
                            'FFJOGRA AIKOJYC', 'AIKOJYC ACEFRZA', 'AVSMOFQ AKUNFFN']

"""Feature engineering"""
# from sklearn.preprocessing import PolynomialFeatures
# X = df_xg.drop(columns=['TLJYWBE'])
# y = df_xg['TLJYWBE']
# 
# # Create polynomial features up to the second degree
# poly = PolynomialFeatures(interaction_only=True, include_bias=False)
# X_train_poly = poly.fit_transform(X)
# feature_names = poly.get_feature_names_out(X.columns)
# 
# df_poly = pd.DataFrame(X_train_poly, columns=feature_names)
# df_poly['TLJYWBE'] = y
# df_poly.shape

# Add one product column per pair on top of a copy of the XGBoost-selected frame.
df_interact = df_xg.copy()
for pair in significant_interactions:
    left, right = pair.split()
    df_interact[pair] = df_interact[left].mul(df_interact[right])
df_interact.shape

In [None]:
# Displays the shapes of the four frames side by side: de-duplicated,
# correlation-filtered, importance-filtered, and with interaction columns.
df_reduced.shape, df_ft_corr.shape, df_xg.shape, df_interact.shape

In [None]:
# Linear-regression score for each stage of the feature pipeline, in order.
for stage_frame in (df_reduced, df_ft_corr, df_xg, df_interact):
    test_baseline(stage_frame)


In [None]:
# XGBoost score for each stage of the feature pipeline, in order.
for stage_frame in (df_reduced, df_ft_corr, df_xg, df_interact):
    test_xgb(stage_frame)

In [24]:
# Hard-coded list of feature names; it does not contain the target 'TLJYWBE'.
# It rebinds `selected_features`, which the correlation-filter cell also
# assigns.
# NOTE(review): no later cell visible here reads this list -- presumably a
# recorded snapshot of a selection result; confirm before relying on it.
selected_features = ['MSAHYEA', 'DJANPLY', 'LJWUXOV', 'VZOZKWX', 'MRUWBZT', 'DVSZBLN', 'DMTXDGF',
                     'SJZKEVZ', 'PRVIFOQ', 'VMWDQGF', 'UNFEABR', 'HKTLMYY', 'XIRNYHK', 'OOASVXJ',
                     'FXACWUA', 'KDQUJOB', 'KPQSPBC', 'XEUMIIT', 'DUOKWOI', 'STMKCSJ', 'WLULXFN',
                     'AVSMOFQ', 'CNGIUEP', 'LKFNGDB', 'LLZRQRY', 'NYWUAUO', 'LGVRVXB', 'CMSGVTL',
                     'EULURHL', 'NSERJIK', 'WGQRBHJ', 'SBUETBP', 'DDGIRQA', 'JIOWSAX', 'EYILHOU',
                     'FFJOGRA', 'FPBFOEB', 'JEKQIKE', 'VYTOISY', 'AIKOJYC', 'AHVIXII', 'RTVQHPO',
                     'HFYZKSZ', 'YLHQTEA', 'LCAOHWW', 'AWMGPPL', 'IIYLALO', 'SRQKQTX', 'JNEVNAR',
                     'HWKCDPO', 'UTGKBXG', 'NIVQSOG', 'TPUASCQ', 'FCMPHJR', 'BYEQVGG', 'LLHXSPF',
                     'PDWQQDP', 'LQEHWDR', 'VFXSABJ', 'DTJITAO', 'DAEPCOR', 'OFEEPAC', 'RNKJCEN',
                     'AYAJVQL', 'KPZEEJR', 'MOSECGI', 'XLVHGLO', 'DXWRFDA', 'JTIJWNL', 'AKUNFFN',
                     'KPXMEBJ', 'GBGZJZO', 'GRBGIZR', 'KNFIDTO', 'UOGNHSF', 'PSXOLCG', 'FAIOOOV',
                     'UHMRZHI', 'FBBFYZM', 'SQODRRP', 'NAWMDDT', 'MDBYIRV', 'SPKCFPP', 'BGDAMPC',
                     'DANYGJI', 'OXRCIYW', 'IKMWIOV', 'MQXCIBE', 'OOVNYOI', 'LTCKGYN', 'HBQPQTD',
                     'SDEUDHY', 'BHSHCHU', 'DDPBVDN', 'JIWJHVI', 'ZRYDRXT', 'GOBJALH', 'LOGULZS',
                     'ARVLGNZ', 'MWHYOSB', 'JMFGDPB', 'IHCEXCN', 'GQHLWWM', 'AWJPBPO', 'AGTCLZR',
                     'AEXRRBM', 'OGYQNUB', 'APJFLOK', 'GKACPXS', 'JWXNCNT', 'FUTFIRO', 'ENIRWLT',
                     'NVBZJEU', 'BGHDMAS', 'TKQUNLP', 'RIEVFEX', 'QASZGHA', 'MRBOALK', 'DNHHKRL',
                     'ZKXMWHB', 'SMHHBFH', 'EDASGHM', 'RCJGZLT', 'UZGUYFK', 'LCFCVCB', 'VEUJYWN',
                     'IJRFPEK', 'UMJYMGD', 'XWQPLHB', 'ITWTNIT', 'VTMSCPQ', 'DPPXTGF', 'BGPVFMN',
                     'KEJOIIS', 'HOVFLAR', 'HIFPGGQ', 'ZYMFJDH', 'WLFAYHX', 'NGVHWDO', 'HCSXZKW',
                     'JYSKSPX', 'OKIKPOJ', 'KGJACPV', 'XJJAKPY', 'CWLCCPL', 'HHSFOPV', 'ONKWSSO',
                     'GVDVKJH', 'KXISVUL', 'MUEKGHC', 'YCJYNVQ', 'HEBFRRA', 'KYRXYOZ', 'OADGFBJ',
                     'MKHJGMF', 'JIMYAME', 'ULYIQYO', 'VPAZKWG', 'BEUAZOI', 'TPYJDFW', 'UXZNNNB',
                     'QODSZMV', 'GUEUYTS', 'HSCCVTR', 'LBMUJNZ', 'BQLPPBV', 'RLHGEVY', 'ZCRFMLI',
                     'JOJRNMZ', 'IHLMFTI', 'OPBTWFJ', 'NQQYONU', 'ZPBDTHO', 'QWVFRRZ', 'GYFZSXY',
                     'POUQMMG', 'FWILIBF', 'ALKYTAY', 'IWHIYNA', 'HPCWOKU', 'NYRKOCF', 'JUMKXBC',
                     'LVMHJCI', 'VCPMKTP', 'KUXSPYJ', 'OULTOYT', 'ACVSDTK', 'HCBDBBS', 'RQTEIMY',
                     'DTSZUFG', 'JORKKSF', 'CQZHXYY', 'RAGOFAC', 'RIEGYBR', 'NTYHQRF', 'NGIJPET',
                     'ZMEJKZD', 'UEIYMWS', 'RSQGZAF', 'TTQVFLL', 'DDZYPGM', 'WBCMBKW', 'VOHXBZJ',
                     'UZPPCUZ', 'WTMETCB', 'VRNGUZU', 'PAIOUXI', 'PZYGETW', 'OYSQKAH', 'NMWUWJL',
                     'ACEFRZA', 'VXWAKJT', 'CWWUCQG', 'MHLQVAB', 'NDWFVEZ', 'MIIVLBT', 'PPNLAEY',
                     'XLHIYUD', 'KBNVGJL', 'UJTXHTS', 'UJYQCMY']

In [None]:
# Second hard-coded column list; the last entry is the target 'TLJYWBE'.
# NOTE(review): this rebinds `selected_cols` with a different set of names
# than the "final feature selected" cell, and nothing visible below reads it
# -- presumably left over from an earlier selection run; confirm before use.
selected_cols = ['MSAHYEA', 'DJANPLY', 'LJWUXOV', 'VZOZKWX', 'MRUWBZT', 'OOASVXJ', 'VMWDQGF',
                 'FXACWUA', 'KPQSPBC', 'SJZKEVZ', 'DVSZBLN', 'UNFEABR', 'LLZRQRY', 'HKTLMYY',
                 'AVSMOFQ', 'XEUMIIT', 'JORKKSF', 'JEKQIKE', 'KDQUJOB', 'DMTXDGF', 'WLULXFN',
                 'SRQKQTX', 'XIRNYHK', 'DUOKWOI', 'NSERJIK', 'AHVIXII', 'CMSGVTL', 'WGQRBHJ',
                 'NYWUAUO', 'PHWLHGO', 'LKFNGDB', 'SBUETBP', 'AIKOJYC', 'PSXOLCG', 'RNKJCEN',
                 'FPBFOEB', 'UXZNNNB', 'RMVEJRV', 'FFJOGRA', 'DDGIRQA', 'AKUNFFN', 'LTCKGYN',
                 'RFVSOEI', 'UTGKBXG', 'GRBGIZR', 'HWKCDPO', 'VTMSCPQ', 'DXWRFDA', 'LTRIIBH',
                 'IKMWIOV', 'STMKCSJ', 'OFEEPAC', 'JNEVNAR', 'RCJGZLT', 'WBCMBKW', 'SPKCFPP',
                 'LFATORD', 'LCAOHWW', 'NGUTDOX', 'TBFLCFW', 'FUTFIRO', 'VFXSABJ', 'JZQULTU',
                 'MKHJGMF', 'NIVQSOG', 'JMFGDPB', 'NAWMDDT', 'JZVNCXT', 'POUQMMG', 'LGVRVXB',
                 'HPIXBEZ', 'BGPVFMN', 'BTKTNSN', 'GOBJALH', 'BTXMHGA', 'ZPBDTHO', 'VPAZKWG',
                 'BHSHCHU', 'KPZEEJR', 'SYJXQKZ', 'UOGNHSF', 'IHCEXCN', 'XPCRCSI', 'KOESNGG',
                 'YLHQTEA', 'AWJPBPO', 'NMWUWJL', 'BUVBEYO', 'HTRZKGZ', 'FWILIBF', 'MQXCIBE',
                 'SLHDFLB', 'BGDAMPC', 'UMJYMGD', 'JWXNCNT', 'DNXCPJX', 'DPPXTGF', 'YZRVJSE',
                 'MOSECGI', 'PFFFNMW', 'HCSXZKW', 'ZMEJKZD', 'SEWCJZQ', 'RAGOFAC', 'JWFXMKH',
                 'HXBFDMA', 'ULYIQYO', 'QVSCEFJ', 'VEOALEL', 'PDWQQDP', 'SQODRRP', 'HPCWOKU',
                 'VQGBZBI', 'VYTOISY', 'JIWJHVI', 'NGVHWDO', 'NTYHQRF', 'OXRCIYW', 'LWRFJGT',
                 'WJKYEMG', 'LNZGNNA', 'ZDWTEVD', 'APJFLOK', 'NVBZJEU', 'TZNKMJP', 'TNUQMZC',
                 'ELDKYEY', 'GYFZSXY', 'BYEQVGG', 'DTSZUFG', 'WFPEMDD', 'XLVHGLO', 'JUMKXBC',
                 'KBNVGJL', 'NXXERZE', 'KWGZVQN', 'NTJRVGR', 'DTJITAO', 'OOVNYOI', 'TBGAWDK',
                 'CWLCCPL', 'KNFIDTO', 'IROKFNR', 'ARVLGNZ', 'IJRFPEK', 'QASZGHA', 'MIIVLBT',
                 'TKNRHFX', 'PJLETFT', 'KFDAVVI', 'OMIIMXP', 'VJNUBJX', 'AYAJVQL', 'RETRSMC',
                 'XWQPLHB', 'XLHIYUD', 'QZOMJMJ', 'RIEVFEX', 'PJGXCUT', 'FAIOOOV', 'HIFPGGQ',
                 'OYSQKAH', 'RQTEIMY', 'NYRKOCF', 'JHQKHHL', 'KYRXYOZ', 'AWMGPPL', 'NQQYONU',
                 'OKMWHQM', 'QEKEIFO', 'MRBOALK', 'PIACHYE', 'OGYQNUB', 'QWVFRRZ', 'BKSGEBR',
                 'VSXTBCN', 'UEIYMWS', 'LBMUJNZ', 'LCFCVCB', 'HBCMCGN', 'CWWUCQG', 'WTCICYP',
                 'QODSZMV', 'LZBJHYC', 'OPBTWFJ', 'MNNVQLZ', 'TFWEIKV', 'SFGWGSJ', 'WPYBJPG',
                 'ETSCGYD', 'ALKYTAY', 'EXAIPZR', 'LOGULZS', 'KOJYJHO', 'INFZDWA', 'RSQGZAF',
                 'PCJBMTF', 'UCXNWDN', 'FBBFYZM', 'JIQIQWD', 'IJQUYPU', 'LNWVWPK', 'DCSSZRQ',
                 'UWOLTWR', 'OZIVQPW', 'OYSVSFU', 'MOGJHYG', 'EEGYIGX', 'GPSLGET', 'GMLIQMK',
                 'LLHXSPF', 'ATSLHPA', 'NFZEWMH', 'KXISVUL', 'HCBDBBS', 'MWHYOSB', 'NGIJPET',
                 'GPDXQZO', 'NRKKRBK', 'JTIJWNL', 'TXVCNZC', 'YCWMHMB', 'SMHHBFH', 'AGTCLZR',
                 'OULTOYT', 'JZYCXXB', 'VXWAKJT', 'VRNGUZU', 'BGHDMAS', 'QRTDBHR', 'RXLZZHO',
                 'TTQVFLL', 'EGJUWUC', 'RTVQHPO', 'ACVSDTK', 'AZLIRWB', 'UPSDFCS', 'YIDCRAN',
                 'IPRULOU', 'UJTXHTS', 'YCJYNVQ', 'EEIRTMR', 'FLYIGDN', 'EULURHL', 'GBGZJZO',
                 'UDQKCAP', 'PAIOUXI', 'PPNLAEY', 'ACEFRZA', 'EDASGHM', 'GJJABFU', 'ETXKSTM',
                 'UAEETIT', 'INLEXOC', 'MSWILHL', 'ONKWSSO', 'WVGOOTV', 'PUYNKTQ', 'WCHWYUP',
                 'ZZTPIJA', 'CZUZJKN', 'QJXZQSD', 'UJYQCMY', 'SOLTBDE', 'AUESMVQ', 'OADGFBJ',
                 'MAFPSTX', 'RLYWHCU', 'UZGUYFK', 'HJVHTYM', 'WBBKQFH', 'IFZDTPK', 'JYSKSPX',
                 'VCPMKTP', 'VOYPYYV', 'SMRUUUS', 'MLKBLNF', 'LGXZTHS', 'JULDEHH', 'OOXHPWM',
                 'KUXSPYJ', 'REAKUIZ', 'XKEMMHR', 'FMPLUQF', 'KPXMEBJ', 'OLNLGZK', 'QSQCEMO',
                 'NOLZTQY', 'RRMMRAJ', 'AEXRRBM', 'PLSHZWK', 'JIOWSAX', 'VEUJYWN', 'TLJYWBE']