Skip to content

Commit

Permalink
fix(datasets): Breast_cancer and adult dataset processing logic.
Browse files: browse the repository at this point in the history
  • Loading branch information
fabclmnt committed Nov 13, 2020
1 parent cc3dc0f commit 18b70aa
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 22 deletions.
40 changes: 21 additions & 19 deletions src/ydata_synthetic/preprocessing/adult.py
Expand Up @@ -12,24 +12,26 @@ def transformations(auto=True):
else:
data = fetch_data('adult')

numerical_features = ['age', 'fnlwgt',
'capital-gain', 'capital-loss',
'hours-per-week']
numerical_transformer = Pipeline(steps=[
('onehot', StandardScaler())])

categorical_features = ['workclass','education', 'marital-status',
'occupation', 'relationship',
'race', 'sex']
categorical_transformer = Pipeline(steps=[
('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
transformers=[
('num', numerical_transformer, numerical_features),
('cat', categorical_transformer, categorical_features)])

processed_data = preprocessor.fit_transform(data)
processed_data = pd.DataFrame.sparse.from_spmatrix(preprocessor.fit_transform(processed_data))
numerical_features = ['age', 'fnlwgt',
'capital-gain', 'capital-loss',
'hours-per-week']
numerical_transformer = Pipeline(steps=[
('onehot', StandardScaler())])

categorical_features = ['workclass','education', 'marital-status',
'occupation', 'relationship',
'race', 'sex']
categorical_transformer = Pipeline(steps=[
('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
transformers=[
('num', numerical_transformer, numerical_features),
('cat', categorical_transformer, categorical_features)])

processed_data = preprocessor.fit_transform(data)
processed_data = pd.DataFrame.sparse.from_spmatrix(preprocessor.fit_transform(processed_data))
return data, processed_data, preprocessor



14 changes: 11 additions & 3 deletions src/ydata_synthetic/preprocessing/breast_cancer_wisconsin.py
Expand Up @@ -11,9 +11,17 @@ def transformations(auto=True):
data = fetch_data('breast_cancer_wisconsin')
else:
data = fetch_data('breast_cancer_wisconsin')
scaler = StandardScaler()
processed_data = scaler.fit_transform(data)
processed_data = pd.DataFrame(processed_data)

scaler = StandardScaler()
processed_data = scaler.fit_transform(data)
processed_data = pd.DataFrame(processed_data)

return data, processed_data, scaler


if __name__ == '__main__':
    # Manual smoke run: execute the preprocessing once and dump the value
    # returned by transformations() (the data, processed_data, scaler tuple)
    # to stdout for visual inspection.
    result = transformations(auto=True)
    print(result)

0 comments on commit 18b70aa

Please sign in to comment.