In [2]:
import pandas as pd
from pandas import DataFrame
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.feature_selection import VarianceThreshold
# --- Build the sample patient table ------------------------------------------
# Some numeric cells hold 0; those cells are overwritten with a column mean in
# the cleaning step below.
data = dict(
    name=['Rex', 'Peter', 'Sam', 'Rita', 'Hilda'],
    age=[1, 95, 22, 0, 60],
    height=[20, 180, 160, 145, 125],
    Weight=[0, 100, 75, 55, 140],
    bp=[80, 100, 65, 70, 40],
)
frame = pd.DataFrame(data)
print("Original DataFrame:", frame, sep="\n")

# --- Round-trip through CSV ---------------------------------------------------
# Writes 'patient_data.csv' into the current working directory (without the
# index column) and immediately loads it back into a fresh DataFrame.
frame.to_csv('patient_data.csv', index=False)
df = pd.read_csv('patient_data.csv')
print("\nDataFrame after reading from CSV:", df, sep="\n")

# --- Replace zeros in numeric columns with the column mean --------------------
numeric_cols = df.select_dtypes(include=['number']).columns
numeric_part = df[numeric_cols]
# NOTE(review): these means are taken over the columns as they are, i.e. with
# the zeros that are about to be replaced still counted. Confirm that this is
# the intended mean rather than the mean of the non-zero entries.
column_means = numeric_part.mean()
df[numeric_cols] = numeric_part.replace(0, column_means)
print("\nDataFrame after replacing 0 values with mean:", df, sep="\n")

# --- Pick the feature columns that will be scaled -----------------------------
scaling_columns = ['age', 'bp', 'height', 'Weight']
frame2 = df[scaling_columns]
print("\nSelected columns for scaling:", frame2, sep="\n")

# --- Standardisation -----------------------------------------------------------
scaler = StandardScaler()
scaled = scaler.fit_transform(frame2)
print("\nScaled data using StandardScaler:", scaled, sep="\n")
# Per-column statistics (axis=0) of the scaled values.
print("\nMean of scaled data (StandardScaler):", scaled.mean(axis=0), sep="\n")
print(
    "Standard deviation of scaled data (StandardScaler):",
    scaled.std(axis=0),
    sep="\n",
)

# --- Min-max scaling -----------------------------------------------------------
min_max = MinMaxScaler()
scaled2 = min_max.fit_transform(frame2)
print("\nScaled data using MinMaxScaler:", scaled2, sep="\n")
print("\nMean of scaled data (MinMaxScaler):", scaled2.mean(axis=0), sep="\n")
print(
    "Standard deviation of scaled data (MinMaxScaler):",
    scaled2.std(axis=0),
    sep="\n",
)

# --- Variance-based feature selection -----------------------------------------
# The same selector object is fitted twice, once per scaled matrix; each
# fit_transform call refits it on the matrix it is given.
variance_cutoff = .5 * (1 - .9)
sel = VarianceThreshold(threshold=variance_cutoff)

# NOTE(review): StandardScaler output presumably has unit variance in every
# non-constant column, so this cutoff likely cannot remove any column from
# `scaled` - confirm that running the selector on standardised data is intended.
useful_features1 = sel.fit_transform(scaled)
print("\nUseful feature values based on StandardScaler:", useful_features1, sep="\n")

useful_features2 = sel.fit_transform(scaled2)
print("\nUseful feature values based on MinMaxScaler:", useful_features2, sep="\n")



Original DataFrame:
    name  age  height  Weight   bp
0    Rex    1      20       0   80
1  Peter   95     180     100  100
2    Sam   22     160      75   65
3   Rita    0     145      55   70
4  Hilda   60     125     140   40

DataFrame after reading from CSV:
    name  age  height  Weight   bp
0    Rex    1      20       0   80
1  Peter   95     180     100  100
2    Sam   22     160      75   65
3   Rita    0     145      55   70
4  Hilda   60     125     140   40

DataFrame after replacing 0 values with mean:
    name   age  height  Weight   bp
0    Rex   1.0      20      74   80
1  Peter  95.0     180     100  100
2    Sam  22.0     160      75   65
3   Rita  35.6     145      55   70
4  Hilda  60.0     125     140   40

Selected columns for scaling:
    age   bp  height  Weight
0   1.0   80      20      74
1  95.0  100     180     100
2  22.0   65     160      75
3  35.6   70     145      55
4  60.0   40     125     140

Scaled data using StandardScaler:
[[-1.28721689  0.45927