In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# ===== 1) Prepare synthetic dataset =====
# 41 "normal" customers followed by 3 hand-picked outlier rows.
# AnnualIncome and Age climb in steps of 0.5; SpendingScore is made of
# three step-3 ramps plus a single closing value.
n_normal = 41

normal_income = [15 + 0.5 * step for step in range(n_normal)]   # 15.0 ... 35.0
normal_age = [20 + 0.5 * step for step in range(n_normal)]      # 20.0 ... 40.0
normal_spending = (
    list(range(39, 97, 3))    # 39, 42, ..., 96  (20 values)
    + list(range(6, 34, 3))   # 6, 9, ..., 33    (10 values)
    + list(range(5, 33, 3))   # 5, 8, ..., 32    (10 values)
    + [56]                    # last normal point
)

# Outlier rows, listed as (AnnualIncome, SpendingScore, Age).
outlier_rows = [
    (80, 2, 15),
    (85, 3, 60),
    (90, 100, 70),
]

data = {
    'AnnualIncome': normal_income + [row[0] for row in outlier_rows],
    'SpendingScore': normal_spending + [row[1] for row in outlier_rows],
    'Age': normal_age + [row[2] for row in outlier_rows],
}

df = pd.DataFrame(data)

# ===== 2) Normalize features =====
# Learn per-column scaling statistics from the full frame (outlier rows
# included), then apply them to that same frame.
scaler = StandardScaler().fit(df)
X_scaled = scaler.transform(df)

# ===== 3) Build simple autoencoder =====
# Seed Python's `random`, NumPy and TensorFlow *before* any layer is created,
# so weight initialization and batch shuffling are repeatable across
# Restart & Run All. Without this the 2D embedding differs on every run.
SEED = 42
set_random_seed(SEED)

input_dim = X_scaled.shape[1]  # one input unit per scaled feature column
encoding_dim = 2  # compress into 2D space for visualization

# Architecture: input -> Dense(2) bottleneck -> Dense(input_dim) reconstruction.
input_layer = Input(shape=(input_dim,))
# NOTE(review): ReLU keeps both codes >= 0, and a unit that stays inactive
# would flatten one plot axis to 0 - consider 'linear' or 'tanh' if the
# embedding looks collapsed.
encoded = Dense(encoding_dim, activation='relu')(input_layer)
decoded = Dense(input_dim, activation='linear')(encoded)

# `autoencoder` is the full reconstruction model that gets trained;
# `encoder` reuses the same bottleneck layer to expose the 2D code.
autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

autoencoder.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

# ===== 4) Train autoencoder =====
# The scaled features act as both input and target: the network is trained
# to reconstruct what it is fed. Progress output is silenced (verbose=0).
fit_options = dict(epochs=100, batch_size=8, shuffle=True, verbose=0)
history = autoencoder.fit(X_scaled, X_scaled, **fit_options)

# ===== 5) Get encoded (2D) representation =====
# Run the scaled rows through the encoder half only and label the two
# bottleneck outputs so they can be plotted by name.
latent_columns = ['AE1', 'AE2']
encoded_data = encoder.predict(X_scaled)
encoded_df = pd.DataFrame(encoded_data, columns=latent_columns)

# ===== 6) Visualization =====
# Attach the 2D codes to a copy of the raw frame and mark the outlier rows.
df_plot = df.copy()
for latent_col in ('AE1', 'AE2'):
    df_plot[latent_col] = encoded_df[latent_col]

# Threshold rule for the outlier rows of the dataset.
# The SpendingScore cut-off has to sit above 96: normal customers in the data
# reach 93 and 96, and the previous `> 90` cut-off coloured them as outliers.
is_outlier = (
    (df_plot['AnnualIncome'] > 70)
    | (df_plot['SpendingScore'] > 96)
    | (df_plot['Age'] > 55)
    | (df_plot['Age'] < 18)
)
df_plot['Outlier'] = np.where(is_outlier, 'Yes', 'No')

# Scatter the 2D codes, coloured by the outlier flag (red = outlier).
fig, ax = plt.subplots(figsize=(7, 6))
sns.scatterplot(
    data=df_plot,
    x='AE1',
    y='AE2',
    hue='Outlier',
    palette={'Yes': 'red', 'No': 'blue'},
    s=70,
    edgecolor='k',
    ax=ax,
)
ax.set_title('Autoencoder: 2D Encoded Representation')
ax.set_xlabel('AE1')
ax.set_ylabel('AE2')
ax.legend(title='Outlier')
plt.show()


ModuleNotFoundError: No module named 'tensorflow'