### Load The Data

In [4]:
df = pd.read_csv("three_feature_data.csv")
print(len(df))
df.head()

40


Unnamed: 0,feature1,feature2,feature3,target
0,-0.331617,-1.632386,0.619114,1
1,1.010229,1.43783,2.327788,0
2,0.241106,-0.95251,-0.136267,1
3,1.67686,4.187503,-0.080565,0
4,2.823378,-0.332863,2.637391,0


### Visualization of Data

In [5]:
import plotly.io as pio
import plotly.express as px
pio.renderers.default = 'browser'

In [6]:
# Below output will be display on brawser
fig = px.scatter_3d(df, x=df['feature1'], y=df['feature2'], z=df['feature3'],
              color=df['target'].astype('str'))
fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))

fig.show()

### PCA Implementation

#### Step-1: Standard Scaling

In [7]:
# Step 1 - Apply standard scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df.iloc[:,0:3] = scaler.fit_transform(df.iloc[:,0:3])

#### Step-2: Covariance Matrix Calculation

In [8]:
# Step 2 - Find Covariance Matrix
covariance_matrix = np.cov([df.iloc[:,0],df.iloc[:,1],df.iloc[:,2]])
print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


#### Step-3: Calculation of Eigen Values and Eigen Vector

In [12]:
# Step 3 - Finding EV and EVs
eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)

In [13]:
eigen_values

array([1.3536065 , 0.94557084, 0.77774573])

In [14]:
eigen_vectors

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]])

#### Step-4: Analysis of variance captured by principle components and vizualization

In [16]:
# Calculate the sum of all eigenvalues
sum_eigenvalues = np.sum(eigen_values)

# Calculate the proportion of variance captured by each eigenvalue
proportion_variance = eigen_values / sum_eigenvalues

# Calculate cumulative variance for the first, first two, and all three components
cumulative_variance = np.cumsum(proportion_variance)

# Print results
print(f"Sum of Eigenvalues: {sum_eigenvalues}")
print(f"Proportion of Variance Captured by Each Eigenvalue: {proportion_variance}")
print(f"Cumulative Variance Captured by First Component: {cumulative_variance[0]}")
print(f"Cumulative Variance Captured by First Two Components: {cumulative_variance[1]}")
print(f"Cumulative Variance Captured by All Three Components: {cumulative_variance[2]}")

Sum of Eigenvalues: 3.076923076923076
Proportion of Variance Captured by Each Eigenvalue: [0.43992211 0.30731052 0.25276736]
Cumulative Variance Captured by First Component: 0.43992211345332793
Cumulative Variance Captured by First Two Components: 0.7472326366094666
Cumulative Variance Captured by All Three Components: 1.0


In [None]:
import numpy as np
import plotly.graph_objects as go

# Example data
np.random.seed(23)
data = np.random.rand(10, 3)  # 10 points in 3D space
eigen_vectors = np.random.rand(3, 3)  # Example eigenvectors

# Center of the data
center = data.mean(axis=0)

# Create a 3D scatter plot for the data points
fig = go.Figure()

# Add data points
fig.add_trace(
    go.Scatter3d(
        x=data[:, 0],
        y=data[:, 1],
        z=data[:, 2],
        mode='markers',
        marker=dict(size=8, color='blue', opacity=0.7),
        name='Data Points',
    )
)

# Add the center point
fig.add_trace(
    go.Scatter3d(
        x=[center[0]],
        y=[center[1]],
        z=[center[2]],
        mode='markers',
        marker=dict(size=10, color='red'),
        name='Center',
    )
)

# Add eigenvector arrows
for i in range(3):  # Assuming 3 eigenvectors
    fig.add_trace(
        go.Scatter3d(
            x=[center[0], center[0] + eigen_vectors[i, 0]],
            y=[center[1], center[1] + eigen_vectors[i, 1]],
            z=[center[2], center[2] + eigen_vectors[i, 2]],
            mode='lines+markers',
            line=dict(color='green', width=5),
            marker=dict(size=2, color='green'),
            name=f'Eigenvector {i+1}',
        )
    )

# Set axis labels and title
fig.update_layout(
    scene=dict(
        xaxis_title='X-axis',
        yaxis_title='Y-axis',
        zaxis_title='Z-axis',
    ),
    title="3D Plot with Eigenvectors",
)

# Display the plot in the browser
fig.show()


#### Step-5: Reduced to two features and Visualization

In [18]:
pc = eigen_vectors[0:2]
pc

array([[0.82875148, 0.71785184, 0.11922669],
       [0.59638417, 0.1297563 , 0.07753409]])

In [19]:
transformed_df = np.dot(df.iloc[:,0:3],pc.T)
# 40,3 - 3,2
new_df = pd.DataFrame(transformed_df,columns=['PC1','PC2'])
new_df['target'] = df['target'].values
new_df.head()

Unnamed: 0,PC1,PC2,target
0,-1.994117,-0.661773,1
1,1.261353,0.534874,0
2,-1.192288,-0.330311,1
3,3.376264,1.059018,0
4,1.394748,1.282422,0


In [27]:
new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['PC1'],
                 y=new_df['PC2'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()