In [2]:
%matplotlib widget

import numpy as np
import random
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, make_scorer, mean_squared_error
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.lines import Line2D
import yaml
from IPython.display import display  # to display variables in a "nice" way

try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

# Raise the pandas display limits so long/wide frames are not truncated.
pd.set_option("display.max_rows", 140)
pd.set_option("display.max_columns", 200)

In [3]:
# Seed shared by the stochastic steps of the notebook (NumPy sampling, KMeans).
rs = 296312 + 302736

# Load the survey answers; the path is relative to the working directory.
df_path = "responses_hw.csv"
df = pd.read_csv(df_path)

## Exercise 1: Loading and Preparing the Data

In [4]:
# Keep every column except the last ten.
# NOTE(review): this overwrites `df`, so re-running the cell without reloading
# the CSV trims ten more columns each time.
n_trailing_dropped = 10
df = df.iloc[:, :-n_trailing_dropped]

In [5]:
# Split the column names into the two groups used by the exercises.
all_columns = df.columns.tolist()
col_entertainment = all_columns[:63]
col_personality = all_columns[63:140]

# Draw the columns to discard; seeding right before makes the draw repeatable.
# NOTE(review): np.random.choice samples WITH replacement by default, so another
# seed could return the same column twice -- confirm this is intended.
np.random.seed(rs)
remove_entertainment = np.random.choice(col_entertainment, size=5)
remove_personality = np.random.choice(col_personality, size=10)

for removed in (remove_entertainment, remove_personality):
    print(removed)

['Country' 'Geography' 'Latino' 'Slow songs or fast songs' 'Chemistry']
['Personality' 'Loneliness' 'Reliability' 'New environment' 'Fake'
 'Heights' 'Internet usage' 'Prioritising workload' 'Interests or hobbies'
 'Mood swings']


## Exercise 2: Preprocessing

In [6]:
def _rank_levels(*levels):
    """Map each answer label to its 1-based position in ``levels``."""
    return {label: rank for rank, label in enumerate(levels, start=1)}


# Ordinal encoding of the text-valued answers: labels are listed in the order
# of the integer codes they receive (1, 2, 3, ...).
data = {
    'Smoking': _rank_levels('never smoked', 'tried smoking', 'former smoker', 'current smoker'),
    'Alcohol': _rank_levels('never', 'social drinker', 'drink a lot'),
    'Punctuality': _rank_levels('early', 'on time', 'late'),
    'Lying': _rank_levels('never', 'only to avoid hurting someone', 'sometimes', 'everytime it suits me'),
    'Internet usage': _rank_levels('no time at all', 'less than an hour a day', 'few hours a day', 'most of the day'),
}
df = df.replace(to_replace=data)

# First 63 columns vs. all the remaining ones.
df_1 = df.iloc[:, :63]
df_2 = df.iloc[:, 63:]

In [7]:
# Discard the randomly selected columns from each half, then put the two
# halves back side by side.
workdf_1 = df_1.drop(labels=remove_entertainment, axis=1)
workdf_2 = df_2.drop(labels=remove_personality, axis=1)
workdf_tot = pd.concat([workdf_1, workdf_2], axis=1)

In [8]:
# Missing-value strategy 1: drop every row that contains at least one NaN.
workdf_1_na = workdf_1.dropna()
workdf_tot_na = workdf_tot.dropna()

# Plain NumPy matrices for scikit-learn.
X_1_na = workdf_1_na.to_numpy()
X_tot_na = workdf_tot_na.to_numpy()

In [9]:
# Missing-value strategy 2: replace every NaN with 0.
workdf_1_0 = workdf_1.fillna(0)
workdf_tot_0 = workdf_tot.fillna(0)

# Plain NumPy matrices for scikit-learn.
X_1_0 = workdf_1_0.to_numpy()
X_tot_0 = workdf_tot_0.to_numpy()

In [10]:
# Missing-value strategy 3: replace every NaN with the median of its column.
median_1 = workdf_1.median(axis=0)
median_tot = workdf_tot.median(axis=0)
workdf_1_med = workdf_1.fillna(median_1)
workdf_tot_med = workdf_tot.fillna(median_tot)

# Plain NumPy matrices for scikit-learn.
X_1_med = workdf_1_med.to_numpy()
X_tot_med = workdf_tot_med.to_numpy()

## Exercise 3: Computation of the PCs

In [11]:
# Entertainment-only matrices, one per missing-value strategy.
dict_df1 = {
    "Data with deleted rows": X_1_na,
    "Data with 0 instead of NaN": X_1_0,
    "Data with median instead of NaN": X_1_med,
}

In [12]:
# For each missing-value strategy: find how many PCs are needed to explain 30%
# of the variance, cap that number at 5, then fit the PCA that is actually used
# and store both the fitted model and the projected data.
# TODO: add more detail to the printed output.
dict_pca_1 = {}
dict_y_1 = {}
for k, v in dict_df1.items():
    # A float n_components lets scikit-learn pick the number of PCs from the
    # requested fraction of explained variance.
    pca = PCA(n_components = 0.3)
    pca.fit(v)
    m = min(pca.n_components_, 5)
    print('Il minimo è', m)
    # With an integer n_components PCA may choose its randomized SVD solver;
    # random_state ties it to the notebook seed so re-running the cell
    # reproduces the same components.
    dict_pca_1[k] = PCA(n_components = m, random_state = rs)
    dict_pca_1[k].fit(v)
    dict_y_1[k] = dict_pca_1[k].transform(v)


dict_pca_1

Il minimo è 4
Il minimo è 4
Il minimo è 4


{'Data with deleted rows': PCA(n_components=4),
 'Data with 0 instead of NaN': PCA(n_components=4),
 'Data with median instead of NaN': PCA(n_components=4)}

Choosing the best PCA

In [13]:
# Compare the three PCAs with two figures of merit: PCA.score (average
# log-likelihood of the samples) and the RMSE of the reconstruction obtained by
# projecting the data on the PCs and mapping it back.
for k, v in dict_pca_1.items():
    recon = v.inverse_transform(dict_y_1[k])
    print("score di sklearn {} ".format(v.score(dict_df1[k])))
    # RMSE computed with NumPy (RMSE of each feature, then averaged): the
    # `squared=False` argument of mean_squared_error was deprecated in
    # scikit-learn 1.4 and removed in 1.6, where it raises a TypeError.
    rmse = np.sqrt(np.mean((dict_df1[k] - recon) ** 2, axis=0)).mean()
    print("RMSE: {} ".format(rmse))

score di sklearn -90.35279704771877 
RMSE: 1.0337759010655552 
score di sklearn -91.35677228705954 
RMSE: 1.0547280944667912 
score di sklearn -90.40681317076265 
RMSE: 1.0352708627859404 


In [14]:
# Continue with the PCA fitted on the data where incomplete rows were deleted.
chosen_1 = "Data with deleted rows"
pca_1, X_1, Y_1 = dict_pca_1[chosen_1], dict_df1[chosen_1], dict_y_1[chosen_1]

In [15]:
    # Explained-variance plots for pca_1.
    m = pca_1.n_components
    pc_ticks = np.arange(1, m + 1)
    pc_labels = [f'PC{idx}' for idx in range(1, m + 1)]
    var_ratio = pca_1.explained_variance_ratio_

    # Cumulative explained variance; a leading 0 makes the curve start at the origin.
    fig_cum, ax_cum = plt.subplots()
    ax_cum.plot(np.concatenate(([0], np.cumsum(var_ratio))))
    ax_cum.set_title("Data with deleted rows")
    ax_cum.set_xticks(pc_ticks)
    ax_cum.set_xticklabels(pc_labels)
    ax_cum.set_xlabel('Principal components')
    ax_cum.set_ylabel('Cumulative explained variance')
    ax_cum.grid()
    plt.show()

    # Total explained variance of the retained PCs, as a percentage for the title.
    round_expl_var_ratio = np.round(var_ratio.sum() * 100, decimals=2)

    # Explained variance of each single PC.
    fig_bar, ax_bar = plt.subplots(figsize=(6, 6))
    ax_bar.bar(range(1, m + 1), var_ratio)
    ax_bar.set_title(f"PCs' EXPLAINED VARIANCE ({round_expl_var_ratio}% OF TOT. EXPL. VAR.)")
    ax_bar.set_xticks(pc_ticks)
    ax_bar.set_xticklabels(pc_labels, rotation=45)
    ax_bar.set_xlabel('Principal Components')
    ax_bar.set_ylabel('Percentage of Explained variance')
    ax_bar.grid()
    plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Exercise 4: Interpretation of the PCs

In [16]:
# Colour of each column of workdf_1, assigned by position (survey section).
# NOTE(review): the 16/28 boundaries are hard-coded for the columns left after
# the random removal -- re-check them if the seed or the removed columns change.
section_colors_1 = (
    (slice(None, 16), 'tab:blue'),      # music preferences
    (slice(16, 28), 'tab:orange'),      # movie preferences
    (slice(28, None), 'tab:green'),     # hobbies and interests
)
columns_1 = list(workdf_1.columns)

cat_colors = {
    name: colour
    for section, colour in section_colors_1
    for name in columns_1[section]
}

# Same colours as a flat list, in column order, ready for plt.bar(color=...).
list_colors = list(cat_colors.values())

In [17]:
# Loadings of each PC of pca_1: one bar plot per PC, followed by the names of
# the features whose loading lies outside the [-eps, +eps] band.

# Number of input features, read from the loadings matrix: the PCA.n_features_
# attribute was deprecated in scikit-learn 1.2 and removed in 1.4.
n_features_1 = pca_1.components_.shape[1]
feature_names_1 = workdf_1_na.columns.to_list()

# DEFINE EPSILON (it does not depend on the PC, so it is computed once)
eps = np.sqrt(1 / n_features_1)

for i in range(pca_1.n_components_):
    loadings = pca_1.components_[i, :]

    plt.figure(figsize = (12, 6))

    # --- RED LINE DENOTING THE THRESHOLD [-eps, +eps] ----------------
    plt.plot([-0.5, n_features_1 - 0.5], [eps, eps], 'red')
    plt.plot([-0.5, n_features_1 - 0.5], [-eps, -eps], 'red')

    plt.bar(np.arange(n_features_1), loadings, color = list_colors)
    plt.xticks(ticks = np.arange(n_features_1),
               labels = feature_names_1,
               rotation = 90)
    # Acronym aligned with the 'YPS' used by the clustered score graph.
    plt.title(f' YPS - PC{i+1}')
    plt.grid()
    plt.show()

    # SELECTION OF THE FEATURES WHOSE LOADING IS BEYOND THE THRESHOLD
    ind_great_pos_PCii = np.argwhere(loadings >= eps).flatten()
    ind_great_neg_PCii = np.argwhere(loadings <= -eps).flatten()

    # `j` (not `i`) so the comprehension does not shadow the PC index.
    great_pos_PCii = [feature_names_1[j] for j in ind_great_pos_PCii]
    great_neg_PCii = [feature_names_1[j] for j in ind_great_neg_PCii]

    print('')
    print(f'****************** PC{i+1} **********************')
    print(f'HIGH-VALUED POSITIVE COMPONENTS: {great_pos_PCii}')
    print('')
    print(f'HIGH-VALUED NEGATIVE COMPONENTS: {great_neg_PCii}')
    print('*********************************************')
    print('')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC1 **********************
HIGH-VALUED POSITIVE COMPONENTS: []

HIGH-VALUED NEGATIVE COMPONENTS: ['Folk', 'Classical music', 'Musical', 'Swing, Jazz', 'Rock n roll', 'Alternative', 'Opera', 'Fantasy/Fairy tales', 'History', 'Psychology', 'Biology', 'Reading', 'Foreign languages', 'Medicine', 'Art exhibitions', 'Religion', 'Countryside, outdoors', 'Dancing', 'Musical instruments', 'Writing', 'Theatre']
*********************************************



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC2 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['Metal or Hardrock', 'Punk', 'Horror', 'Thriller', 'Sci-fi', 'War', 'Western', 'Action', 'Politics', 'Mathematics', 'Physics', 'PC', 'Cars', 'Active sport', 'Science and technology', 'Adrenaline sports']

HIGH-VALUED NEGATIVE COMPONENTS: ['Romantic', 'Shopping']
*********************************************



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC3 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['Dance', 'Pop', 'Hiphop, Rap', 'Techno, Trance', 'Romantic', 'Economy Management', 'Law', 'Cars', 'Dancing', 'Passive sport', 'Active sport', 'Celebrities', 'Shopping', 'Adrenaline sports', 'Pets']

HIGH-VALUED NEGATIVE COMPONENTS: ['Metal or Hardrock', 'Alternative']
*********************************************



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC4 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['History', 'Politics', 'Economy Management', 'Foreign languages', 'Law']

HIGH-VALUED NEGATIVE COMPONENTS: ['Rock', 'Metal or Hardrock', 'Punk', 'Fantasy/Fairy tales', 'Animated', 'Biology', 'Medicine', 'Pets']
*********************************************



In [18]:
# Human-readable names chosen for the four principal components.
pc_names = [
    'Artists',      # PC1
    'Geeks',        # PC2
    'Influencers',  # PC3
    'Preppy',       # PC4
]

In [19]:
# Score graph: the samples projected on the first three principal components.
fig_winescore = plt.figure(figsize=(8, 8))
ax = fig_winescore.add_subplot(111, projection='3d')
ax.scatter(Y_1[:, 0], Y_1[:, 1], Y_1[:, 2])
# Title acronym aligned with the clustered score graph ('YPS'); it used to
# read 'YSP' here.
plt.title('YPS - SCORE GRAPH')
ax.set_xlabel(pc_names[0])
ax.set_ylabel(pc_names[1])
ax.set_zlabel(pc_names[2])
plt.grid()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Exercise 5: k-Means

In [20]:
# Disabled experiment, kept for reference: hyper-parameter search for k-means.
# TODO: tune the parameters in a smarter way.

# from sklearn.model_selection import GridSearchCV

# Lists of candidate values to scan for each hyper-parameter:
#nclust_list = list(range(3, 11))
#init_list = ['k-means++', 'random']
#n_init_list = [3,10]
#iter_list = [300, 500, 1000]

#hparameters = {'n_clusters':nclust_list,'init': init_list, 'n_init':n_init_list, 'max_iter':iter_list} # dictionary
#km = KMeans()
 
#km_gs = GridSearchCV(estimator = km, 
 #                     param_grid = hparameters, 
  #                    scoring = silhouette_score)

#for y in dict_y_1.values():
 #   km_gs.fit(y)

In [21]:
# Run k-means for every candidate number of clusters, keeping each fitted
# model together with its silhouette coefficient so the runs can be compared.
k_list = list(range(3, 11))
km_list = []
silcoeff_list = []

for n_clusters in k_list:
    print(f'****************** START k-MEANS WITH k={n_clusters} ******************')
    print('Computing...')
    km = KMeans(n_clusters=n_clusters, n_init=10, random_state=rs, max_iter = 10000)
    km_list.append(km)
    km.fit(Y_1)
    silcoeff_list.append(silhouette_score(Y_1, km.labels_))
    print(f'****************** END k-MEANS WITH k={n_clusters} ******************')
    print('')

# The winner is the run with the highest silhouette coefficient.
i_best = np.argmax(silcoeff_list)
k, km = k_list[i_best], km_list[i_best]

# Report the outcome of the search.
print('')
print('')
print('****************** RESULTS OF THE SEARCH... ******************')
print(f'BEST SILHOUETTE SCORE: {np.max(silcoeff_list)} --> k = {k}')
print('**************************************************************')

****************** START k-MEANS WITH k=3 ******************
Computing...
****************** END k-MEANS WITH k=3 ******************

****************** START k-MEANS WITH k=4 ******************
Computing...
****************** END k-MEANS WITH k=4 ******************

****************** START k-MEANS WITH k=5 ******************
Computing...
****************** END k-MEANS WITH k=5 ******************

****************** START k-MEANS WITH k=6 ******************
Computing...
****************** END k-MEANS WITH k=6 ******************

****************** START k-MEANS WITH k=7 ******************
Computing...
****************** END k-MEANS WITH k=7 ******************

****************** START k-MEANS WITH k=8 ******************
Computing...
****************** END k-MEANS WITH k=8 ******************

****************** START k-MEANS WITH k=9 ******************
Computing...
****************** END k-MEANS WITH k=9 ******************

****************** START k-MEANS WITH k=10 ******************


## Exercise 6

In [22]:
# Marker used for each centroid in the score graph. Keys 0-2 keep their
# original markers; the extra keys cover the larger values of k the search can return.
markers_dict = {0: 'o', 1: '^', 2: 'x', 3: 's', 4: 'D',
                5: 'v', 6: 'P', 7: '*', 8: '<', 9: '>'}
col_dict = {0:'blue', 1:'orange', 2:'green'}

# Colour of each cluster label; the first three entries are the colours used so far.
cluster_palette = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple',
                   'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan']

# One colour per sample, following its k-means label. The palette lookup always
# yields exactly one colour per sample; the previous if-chain only handled
# labels 0-2, so a best k above 3 produced a list shorter than the data.
cluster_colors = [cluster_palette[el % len(cluster_palette)] for el in km.labels_]

In [23]:
# MAKE THE 3D SCORE GRAPH WITH THE CENTROIDS
sg_3d_km = plt.figure(figsize=(8, 8))
ax_sg_3d_km = sg_3d_km.add_subplot(111, projection='3d')
ax_sg_3d_km.scatter(Y_1[:, 0], Y_1[:, 1], Y_1[:, 2], c=cluster_colors, alpha=0.5)
# Draw every centroid of the selected model instead of assuming there are
# exactly three; labels without a dedicated marker fall back to 'o'.
for i, centroid in enumerate(km.cluster_centers_):
    ax_sg_3d_km.scatter(centroid[0], centroid[1], centroid[2],
                        s=50, c='black', marker=markers_dict.get(i, 'o'))
plt.title('YPS - SCORE GRAPH')
ax_sg_3d_km.set_xlabel(pc_names[0])
ax_sg_3d_km.set_ylabel(pc_names[1])
ax_sg_3d_km.set_zlabel(pc_names[2])
plt.grid()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [24]:
# Centroid coordinates in PC space, one labelled row per cluster.
centroid_labels = ['First centroid', 'Second Centroid', 'Third Centroid']
centroids_df = pd.DataFrame(km.cluster_centers_, index=centroid_labels, columns=pc_names)
centroids_df

Unnamed: 0,Artists,Geeks,Influencers,Preppy
First centroid,0.822301,-2.822537,0.41951,-0.114976
Second Centroid,-3.243089,0.816936,-0.047561,0.010363
Third Centroid,2.791118,2.083321,-0.393159,0.110776


## Exercise 8

In [25]:
# Full working set (all retained columns), one matrix per missing-value strategy.
dict_dftot = {"Data with deleted rows" : X_tot_na , "Data with 0 instead of NaN": X_tot_0, "Data with median instead of NaN": X_tot_med}

# For each strategy: find how many PCs are needed to explain 30% of the
# variance, cap that number at 6, then fit the PCA that is actually used and
# store both the fitted model and the projected data.
# TODO: add more detail to the printed output.
dict_pca_tot = {}
dict_y_tot = {}
for k, v in dict_dftot.items():
    # A float n_components lets scikit-learn pick the number of PCs from the
    # requested fraction of explained variance.
    pca = PCA(n_components = 0.3)
    pca.fit(v)
    m = min(pca.n_components_, 6)
    print('Il minimo è', m)
    # With an integer n_components PCA may choose its randomized SVD solver;
    # random_state ties it to the notebook seed so re-running the cell
    # reproduces the same components.
    dict_pca_tot[k] = PCA(n_components = m, random_state = rs)
    dict_pca_tot[k].fit(v)
    dict_y_tot[k] = dict_pca_tot[k].transform(v)


dict_pca_tot

Il minimo è 6
Il minimo è 6
Il minimo è 6


{'Data with deleted rows': PCA(n_components=6),
 'Data with 0 instead of NaN': PCA(n_components=6),
 'Data with median instead of NaN': PCA(n_components=6)}

In [26]:
# Compare the three PCAs with two figures of merit: PCA.score (average
# log-likelihood of the samples) and the RMSE of the reconstruction obtained by
# projecting the data on the PCs and mapping it back.
for k, v in dict_pca_tot.items():
    recon = v.inverse_transform(dict_y_tot[k])
    print("score di sklearn {} ".format(v.score(dict_dftot[k])))
    # RMSE computed with NumPy (RMSE of each feature, then averaged): the
    # `squared=False` argument of mean_squared_error was deprecated in
    # scikit-learn 1.4 and removed in 1.6, where it raises a TypeError.
    rmse = np.sqrt(np.mean((dict_dftot[k] - recon) ** 2, axis=0)).mean()
    print("RMSE: {} ".format(rmse))

score di sklearn -190.1314019147934 
RMSE: 1.019763145942198 
score di sklearn -192.71491868113216 
RMSE: 1.0441634665589437 
score di sklearn -190.85891019798072 
RMSE: 1.0273410832031478 


In [27]:
# Continue with the PCA fitted on the data where incomplete rows were deleted.
chosen_tot = "Data with deleted rows"
pca_tot, X_tot, Y_tot = dict_pca_tot[chosen_tot], dict_dftot[chosen_tot], dict_y_tot[chosen_tot]

In [28]:
# Explained-variance plots for pca_tot.
m = pca_tot.n_components
pc_tick_labels = [f'PC{i}' for i in range(1, m + 1)]

# Curve of cumulative percentage of explained variance
plt.figure()
plt.plot(np.insert(np.cumsum(pca_tot.explained_variance_ratio_), 0, 0))
# The title names the dataset pca_tot was fitted on. It used to be
# plt.title(k), but `k` is only a leftover loop variable at this point and
# holds the key of a different dataset.
plt.title("Data with deleted rows")
plt.xticks(ticks=np.arange(1, m + 1),
           labels=pc_tick_labels)
plt.xlabel('Principal components')
plt.ylabel('Cumulative explained variance')
plt.grid()
plt.show()

# Total explained variance of the retained PCs, as a percentage for the title.
round_expl_var_ratio = np.round(pca_tot.explained_variance_ratio_.sum() * 100, decimals=2)

# Barplot of percentage of explained variance
plt.figure(figsize=(6, 6))
plt.bar(range(1, m + 1), pca_tot.explained_variance_ratio_)
plt.title(f"PCs' EXPLAINED VARIANCE ({round_expl_var_ratio}% OF TOT. EXPL. VAR.)")
plt.xticks(ticks=np.arange(1, m + 1),
           labels=pc_tick_labels,
           rotation=45)
plt.xlabel('Principal Components')
plt.ylabel('Percentage of Explained variance')
plt.grid()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Exercise 9: Interpretation of the PCs

In [29]:
# Colour of each column of workdf_tot, assigned by position (survey section).
# The cell used to contain the whole assignment twice, with a stray
# `cat_colors={}` inside a loop body that wiped the first pass; a single pass
# produces the same final mapping.
# NOTE(review): the boundaries are hard-coded for the columns left after the
# random removal. In particular, verify that 119 really is the first spending
# column -- TODO confirm against workdf_tot.columns.
section_colors_tot = (
    (0, 16, 'tab:blue'),         # music preferences
    (16, 28, 'tab:orange'),      # movie preferences
    (28, 58, 'tab:green'),       # hobbies and interests
    (58, 67, 'tab:purple'),      # phobias
    (67, 70, 'tab:red'),         # health habits
    (70, 119, 'gold'),           # personality traits, views on life, opinions
    (119, None, 'turquoise'),    # spending habits
)
columns_tot = list(workdf_tot.columns)

cat_colors = {}
for start, stop, colour in section_colors_tot:
    for el in columns_tot[start:stop]:
        cat_colors[el] = colour

# Same colours as a flat list, in column order, ready for plt.barh(color=...).
list_colors = list(cat_colors.values())

In [31]:
# Loadings of each PC of pca_tot: one horizontal bar plot per PC, followed by
# the names of the features whose loading lies outside the [-eps, +eps] band.

# Number of input features, read from the loadings matrix: the PCA.n_features_
# attribute was deprecated in scikit-learn 1.2 and removed in 1.4.
n_features_tot = pca_tot.components_.shape[1]
feature_names_tot = workdf_tot.columns.to_list()

# DEFINE EPSILON (it does not depend on the PC, so it is computed once)
eps = np.sqrt(1 / n_features_tot)

for i in range(pca_tot.n_components_):
    loadings = pca_tot.components_[i, :]

    plt.figure(figsize = (18, 25))

    # --- RED LINE DENOTING THE THRESHOLD [-eps, +eps] ----------------
    plt.plot([eps, eps], [-0.5, n_features_tot - 0.5], 'red')
    plt.plot([-eps, -eps], [-0.5, n_features_tot - 0.5], 'red')

    plt.barh(np.arange(n_features_tot), loadings, color = list_colors)
    plt.yticks(ticks = np.arange(n_features_tot),
               labels = feature_names_tot)
    # Acronym aligned with the 'YPS' used by the clustered score graph.
    plt.title(f' YPS - PC{i+1}')
    plt.grid()
    plt.show()

    # SELECTION OF THE FEATURES WHOSE LOADING IS BEYOND THE THRESHOLD
    ind_great_pos_PCii = np.argwhere(loadings >= eps).flatten()
    ind_great_neg_PCii = np.argwhere(loadings <= -eps).flatten()

    # `j` (not `i`) so the comprehension does not shadow the PC index.
    great_pos_PCii = [feature_names_tot[j] for j in ind_great_pos_PCii]
    great_neg_PCii = [feature_names_tot[j] for j in ind_great_neg_PCii]

    print('')
    print(f'****************** PC{i+1} **********************')
    print(f'HIGH-VALUED POSITIVE COMPONENTS: {great_pos_PCii}')
    print('')
    print(f'HIGH-VALUED NEGATIVE COMPONENTS: {great_neg_PCii}')
    print('*********************************************')
    print('')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC1 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['Metal or Hardrock', 'Horror', 'Thriller', 'Sci-fi', 'War', 'Western', 'Action', 'PC', 'Cars', 'Science and technology', 'Adrenaline sports', 'Small - big dogs', 'Spending on gadgets']

HIGH-VALUED NEGATIVE COMPONENTS: ['Musical', 'Romantic', 'Fantasy/Fairy tales', 'Animated', 'Psychology', 'Biology', 'Reading', 'Foreign languages', 'Medicine', 'Art exhibitions', 'Religion', 'Dancing', 'Musical instruments', 'Writing', 'Gardening', 'Celebrities', 'Shopping', 'Theatre', 'Storm', 'Darkness', 'Spiders', 'Snakes', 'Rats', 'Dangerous dogs', 'Writing notes', 'Workaholism', 'Final judgement', 'Empathy', 'Giving', 'God', 'Children', 'Life struggles', 'Finding lost valuables', 'Shopping centres']
*********************************************



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC2 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['Pop', 'Celebrities', 'Shopping', 'Spiders', 'Snakes', 'Rats', 'Dangerous dogs', 'Public speaking', 'Shopping centres', 'Spending on looks']

HIGH-VALUED NEGATIVE COMPONENTS: ['Folk', 'Classical music', 'Musical', 'Rock', 'Metal or Hardrock', 'Punk', 'Swing, Jazz', 'Rock n roll', 'Alternative', 'Opera', 'Sci-fi', 'War', 'Documentary', 'Western', 'History', 'Psychology', 'Politics', 'Mathematics', 'Physics', 'Biology', 'Reading', 'Medicine', 'Art exhibitions', 'Religion', 'Countryside, outdoors', 'Musical instruments', 'Writing', 'Science and technology', 'Theatre', 'Workaholism', 'Elections']
*********************************************



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC3 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['Metal or Hardrock', 'Fear of public speaking', 'Public speaking']

HIGH-VALUED NEGATIVE COMPONENTS: ['Dance', 'Pop', 'Hiphop, Rap', 'Techno, Trance', 'Action', 'Politics', 'Economy Management', 'Law', 'Cars', 'Dancing', 'Passive sport', 'Active sport', 'Celebrities', 'Shopping', 'Science and technology', 'Fun with friends', 'Adrenaline sports', 'Pets', 'Daily events', 'Giving', 'Cheating in school', 'Number of friends', 'Appearence and gestures', 'Socializing', 'Assertiveness', 'Knowing the right people', 'Energy levels', 'Shopping centres', 'Branded clothing', 'Entertainment spending', 'Spending on looks', 'Spending on gadgets', 'Spending on healthy eating']
*********************************************



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC4 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['Rock', 'Metal or Hardrock', 'Punk', 'Swing, Jazz', 'Rock n roll', 'Alternative', 'Horror', 'Reading', 'Art exhibitions', 'Writing', 'Theatre', 'Darkness', 'Spiders', 'Ageing', 'Smoking', 'Loss of interest', 'Criminal damage', 'Hypochondria', 'Cheating in school', 'Changing the past', 'Getting angry', 'Small - big dogs', 'Getting up', 'Entertainment spending']

HIGH-VALUED NEGATIVE COMPONENTS: ['Dance', 'Mathematics', 'Physics', 'Biology', 'Cars', 'Religion', 'Gardening', 'Writing notes', 'Workaholism', 'Thinking ahead', 'Final judgement', 'God', 'Children', 'Finding lost valuables', 'Finances']
*********************************************



  plt.figure(figsize = (18, 25))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC5 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['Dancing', 'Elections', 'Number of friends', 'Socializing', 'Energy levels', 'Small - big dogs', 'Entertainment spending']

HIGH-VALUED NEGATIVE COMPONENTS: ['Metal or Hardrock', 'Punk', 'Techno, Trance', 'Horror', 'Thriller', 'Sci-fi', 'War', 'Documentary', 'Western', 'Action', 'Mathematics', 'Physics', 'Internet', 'PC', 'Cars', 'Science and technology', 'Flying', 'Spiders', 'Snakes', 'Rats', 'Ageing', 'Dangerous dogs', 'Fear of public speaking', 'Thinking ahead', 'Criminal damage', 'Decision making', 'Self-criticism', 'Hypochondria', 'Eating to survive', 'Health', 'Changing the past', 'Public speaking', 'Finances', 'Spending on gadgets']
*********************************************



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


****************** PC6 **********************
HIGH-VALUED POSITIVE COMPONENTS: ['History', 'Politics', 'Economy Management', 'Law', 'Snakes', 'Rats', 'Dangerous dogs', 'Daily events', 'Writing notes', 'Workaholism', 'Thinking ahead', 'Elections', 'Self-criticism', 'Getting angry', 'Knowing the right people', 'Branded clothing']

HIGH-VALUED NEGATIVE COMPONENTS: ['Rock', 'Metal or Hardrock', 'Punk', 'Reggae, Ska', 'Rock n roll', 'Fantasy/Fairy tales', 'Animated', 'Biology', 'Medicine', 'Countryside, outdoors', 'Passive sport', 'Active sport', 'Gardening', 'Adrenaline sports', 'Pets', 'Compassion to animals', 'Life struggles']
*********************************************



## Exercise 10: k-Means

## Exercise 11: Centroid Interpretation and Visualization

## Exercise 12: Centroids Evaluation