In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, r2_score, mean_absolute_error, mean_squared_error
from scipy import stats

In [None]:
# Read the Spotify track-features CSV from the working directory.
data = pd.read_csv("SpotifyFeatures.csv")
# `df` is the frame every later cell works on.
df = pd.DataFrame(data=data)

# Print the schema summary, then show the first 15 rows as the cell output.
df.info()
df.head(n=15)

In [None]:
# Quick data-quality overview.
# A notebook cell only renders its final bare expression, so each result is
# printed explicitly to keep all three visible.
print(df.isnull().sum())            # missing-value count per column
print(df['artist_name'].unique())   # distinct values in `artist_name`
print(df['key'].unique())           # distinct values in `key`

In [None]:
# Order the tracks by ascending popularity.
# sort_values(..., inplace=True) returns None, so assigning its result would
# leave `sort_popularity` empty. Sort without `inplace` and rebind `df` so that
# later cells still see the rows in popularity order.
df = df.sort_values('popularity', ascending=True)
# Alias of the sorted working frame (same object as `df`, not a copy).
sort_popularity = df

In [None]:
# Tracks with popularity above 90, ordered from the highest popularity down.
is_top_track = df['popularity'] > 90
most_popular = df[is_top_track].sort_values(by='popularity', ascending=False)
most_popular

In [None]:
# Track length in seconds, derived from the millisecond column with a
# vectorised division instead of a per-row Python lambda.
df['duration'] = df['duration_ms'] / 1000
# Keep the preview as the cell's final expression so the notebook renders it.
df['duration'].head(8)


In [None]:
# Integer-encode the two categorical columns `mode` and `genre`.
# Each column gets its own fitted encoder so its `classes_` mapping stays
# available (e.g. for inverse_transform); refitting one shared encoder would
# discard the first column's mapping.
mode_encoder = LabelEncoder()
le = LabelEncoder()  # ends up fitted on `genre`
he = OneHotEncoder(sparse_output=False)  # created here but not used in this cell

df['mode'] = mode_encoder.fit_transform(df['mode'])
# LabelEncoder expects a 1-D input: pass the Series, not a one-column DataFrame.
df['genre'] = le.fit_transform(df['genre'])

# Preview both encoded columns together; only a cell's last expression is rendered.
df[['mode', 'genre']].head(10)


In [None]:
# Keep only the integer / float64 columns and compute their pairwise
# correlation matrix (DataFrame.corr with its default settings).
numeric_dtypes = ['int', 'float64']
numerical_feature = df.select_dtypes(include=numeric_dtypes)
data_corr = numerical_feature.corr()

In [None]:
# Annotated heatmap of the feature correlation matrix.
plt.figure(figsize=(14,6))
# Correlation coefficients lie in [-1, 1]; anchor the colour scale to that
# range. With vmin equal to vmax the colormap has zero width and conveys nothing.
heatmap = sns.heatmap(data_corr, vmin=-1, vmax=1, annot=True, cmap='inferno')
heatmap.set_title("this is the co-relation chart.")
# get_xticklabels must be called: set_xticklabels needs the label objects,
# not the bound method itself.
heatmap.set_xticklabels(heatmap.get_xticklabels(), rotation=90, ha='right')
plt.show()



In [None]:
# Find outliers in the data set: rows whose loudness is far from the mean,
# measured by absolute z-score.
OUTLIER_Z_THRESHOLD = 7.0  # rows with |z| above this are reported as outliers

# The container type zscore returns for a pandas input appears to vary by
# SciPy version; verify against the installed one. Wrapping the values in a
# Series on df's index guarantees .head()/.sort_values() exist and that the
# boolean mask below aligns with df row-for-row.
z_score = pd.Series(
    np.asarray(stats.zscore(df['loudness'])),
    index=df.index,
    name='loudness',
)
abs_z_score = np.abs(z_score)
print(abs_z_score.head(10))
print(abs_z_score.sort_values())
outlier = df[abs_z_score > OUTLIER_Z_THRESHOLD]
print(outlier)

In [None]:
# Split the data into training and testing sets.
# `x` is selected with a list of columns so it stays two-dimensional
# (one feature column); `y` is the target Series.
x = df[['loudness']]
y = df['energy']
# 80/20 split; the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression on the training split (fit returns the estimator
# itself), then predict the target for the test inputs.
model = LinearRegression().fit(X_train, y_train)
y_predicted = model.predict(X_test)

In [None]:
# Draw the training observations and overlay the model's predictions for the
# test inputs as the regression line, on the current axes.
ax = plt.gca()
ax.scatter(X_train, y_train, color="blue", label='Data points')
ax.plot(X_test, y_predicted, color="red", label='Regression Line')
ax.set_xlabel('loudness')
ax.set_ylabel('energy')
ax.set_title('Linear Regression of Loudness vs Energy')
ax.legend()
plt.show()

In [None]:
# Evaluate the model on the test split with a few basic metrics:
# R^2, mean squared error and mean absolute error.
for label, metric in (
    ("r2_score", r2_score),
    ("mse", mean_squared_error),
    ("mae", mean_absolute_error),
):
    print(f"{label} :{metric(y_test, y_predicted)}")

In [None]:
# Use data visualization to find valuable insights in the data.
