### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

### Importing dataset 

In [None]:
# Load the sales CSV; the first column becomes the index and the first row
# supplies the column names.
df = pd.read_csv(
    r'C:\Users\hp\Desktop\Sales\Sales.csv',
    index_col=0,
    header=0,
    encoding='unicode_escape',
)
# Preview the first rows.
df.head()

### Checking shape of dataset

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

### Checking columns in dataset

In [None]:
# Column labels of the loaded frame.
df.columns

### Checking details of dataset

In [None]:
# Per-column dtype and non-null count summary.
df.info()

### Dropping unnecessary columns

In [None]:
# Remove the two columns that are not used in the analysis below.
df = df.drop(['Status', 'unnamed1'], axis=1)
df

### Getting statistical insights of data

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the numeric columns.
df.describe()

### Checking for null values

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Central-tendency measures of 'Amount'; amount_mean is reused later for
# imputing missing values.
amount_mean = df['Amount'].mean()
amount_median = df['Amount'].median()
# mode() returns a Series of all modes; keep only the first one.
amount_mode = df['Amount'].mode().iloc[0]

for label, value in (
    ('Mean:', amount_mean),
    ('Median:', amount_median),
    ('Mode:', amount_mode),
):
    print(label, value)

In [None]:
# Box plot of 'Amount' to inspect its spread and outliers.
df.boxplot('Amount')
plt.show()

In [None]:
# Impute missing 'Amount' values with the column mean computed earlier.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['Amount']: that form is chained assignment on
# an intermediate Series, which raises a FutureWarning in recent pandas and
# leaves df unchanged once Copy-on-Write is enabled.
df['Amount'] = df['Amount'].fillna(amount_mean)
# Re-check the per-column null counts after imputation.
df.isnull().sum()

In [None]:
# Bucket customers into four named age groups.
CustomersAge = df['Age']
# Bin edges: youngest age in the data, the three group cut-offs, oldest age.
# NOTE(review): this assumes min age < 15 and max age > 60; otherwise the edges
# are not strictly increasing and pd.cut raises.
bins = [CustomersAge.min(),15,21,60,CustomersAge.max()]
binlabels = ['Children', 'Adolescents', 'Adult', 'Senior']

# pd.cut builds right-closed intervals, so without include_lowest=True the
# first bin is (min, 15] and every customer of exactly the minimum age would
# get NaN instead of 'Children'.
categories = pd.cut(CustomersAge,bins,labels = binlabels,include_lowest=True)
categories.head(20)

In [None]:
# Attach the age-group labels to the frame as a new column.
df['Labelled_Age'] = categories
# Show the last 20 rows to check the new column.
df.tail(20)

In [None]:
# How many rows have no age group, and the current frame size.
print(df['Labelled_Age'].isnull().sum())
print(df.shape)

In [None]:
# Keep only the rows that were assigned an age group.
df = df[df['Labelled_Age'].notna()]
# Confirm no unlabelled rows remain and show the new frame size.
print(df['Labelled_Age'].isna().sum())
print(df.shape)

### Checking percentage of customers of different age groups

In [None]:
# Share of customers in each age group, in percent.
100 * df['Labelled_Age'].value_counts(normalize=True)

In [None]:
# Pie chart of the percentage of customers per age group.
keys = ['Children', 'Adolescents', 'Adult', 'Senior']
# value_counts() orders its result by frequency, not by category, so re-order
# it to match `keys`; otherwise the labels and the explode offsets below would
# be attached to the wrong wedges.
data = (df['Labelled_Age'].value_counts(normalize = True)*100).reindex(keys)

explode = [0, 0, 0.2, 0]  # pull the third wedge ('Adult') out of the pie
palette_color = sns.color_palette("bright")
plt.pie(data, labels=keys, colors=palette_color, explode=explode, autopct='%.0f%%')
plt.savefig('test1.png')
plt.show()

In [None]:
# Number of customers per age group, with the count written on each bar.
ax = sns.countplot(x='Labelled_Age', data=df, palette='flare')
for container in ax.containers:
    ax.bar_label(container)
plt.savefig('test2.png')
plt.show()

In [None]:
# Number of customers per age group, split by gender, with labelled bars.
ax = sns.countplot(
    x='Labelled_Age',
    hue='Gender',
    data=df,
    palette='viridis',
)
for container in ax.containers:
    ax.bar_label(container)
plt.savefig('test8.png')
plt.show()

In [None]:
# Total orders per age group, sorted from fewest to most.
age_sales = (
    df.groupby(['Labelled_Age'], as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=True)
)

ax = sns.barplot(x='Labelled_Age', y='Orders', data=age_sales, palette='coolwarm')
for container in ax.containers:
    ax.bar_label(container)
plt.xticks(rotation=45)
plt.show()

### Checking number of Males vs Females 

In [None]:
# Number of rows for each distinct 'Gender' value.
df['Gender'].value_counts()

In [None]:
# Number of customers per gender, with the count written on each bar.
ax = sns.countplot(x='Gender', data=df, palette='ch:s=.25,rot=-.25')
for container in ax.containers:
    ax.bar_label(container)
plt.savefig('test3.png')
plt.show()

In [None]:
# Total purchase amount per gender, sorted from lowest to highest.
gender_amount = df.groupby(['Gender'], as_index = False)['Amount'].sum().sort_values(by = 'Amount', ascending = True)

ax = sns.barplot(data=gender_amount,x='Gender',y='Amount',palette='viridis')
# Saved as test7.png: the state-orders chart further down also writes
# 'test6.png', so using that name here meant this figure was overwritten.
plt.savefig('test7.png')
plt.show()

### Checking number of orders

In [None]:
# Number of rows for each distinct 'Orders' value.
df['Orders'].value_counts()

In [None]:
# Number of rows per 'Orders' value, with the count written on each bar.
ax = sns.countplot(x='Orders', data=df, palette='blend:#7AB,#EDA')
for container in ax.containers:
    ax.bar_label(container)
plt.savefig('test4.png')
plt.show()

### Checking percentage of customers who are married

In [None]:
# Share of rows for each 'Marital_Status' value, in percent.
100 * df['Marital_Status'].value_counts(normalize=True)

In [None]:
# Pie chart of the percentage of rows per 'Marital_Status' value.
# value_counts() orders its result by frequency, so sort by the status value
# and take the labels from the data itself; a hard-coded [0, 1] label list is
# wrong whenever status 1 is the more frequent one.
data = (df['Marital_Status'].value_counts(normalize = True)*100).sort_index()
keys = data.index.tolist()

# NOTE(review): explode assumes exactly two distinct status values.
explode = [0, 0.1]  # pull the second wedge out of the pie
palette_color = sns.color_palette("bright")
plt.pie(data, labels=keys, colors=palette_color, explode=explode, autopct='%.0f%%')
plt.show()

### Checking unique occupations and their value counts

In [None]:
# Number of rows for each distinct 'Occupation' value.
df['Occupation'].value_counts()

In [None]:
# Number of customers per occupation, with the count written on each bar.
ax = sns.countplot(x='Occupation', data=df, palette='blend:#7AB,#EDA')
for container in ax.containers:
    ax.bar_label(container)
# Rotate the tick labels so the occupation names do not overlap.
plt.xticks(rotation=90)
plt.savefig('test5.png')
plt.show()

### Getting records of customers who work in the IT Sector

In [None]:
# Rows whose occupation is exactly 'IT Sector'.
IT = df.loc[df['Occupation'].eq('IT Sector')]
IT

### Checking unique states and their value counts

In [None]:
# Number of rows for each distinct 'State' value.
df['State'].value_counts()

In [None]:
# Ten states with the highest total number of orders.
State_sales = (
    df.groupby(['State'], as_index=False)['Orders']
    .sum()
    .sort_values(by=['Orders'], ascending=False)
    .head(10)
)

ax = sns.barplot(x='State', y='Orders', data=State_sales, palette='blend:#7AB,#EDA')
for container in ax.containers:
    ax.bar_label(container)
# Rotate the tick labels so the state names do not overlap.
plt.xticks(rotation=90)
plt.savefig('test6.png')
plt.show()

In [None]:
# Total purchase amount per state, sorted ascending and cut to the first ten.
# NOTE(review): ascending sort + head(10) yields the ten LOWEST-amount states,
# whereas the orders chart uses the ten highest -- confirm this is intended.
State_amount = (
    df.groupby(['State'], as_index=False)['Amount']
    .sum()
    .sort_values(by=['Amount'], ascending=True)
    .head(10)
)

ax = sns.barplot(x='State', y='Amount', data=State_amount, palette='coolwarm')
# Rotate the tick labels so the state names do not overlap.
plt.xticks(rotation=90)
plt.savefig('test9.png')
plt.show()

### Getting records of customers who live in Maharashtra

In [None]:
# Rows whose state is exactly 'Maharashtra'.
maharashtra = df.loc[df['State'].eq('Maharashtra')]
maharashtra

### Getting records of male customers from Maharashtra who are older than 22 and work in the IT Sector

In [None]:
# Male customers from Maharashtra who work in the IT Sector and are older
# than 22. The age test is strictly greater-than to match the stated
# criterion ("age greater than 22"); it previously used >= and so also
# included customers aged exactly 22.
mahmaleit = df[
    (df['State'] == 'Maharashtra')
    & (df['Gender'] == 'M')
    & (df['Occupation'] == 'IT Sector')
    & (df['Age'] > 22)
]
mahmaleit

In [None]:
# Contingency table of age group (rows) by gender (columns), with a 'Total'
# margin row and column.
Age_gender = pd.crosstab(
    index=df['Labelled_Age'],
    columns=df['Gender'],
    margins=True,
    margins_name='Total',
)
Age_gender

In [None]:
# Contingency table of age group (rows) by state (columns), with a 'Total'
# margin row and column.
State_age = pd.crosstab(
    index=df['Labelled_Age'],
    columns=df['State'],
    margins=True,
    margins_name='Total',
)
State_age

In [None]:
# Per-column dtype and non-null count summary of the current frame.
df.info()

In [None]:
# Total orders per product category, sorted from fewest to most.
# (The variable name says "occupation", but the grouping key is
# 'Product_Category'.)
Sales_occupation = (
    df.groupby(['Product_Category'], as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=True)
)

ax = sns.barplot(x='Product_Category', y='Orders', data=Sales_occupation, palette='coolwarm')
for container in ax.containers:
    ax.bar_label(container)
# Rotate the tick labels so the category names do not overlap.
plt.xticks(rotation=90)
plt.savefig('test11.png')
plt.show()

In [None]:
# Render a styled preview table. The original expression began with `.style`
# and had no object in front of it, which is a syntax error.
# NOTE(review): df.head() is assumed to be the intended frame -- confirm.
df.head().style.set_properties(**{"background-color": "#F3FFFF","color":"black","border": "2px  solid black"})