In [2]:
import random

import pandas as pd
import plotly.express as px #used for creating interactive and visually appealing plots and charts 
import plotly.graph_objects as go #provides a lower-level interface for creating more customized and complex plots
import plotly.io as pio #provides functions for working with plot settings and templates
# Use "plotly_white" as the default template for the figures created below.
pio.templates.default = "plotly_white"

In [3]:
# Load the Instagram post metrics into a DataFrame.
# The file is decoded explicitly as Latin-1 through the `encoding` argument.
data = pd.read_csv(
    "Instagram data.csv",
    encoding="latin-1",
)

In [5]:
# Preview the first five rows (five is the default for head()).
data.head()

Unnamed: 0,Impressions,From Home,From Hashtags,From Explore,From Other,Saves,Comments,Shares,Likes,Profile Visits,Follows,Caption,Hashtags
0,3920,2586,1028,619,56,98,9,5,162,35,2,Here are some of the most important data visua...,#finance #money #business #investing #investme...
1,5394,2727,1838,1174,78,194,7,14,224,48,10,Here are some of the best data science project...,#healthcare #health #covid #data #datascience ...
2,4021,2085,1188,0,533,41,11,1,131,62,12,Learn how to train a machine learning model an...,#data #datascience #dataanalysis #dataanalytic...
3,4528,2700,621,932,73,172,10,7,213,23,8,Heres how you can write a Python program to d...,#python #pythonprogramming #pythonprojects #py...
4,2518,1704,255,279,37,96,5,4,123,8,0,Plotting annotations while visualizing your da...,#datavisualization #datascience #data #dataana...


In [6]:
# List the column labels of the loaded DataFrame.
data.columns

Index(['Impressions', 'From Home', 'From Hashtags', 'From Explore',
       'From Other', 'Saves', 'Comments', 'Shares', 'Likes', 'Profile Visits',
       'Follows', 'Caption', 'Hashtags'],
      dtype='object')

In [7]:
# Summarise each column: non-null count and dtype, plus overall memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Impressions     119 non-null    int64 
 1   From Home       119 non-null    int64 
 2   From Hashtags   119 non-null    int64 
 3   From Explore    119 non-null    int64 
 4   From Other      119 non-null    int64 
 5   Saves           119 non-null    int64 
 6   Comments        119 non-null    int64 
 7   Shares          119 non-null    int64 
 8   Likes           119 non-null    int64 
 9   Profile Visits  119 non-null    int64 
 10  Follows         119 non-null    int64 
 11  Caption         119 non-null    object
 12  Hashtags        119 non-null    object
dtypes: int64(11), object(2)
memory usage: 12.2+ KB


In [8]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,Impressions,From Home,From Hashtags,From Explore,From Other,Saves,Comments,Shares,Likes,Profile Visits,Follows
count,119.0,119.0,119.0,119.0,119.0,119.0,119.0,119.0,119.0,119.0,119.0
mean,5703.991597,2475.789916,1887.512605,1078.10084,171.092437,153.310924,6.663866,9.361345,173.781513,50.621849,20.756303
std,4843.780105,1489.386348,1884.361443,2613.026132,289.431031,156.317731,3.544576,10.089205,82.378947,87.088402,40.92158
min,1941.0,1133.0,116.0,0.0,9.0,22.0,0.0,0.0,72.0,4.0,0.0
25%,3467.0,1945.0,726.0,157.5,38.0,65.0,4.0,3.0,121.5,15.0,4.0
50%,4289.0,2207.0,1278.0,326.0,74.0,109.0,6.0,6.0,151.0,23.0,8.0
75%,6138.0,2602.5,2363.5,689.5,196.0,169.0,8.0,13.5,204.0,42.0,18.0
max,36919.0,13473.0,11817.0,17414.0,2547.0,1095.0,19.0,75.0,549.0,611.0,260.0


In [10]:
# Count the missing values in every column (isna is the alias of isnull).
data.isna().sum()

Impressions       0
From Home         0
From Hashtags     0
From Explore      0
From Other        0
Saves             0
Comments          0
Shares            0
Likes             0
Profile Visits    0
Follows           0
Caption           0
Hashtags          0
dtype: int64

In [14]:
# Distribution of the 'Impressions' column across all posts.
#   data_frame : DataFrame that supplies the column
#   x          : column whose values are binned
#   nbins      : requested number of bins
#   title      : heading shown above the chart
fig = px.histogram(
    data_frame=data,
    x="Impressions",
    nbins=10,
    title="Distribution of Impressions",
)

fig.show()

In [15]:
# Impressions for each post, plotted against the DataFrame's row index.
fig = px.line(
    data_frame=data,
    x=data.index,
    y="Impressions",
    title="Impression Over Time",
)

fig.show()

In [16]:
# Look at the first five rows again before examining individual metrics.
data.head()

Unnamed: 0,Impressions,From Home,From Hashtags,From Explore,From Other,Saves,Comments,Shares,Likes,Profile Visits,Follows,Caption,Hashtags
0,3920,2586,1028,619,56,98,9,5,162,35,2,Here are some of the most important data visua...,#finance #money #business #investing #investme...
1,5394,2727,1838,1174,78,194,7,14,224,48,10,Here are some of the best data science project...,#healthcare #health #covid #data #datascience ...
2,4021,2085,1188,0,533,41,11,1,131,62,12,Learn how to train a machine learning model an...,#data #datascience #dataanalysis #dataanalytic...
3,4528,2700,621,932,73,172,10,7,213,23,8,Heres how you can write a Python program to d...,#python #pythonprogramming #pythonprojects #py...
4,2518,1704,255,279,37,96,5,4,123,8,0,Plotting annotations while visualizing your da...,#datavisualization #datascience #data #dataana...


In [21]:
# Largest, smallest and mean like count, printed one value per line.
print(
    data["Likes"].max(),
    data["Likes"].min(),
    data["Likes"].mean(),
    sep="\n",
)

549
72
173.78151260504202


In [25]:
# Compare Likes, Saves and Follows post by post on one shared chart.
fig = go.Figure()

# One scatter trace per metric, plotted against the DataFrame's row index.
# The trace name is the label shown in the legend.
for metric in ("Likes", "Saves", "Follows"):
    fig.add_trace(go.Scatter(x=data.index, y=data[metric], name=metric))

# The x values are row positions (the dataset has no date column), so the
# axis is labelled as a post index rather than 'Date'.
fig.update_layout(
    title="Metrics Over Time",
    xaxis_title="Post Index",
    yaxis_title="Count",
)

fig.show()


In [26]:
# Share of the summed reach columns contributed by each source.
reach_sources = ['From Home', 'From Hashtags', 'From Explore', 'From Other']

# Column totals, in the same order as reach_sources.
reach_count = data[reach_sources].sum().tolist()

colors = ['#FFB6C1', '#87CEFA', '#90EE90', '#FFDAB9']

# names/values are 4-element lists, so they are passed on their own. The
# per-post `data` frame is not handed to px.pie because none of its columns
# are referenced by name here.
fig = px.pie(names=reach_sources, values=reach_count,
             title='Reach From different Sources', color_discrete_sequence=colors)

fig.show()

In [41]:
# Share of total engagement contributed by each interaction type.
engagement_metrics = ['Saves', 'Comments', 'Shares', 'Likes']
engagement_count = [data[metric].sum() for metric in engagement_metrics]


def generate_random_color():
    """Return a random colour as a '#rrggbb' hex string.

    The channels are drawn from restricted ranges (red 0-100, green 0-219,
    blue 0-255). This cell sets no seed, so the colours can differ between runs.
    """
    return "#{:02x}{:02x}{:02x}".format(random.randint(0, 100), random.randint(0, 219), random.randint(0, 255))


# One colour per engagement metric. The list is sized from this cell's own
# engagement_metrics rather than from a variable defined in another cell.
colors = [generate_random_color() for _ in engagement_metrics]

# names/values are plain lists, so no DataFrame is passed to px.pie.
fig = px.pie(names=engagement_metrics, values=engagement_count,
             title='Engagement Sources', color_discrete_sequence=colors)

fig.show()