# `Python Development internship at Digital Mozarts Tasks `

### Tasks :-
    1 Calculate total impressions for the age group 30-40
    2 Get all ad_ids for every campaign_id (a campaign contains multiple ads)
    3 Get total clicks where report_start between dates 19/08/2017 to 22/08/2017 (both inclusive).

### Note :-
impressions, ad_id, campaign_id, report_start, etc. are columns of the data

### 1) Importing Tools

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### 2) Preparing data

In [2]:
data = pd.read_csv("data.csv")

In [3]:
# Cheching data
data

Unnamed: 0,ad_id,reporting_start,reporting_end,campaign_id,fb_campaign_id,age,gender,interest1,interest2,interest3,impressions,clicks,spent,total_conversion,approved_conversion
0,708746,17/08/2017,17/08/2017,916,103916,30-34,M,15,17,17,7350.000000,1,1.43,2.0,1.0
1,708749,17/08/2017,17/08/2017,916,103917,30-34,M,16,19,21,17861.000000,2,1.82,2.0,0.0
2,708771,17/08/2017,17/08/2017,916,103920,30-34,M,20,25,22,693.000000,0,0.00,1.0,0.0
3,708815,30/08/2017,30/08/2017,916,103928,30-34,M,28,32,32,4259.000000,1,1.25,1.0,0.0
4,708818,17/08/2017,17/08/2017,916,103928,30-34,M,28,33,32,4133.000000,1,1.29,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,1314410,19/08/2017,19/08/2017,45-49,F,109,111,114,1129773,252,358.189997,13,2.00,,
1139,1314411,19/08/2017,19/08/2017,45-49,F,110,111,116,637549,120,173.880003,3,0.00,,
1140,1314412,19/08/2017,19/08/2017,45-49,F,111,113,117,151531,28,40.289999,2,0.00,,
1141,1314414,17/08/2017,17/08/2017,45-49,F,113,114,117,790253,135,198.710001,8,2.00,,


In [4]:
# Getting information from data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1143 entries, 0 to 1142
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ad_id                1143 non-null   int64  
 1   reporting_start      1143 non-null   object 
 2   reporting_end        1143 non-null   object 
 3   campaign_id          1143 non-null   object 
 4   fb_campaign_id       1143 non-null   object 
 5   age                  1143 non-null   object 
 6   gender               1143 non-null   object 
 7   interest1            1143 non-null   int64  
 8   interest2            1143 non-null   int64  
 9   interest3            1143 non-null   int64  
 10  impressions          1143 non-null   float64
 11  clicks               1143 non-null   int64  
 12  spent                1143 non-null   float64
 13  total_conversion     761 non-null    float64
 14  approved_conversion  761 non-null    float64
dtypes: float64(4), int64(5), object(6)
mem

In [5]:
# Checking that data contains null values
data.isnull().sum()

ad_id                    0
reporting_start          0
reporting_end            0
campaign_id              0
fb_campaign_id           0
age                      0
gender                   0
interest1                0
interest2                0
interest3                0
impressions              0
clicks                   0
spent                    0
total_conversion       382
approved_conversion    382
dtype: int64

### Task-1 
Calculate total impressions for the age group 30-40

In [6]:
# Firstly getting only data have age range 30-34 in data_age_30_34
data_age_30_34 = data[data["age"] == "30-34"]
data_age_30_34

Unnamed: 0,ad_id,reporting_start,reporting_end,campaign_id,fb_campaign_id,age,gender,interest1,interest2,interest3,impressions,clicks,spent,total_conversion,approved_conversion
0,708746,17/08/2017,17/08/2017,916,103916,30-34,M,15,17,17,7350.0,1,1.430000,2.0,1.0
1,708749,17/08/2017,17/08/2017,916,103917,30-34,M,16,19,21,17861.0,2,1.820000,2.0,0.0
2,708771,17/08/2017,17/08/2017,916,103920,30-34,M,20,25,22,693.0,0,0.000000,1.0,0.0
3,708815,30/08/2017,30/08/2017,916,103928,30-34,M,28,32,32,4259.0,1,1.250000,1.0,0.0
4,708818,17/08/2017,17/08/2017,916,103928,30-34,M,28,33,32,4133.0,1,1.290000,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615,1121282,24/08/2017,24/08/2017,1178,144570,30-34,M,7,11,10,185823.0,25,38.549999,4.0,1.0
616,1121284,24/08/2017,24/08/2017,1178,144571,30-34,M,66,68,70,175631.0,23,40.760000,1.0,0.0
617,1121285,24/08/2017,24/08/2017,1178,144571,30-34,M,66,68,70,37187.0,4,6.370000,1.0,0.0
618,1121286,24/08/2017,24/08/2017,1178,144571,30-34,M,66,67,69,10991.0,0,0.000000,1.0,0.0


In [7]:
# Now from data_age_30_34, we get impressions column and after that we use sum() function to find total impressions
total_impressions = data_age_30_34["impressions"].sum()

In [8]:
print("Total impressions -",total_impressions)

Total impressions - 35678593.0


### Task-2
Get all ad_ids for every campaign_id (a campaign contains multiple ads)

In [9]:
dict = {}
arr = data["campaign_id"].unique()
for value in arr:
    ad_id = data[data["campaign_id"] == value]["ad_id"]
    dict[value] = ad_id.to_list()

`We create dictionary, namely dict, to store campagin_id as key and list of all ad_id for every campaign_id as value`


In [10]:
dict

{'916': [708746,
  708749,
  708771,
  708815,
  708818,
  708820,
  708889,
  708895,
  708953,
  708958,
  708979,
  709023,
  709038,
  709040,
  709059,
  709105,
  709115,
  709124,
  709179,
  709183,
  709320,
  709323,
  709326,
  709327,
  709328,
  709455,
  709544,
  709614,
  709756,
  709761,
  709899,
  709901,
  710045,
  710088,
  710360,
  710477,
  710480,
  710571,
  710617,
  710623,
  710628,
  710682,
  710763,
  710836,
  710867,
  710880,
  710961,
  710968,
  711217,
  711623,
  711764,
  711785,
  711877,
  712052],
 '936': [734209,
  734210,
  734215,
  734243,
  734266,
  734272,
  734290,
  734313,
  734314,
  734352,
  734361,
  734381,
  734399,
  734418,
  734421,
  734427,
  734433,
  734582,
  734605,
  734660,
  734666,
  734726,
  734737,
  734785,
  734794,
  734796,
  734800,
  734803,
  734852,
  734854,
  734856,
  734866,
  734881,
  734901,
  734903,
  734925,
  734939,
  734968,
  734999,
  735014,
  735032,
  735033,
  735043,
  735048,
  735

### Task - 3
Get total clicks where report_start between dates 19/08/2017 to 22/08/2017 (both inclusive).

In [11]:
data.sort_values(by="reporting_start",inplace=True,ignore_index=True)

In [12]:
data

Unnamed: 0,ad_id,reporting_start,reporting_end,campaign_id,fb_campaign_id,age,gender,interest1,interest2,interest3,impressions,clicks,spent,total_conversion,approved_conversion
0,708746,17/08/2017,17/08/2017,916,103916,30-34,M,15,17,17,7350.000000,1,1.43,2.0,1.0
1,1122089,17/08/2017,17/08/2017,40-44,F,23,25,29,195220,51,78.060000,1,0.00,,
2,1122056,17/08/2017,17/08/2017,40-44,F,16,19,21,925555,182,262.889998,4,2.00,,
3,1122052,17/08/2017,17/08/2017,40-44,F,16,20,19,265038,51,78.459999,2,1.00,,
4,1122047,17/08/2017,17/08/2017,40-44,F,15,18,21,582725,142,194.809999,9,2.00,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,778689,30/08/2017,30/08/2017,936,115892,30-34,M,28,32,32,7453.000000,1,1.68,1.0,1.0
1139,709059,30/08/2017,30/08/2017,916,103968,30-34,M,20,26,23,14669.000000,7,10.28,1.0,1.0
1140,709105,30/08/2017,30/08/2017,916,103976,30-34,M,28,30,32,1241.000000,0,0.00,1.0,1.0
1141,1121365,30/08/2017,30/08/2017,1178,144584,35-39,M,27,33,31,110503.000000,25,32.68,4.0,0.0


In [13]:
reporting_start = data[data["reporting_start"]>="19/08/2017"]
reporting_start = reporting_start[reporting_start["reporting_start"]<="22/08/2017"]
reporting_start

Unnamed: 0,ad_id,reporting_start,reporting_end,campaign_id,fb_campaign_id,age,gender,interest1,interest2,interest3,impressions,clicks,spent,total_conversion,approved_conversion
163,1121499,19/08/2017,19/08/2017,1178,144607,40-44,M,25,27,31,264222.00,63,87.790000,1.0,1.0
164,735140,19/08/2017,19/08/2017,936,108809,40-44,F,26,27,31,6907.00,2,2.350000,1.0,0.0
165,1121628,19/08/2017,19/08/2017,45-49,M,21,25,22,59838,7,11.11,1,0.000000,,
166,735065,19/08/2017,19/08/2017,936,108797,40-44,F,7,10,12,648.00,0,0.000000,1.0,0.0
167,735143,19/08/2017,19/08/2017,936,108810,40-44,F,27,28,30,39035.00,13,19.330000,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
554,776553,22/08/2017,22/08/2017,936,115523,30-34,F,16,21,17,66765.00,8,11.050000,1.0,0.0
555,776469,22/08/2017,22/08/2017,936,115509,45-49,F,28,33,29,45397.00,15,25.419999,1.0,1.0
556,776473,22/08/2017,22/08/2017,936,115510,30-34,M,16,22,22,23086.00,2,3.310000,1.0,1.0
557,776552,22/08/2017,22/08/2017,936,115523,30-34,F,16,17,18,21596.00,2,2.810000,1.0,0.0


In [14]:
total_click = reporting_start["clicks"].sum()
print("Total Number of clicks -",total_click)

Total Number of clicks - 3763


# `!!!END!!!`