# Streaming
This notebook is the original work of Xavier López

## 0. Read Data

In [1]:
import pandas as pd

In [2]:
# Load the raw viewer counts: one row per sampled timestamp, one column per streamer.
df = pd.read_excel(io="Data Analyst Test.xlsx")
df

Unnamed: 0,timestamp,xQcOW,NICKMERCS,summit1g
0,1612915218356,72396,,35144.0
1,1612915277899,67165,,34762.0
2,1612915337312,73907,,34504.0
3,1612915397655,76404,,34505.0
4,1612915457875,76404,,35144.0
...,...,...,...,...
95,1612920918530,75752,60153.0,22823.0
96,1612920978019,86198,24010.0,36330.0
97,1612921039478,70673,60837.0,29870.0
98,1612921098812,87831,20567.0,30565.0


## 1. Create the output table

In [3]:
# Start from an empty frame; each metric computed below is added as its own column.
output = pd.DataFrame(data=None)

In [4]:
# Streamer columns of the input sheet, followed by the label of the aggregate row.
streamers = [
    "xQcOW",
    "NICKMERCS",
    "summit1g",
    "TOTAL",
]

In [5]:
# First column of the summary table: one row per streamer plus the TOTAL row.
output = output.assign(streamers=streamers)
streamers

['xQcOW', 'NICKMERCS', 'summit1g', 'TOTAL']

## 2. Compute Peak Viewers

In [6]:
# Peak viewers per streamer.
# Series.max() skips NaN (timestamps where a streamer has no reading), whereas the
# built-in max() is order-dependent when NaN is present: a leading NaN would make
# the whole result NaN. Iterating over streamers[:-1] (everything except the
# "TOTAL" label) keeps this cell consistent with the other metric cells.
peak_viewers = [df[name].max() for name in streamers[:-1]]

# The TOTAL peak is defined here as the highest individual peak among the streamers.
peak_viewers.append(max(peak_viewers))

output["peak_viewers"] = peak_viewers
output

Unnamed: 0,streamers,peak_viewers
0,xQcOW,87831.0
1,NICKMERCS,69157.0
2,summit1g,39279.0
3,TOTAL,87831.0


## 3. Compute Hours Watched

In [7]:
# Hours watched per streamer: each row is assumed to represent one minute,
# so the sum of the viewer counts (viewer-minutes) divided by 60 gives viewer-hours.
# Rows with a missing timestamp or a missing viewer count are excluded.
hours_watched = [
    df[["timestamp", name]].dropna()[name].sum() / 60
    for name in streamers[:-1]
]

# TOTAL row: hours watched added up across all streamers.
hours_watched.append(sum(hours_watched))

output["hours_watched"] = hours_watched
output

Unnamed: 0,streamers,peak_viewers,hours_watched
0,xQcOW,87831.0,132740.533333
1,NICKMERCS,69157.0,66019.066667
2,summit1g,39279.0,51529.866667
3,TOTAL,87831.0,250289.466667


## 4. Compute Average Viewers

In [8]:
# Average viewers per streamer, taken over the non-null timestamps only
# (a streamer who is not streaming is not counted as having 0 viewers).
# Series.mean() skips NaN by default, so no explicit dropna() is needed, and it
# returns a scalar directly. The previous `.mean()[0]` indexed a string-labelled
# Series by integer position, which is deprecated in recent pandas releases.
avg_viewers = [df[name].mean() for name in streamers[:-1]]

# TOTAL row: the unweighted average of the per-streamer averages.
avg_viewers.append(sum(avg_viewers) / len(avg_viewers))

output["avg_viewers"] = avg_viewers
output

Unnamed: 0,streamers,peak_viewers,hours_watched,avg_viewers
0,xQcOW,87831.0,132740.533333,79644.32
1,NICKMERCS,69157.0,66019.066667,47724.626506
2,summit1g,39279.0,51529.866667,35134.0
3,TOTAL,87831.0,250289.466667,54167.648835


In [9]:
# Flip the summary so that metrics become rows and each streamer becomes a column.
OUTPUT = output.transpose()
OUTPUT

Unnamed: 0,0,1,2,3
streamers,xQcOW,NICKMERCS,summit1g,TOTAL
peak_viewers,87831,69157,39279,87831
hours_watched,132741,66019.1,51529.9,250289
avg_viewers,79644.3,47724.6,35134,54167.6


In [10]:
# Write the transposed summary to disk; the positional column labels (0..3) are
# omitted, while the row labels (metric names) are kept as the first CSV column.
OUTPUT.to_csv(path_or_buf="OUTPUT.csv", header=False)