# Facebook Data

**Save your Facebook export folder inside `data/` and set its folder name (`FACEBOOK_FOLDER`) below**

In [123]:
import pandas as pd
import json
import os
import plotly.express as px
import plotly.graph_objects as go
import csv

# Lift pandas' row limit so displayed DataFrames are never truncated.
pd.options.display.max_rows = None

# Name of the export folder under data/; the cells below substitute it into their file paths.
FACEBOOK_FOLDER='facebook'


## Off Facebook Data

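Prerequisite: convert the Off-Facebook JSON export to CSV, either with `scripts/off_facebook_to_csv.py` or by running the next cell, which performs the conversion inside the notebook.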


### Transform Data into CSV and save in same folder

In [124]:
# Path of the Off-Facebook activity export, without extension; '.json' is read and '.csv' is written.
off_facebook_file = 'data/{}/ads_and_businesses/your_off-facebook_activity'.format(FACEBOOK_FOLDER)

# Load the export. The context manager closes the file even if parsing or the key lookup fails.
with open(off_facebook_file + '.json', encoding='utf-8') as f:
    raw_json = json.load(f)
places = raw_json['off_facebook_activity']

# Flatten to one CSV row per (place, event) pair.
# newline='' is required by the csv module; without it Windows output gets a blank line after every row.
# The explicit UTF-8 encoding matches the default that pd.read_csv uses when the file is read back.
with open(off_facebook_file + '.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # The raw event timestamp is stored under the 'date' header; later cells read it by that name.
    writer.writerow(['name', 'id', 'type', 'date'])
    for place in places:
        for event in place['events']:
            writer.writerow([place['name'], event['id'], event['type'], event['timestamp']])

if os.path.exists(off_facebook_file + '.csv'):
    print('Done')

Done


### Bar Graph of Top 100 Websites

In [125]:

# Read the flattened Off-Facebook events back from the CSV.
off_facebook_file_csv = 'data/{}/ads_and_businesses/your_off-facebook_activity.csv'.format(FACEBOOK_FOLDER)
off_facebook_df = pd.read_csv(off_facebook_file_csv)

# Count events per name (value_counts orders them most frequent first) and keep the first 100.
counts_s = off_facebook_df['name'].value_counts()
counts_df = pd.DataFrame({'name': counts_s.index, 'count': counts_s.values}).iloc[:100]

# One bar per name, coloured by name; legend and axis titles are blanked.
fig = px.bar(counts_df, x='name', y='count', color='name', width=1400)
fig.update_layout(
    showlegend=False,
    xaxis={'title': ''},
    yaxis={'title': ''},
)
fig.show()







### Timeline of 15 most Frequently Recorded Visits

In [140]:

fig = go.Figure()

# Take the 15 most frequent names straight from off_facebook_df instead of the
# shared `counts_df` variable: a later cell rebinds `counts_df` to a frame with
# no 'name' column, so re-running this cell out of order would raise KeyError.
top_names = off_facebook_df['name'].value_counts().index[:15]

# assign() builds a new frame, so the datetime conversion neither mutates
# off_facebook_df nor triggers SettingWithCopyWarning on a filtered slice.
top_df = (
    off_facebook_df[off_facebook_df['name'].isin(top_names)]
    .assign(date=lambda df: pd.to_datetime(df['date'], unit='s'))
)

# One marker trace per name; sort=False keeps the order of first appearance.
for x, t_df in top_df.groupby('name', sort=False):
    fig.add_trace(go.Scatter(x=t_df['date'], y=t_df['name'], mode='markers', marker_symbol='star-triangle-up'))
fig.update_layout(showlegend=False)
fig.show()


## Messages Data

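Step 1: Count the messages in each conversation. (The flattened `messages_flat.csv` used by the timeline further below is not produced by this cell and must already exist.)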

In [127]:

messages_location = 'data/{}/messages/inbox'.format(FACEBOOK_FOLDER)

# Collect one {"title", "count"} record per conversation folder found under the inbox.
# NOTE(review): only message_1.json is read; if an export splits a conversation
# across further files, those messages are not counted — confirm against the export.
messages_list = []
for root, dirs, files in os.walk(messages_location):
    for name in dirs:
        person_path = os.path.join(root, name, 'message_1.json')
        try:
            # The context manager closes the handle even when json.load raises.
            with open(person_path, encoding='utf-8') as f:
                raw_json = json.load(f)
        except FileNotFoundError:
            # Folders without a message_1.json are skipped.
            continue
        message_count = len(raw_json['messages'])
        # Conversations with 3 or fewer messages are left out of the chart.
        if message_count > 3:
            messages_list.append({"title": raw_json['title'], "count": message_count})

# Naming the columns keeps the frame usable (empty, but with 'title'/'count')
# when no conversation qualified, so the plot and later sort do not fail.
messages_df = pd.DataFrame(messages_list, columns=['title', 'count'])


fig = px.bar(messages_df, x='title', y='count', color='count', width=1400, height=800)
fig.update_layout(xaxis={'title': ''})
fig.update_layout(yaxis={'title': ''})
fig.show()


### Complete Timeline of Messages For Top 20

In [141]:
fig = go.Figure()

# The 20 conversations with the most messages.
top_df = messages_df.sort_values(by=['count'], ascending=False)[0:20]

# NOTE(review): messages_flat.csv is not written by any cell visible in this
# notebook; presumably it has to be generated beforehand — confirm.
messages_flat_df = pd.read_csv('data/{}/messages/messages_flat.csv'.format(FACEBOOK_FOLDER))
top_titles = list(top_df.title.values)

# assign() returns a new frame, which avoids the SettingWithCopyWarning raised
# by writing a column into a filtered slice of messages_flat_df.
top_messages_df = (
    messages_flat_df[messages_flat_df['title'].isin(top_titles)]
    .assign(timestamp_ms=lambda df: pd.to_datetime(df['timestamp_ms'], unit='ms'))
)

# One marker trace per conversation; the message content is attached as the marker text.
for x, t_df in top_messages_df.groupby('title', sort=False):
    fig.add_trace(go.Scatter(x=t_df['timestamp_ms'], y=t_df['title'], text=t_df['content'], mode='markers', marker_symbol='star-triangle-up'))
fig.update_layout(showlegend=False, height=700, width=900)

# Range-selector buttons plus a range slider on the date axis.
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=[
                dict(count=1,
                     label="1m",
                     step="month",
                     stepmode="backward"),
                dict(count=6,
                     label="6m",
                     step="month",
                     stepmode="backward"),
                dict(count=1,
                     label="YTD",
                     step="year",
                     stepmode="todate"),
                dict(count=1,
                     label="1y",
                     step="year",
                     stepmode="backward"),
                dict(step="all")
            ]
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)
fig.show()

## Likes and Reactions

Step 1: Flatten to CSV

In [142]:
# Path of the reactions export, without extension; '.json' is read and '.csv' is written.
likes_and_comments_location = 'data/{}/likes_and_reactions/posts_and_comments'.format(FACEBOOK_FOLDER)

# Phrases that precede the recipient in a reaction title, tried in this order.
TITLE_MARKERS = ('likes', 'liked', 'reacted to')


def _recipient_from_title(title):
    """Return the recipient named in a reaction title.

    The recipient is the text between the first matching phrase from
    TITLE_MARKERS and the following "'s", with surrounding whitespace removed.
    Returns None when the title contains none of the phrases.
    """
    for marker in TITLE_MARKERS:
        if marker in title:
            return title.split(marker)[1].split('\'s')[0].strip()
    return None


# newline='' is required by the csv module; without it Windows output gets blank rows.
with open(likes_and_comments_location + '.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['actor', 'recipient', 'timestamp', 'reaction'])

    try:
        with open(likes_and_comments_location + '.json', encoding='utf-8') as f:
            raw_json = json.load(f)
    except FileNotFoundError:
        # Nothing was opened, so there is nothing to close; the CSV keeps only its header.
        print("File Not Found")
        reactions = []
    else:
        reactions = raw_json['reactions']

    for reaction in reactions:
        try:
            timestamp = reaction['timestamp']
            data = reaction['data'][0]
            actor = data['reaction']['actor']
            action = data['reaction']['reaction']
            title = reaction['title']
        except (KeyError, IndexError):
            # Best effort: entries missing any of the expected fields are skipped.
            continue

        recipient = _recipient_from_title(title)
        if recipient is None:
            # Surface titles that match no known phrase so they can be inspected.
            print(title)
        else:
            # Values are written in the same order as the header row.
            writer.writerow([actor, recipient, timestamp, action])



### Your most frequent Interactions

In [145]:
# Read the flattened reactions back from the CSV.
posts_and_comments_csv = 'data/{}/likes_and_reactions/posts_and_comments.csv'.format(FACEBOOK_FOLDER)
p_and_c_df = pd.read_csv(posts_and_comments_csv)

# Count reactions per recipient (value_counts orders them most frequent first) and keep the first 100.
counts_s = p_and_c_df['recipient'].value_counts()
counts_df = pd.DataFrame({'recipient': counts_s.index, 'count': counts_s.values}).head(100)

# One bar per recipient, coloured by its count; axis titles are blanked.
fig = px.bar(counts_df, x='recipient', y='count', color='count', width=1400, height=800)
fig.update_layout(xaxis={'title': ''}, yaxis={'title': ''})
fig.show()