In [29]:
#!/usr/bin/python
# coding: utf-8

import sys
import requests
from datetime import date, datetime, timedelta
import pandas as pd
from bs4 import BeautifulSoup
if sys.version_info[0] == 2:
    import urllib2 as ul # Python2
else:
    import urllib.request as ul # Python3
import json


def FacebookPageData(page_id, access_token):
    """Fetch the like count and "talking about" count of a Facebook page.

    Requests the Graph API (v2.8) node ``/<page_id>`` with the fields
    ``name``, ``talking_about_count`` and ``fan_count``, and prints the
    page name returned by the API.

    Args:
        page_id: Page identifier, used as the node path of the request URL.
        access_token: Value sent as the ``access_token`` query parameter.

    Returns:
        ``[fan_count, talking_about_count]`` as a list of two ints.

    Raises:
        KeyError: If the decoded response lacks ``name`` or one of the two
            requested counters.
        Any exception raised by ``ul.urlopen`` or by JSON decoding is
        propagated unchanged.
    """
    # construct the URL string
    base = 'https://graph.facebook.com/v2.8'
    node = '/' + page_id
    parameters = '/?access_token=%s&fields=name,talking_about_count,fan_count' % access_token
    url = base + node + parameters

    # retrieve data; try/finally (rather than `with`) so the response is
    # closed under both urllib2 and urllib.request
    response = ul.urlopen(url)
    try:
        data = json.loads(response.read().decode('utf-8'))
    finally:
        response.close()

    print('Facebook page :', data['name'])
    return [int(data[metric]) for metric in ['fan_count', 'talking_about_count']]

def YoutubePageData(page_id, access_token):
    """Fetch channel-level statistics from the YouTube Data API v3.

    Requests ``channels?part=statistics`` for the given channel id and reads
    the ``statistics`` object of the first returned item.

    Args:
        page_id: Channel id, sent as the ``id`` query parameter.
        access_token: API key, sent as the ``key`` query parameter.

    Returns:
        ``[subscriberCount, viewCount, videoCount]`` as a list of three ints.

    Raises:
        KeyError: If ``items``, ``statistics`` or one of the three counters
            is missing from the response.
        IndexError: If ``items`` is empty.
        Any exception raised by ``ul.urlopen`` or by JSON decoding is
        propagated unchanged.
    """
    base = 'https://www.googleapis.com/youtube/v3/channels'
    parameters = '?part=statistics&id=' + page_id + '&key=' + access_token
    url = base + parameters

    # retrieve data; try/finally (rather than `with`) so the response is
    # closed under both urllib2 and urllib.request
    response = ul.urlopen(url)
    try:
        data = json.loads(response.read().decode('utf-8'))
    finally:
        response.close()
    statistics = data['items'][0]['statistics']

    return [int(statistics[metric]) for metric in ['subscriberCount', 'viewCount', 'videoCount']]

def YoutubeVideosData(page_id, access_token):
    """Average view/like/dislike counts over a channel's latest videos.

    First asks the ``search`` endpoint for the 10 most recent items of the
    channel (ordered by date) and keeps those that carry a ``videoId``; then
    asks the ``videos`` endpoint for the statistics of these videos and
    averages them. Prints how many videos were averaged.

    Args:
        page_id: Channel id, sent as the ``channelId`` query parameter.
        access_token: API key, sent as the ``key`` query parameter.

    Returns:
        ``[mean viewCount, mean likeCount, mean dislikeCount]``. A statistic
        missing for a video counts as 0 for that video; a statistic missing
        for every video therefore averages to 0. When the channel has no
        video at all, the three means are NaN.

    Raises:
        KeyError: If a response has no ``items`` key or an item has no
            ``id`` / ``statistics`` entry.
        Any exception raised by ``ul.urlopen`` or by JSON decoding is
        propagated unchanged.
    """
    metrics = ['viewCount', 'likeCount', 'dislikeCount']

    # retrieve list of the most recently published videos on the channel (10 or less)
    base = 'https://www.googleapis.com/youtube/v3/search'
    parameters = '?order=date&part=snippet&channelId=' + page_id + '&maxResults=10&key=' + access_token
    response = ul.urlopen(base + parameters)
    try:
        data = json.loads(response.read().decode('utf-8'))
    finally:
        response.close()
    # search results may also contain non-video items, which have no videoId
    videoIds = [e['id']['videoId'] for e in data['items'] if 'videoId' in e['id']]

    if videoIds:
        base = 'https://www.googleapis.com/youtube/v3/videos'
        parameters = '?part=statistics&id=' + ','.join(videoIds) + '&key=' + access_token
        response = ul.urlopen(base + parameters)
        try:
            data = json.loads(response.read().decode('utf-8'))
        finally:
            response.close()
        records = [item['statistics'] for item in data['items']]
    else:
        # nothing to look up: skip the second request instead of sending an empty id list
        records = []

    # reindex guarantees the three metric columns exist even when the API
    # omits one of them for every video (e.g. hidden likes, or dislike counts
    # that are no longer publicly exposed); missing values are counted as 0
    df_stats = pd.DataFrame(records).reindex(columns=metrics).fillna(value=0).astype(int)
    n = len(df_stats.index)
    print('Getting average metrics for the latest', n, 'videos of the channel')

    return [df_stats[metric].mean() for metric in metrics]

def get_metrics():
    """Collect the current social-media metrics of every tracked candidate.

    For each entry of the local ``accounts`` table this scrapes the Twitter
    profile page, queries YouTube through ``YoutubePageData`` and
    ``YoutubeVideosData``, and queries Facebook through ``FacebookPageData``.
    A source that fails is reported on stdout and its values are replaced by
    the placeholder ``'-'``, so one failing source does not abort the run.

    Returns:
        A ``pandas.DataFrame`` indexed by candidate name (sorted), with the
        columns ``0_tw_followers``, ``2_yt_subscribers``,
        ``3_yt_views_count``, ``x_yt_videos_count``, ``x_yt_like_count``,
        ``x_yt_dislike_count``, ``4_yt_reaction_rate``,
        ``5_yt_satisfaction_rate``, ``6_fb_likes`` and ``7_fb_talking_about``.
        Unavailable values are ``'-'``.
    """
    # {Candidate: [YouTube channel id, Facebook page, Twitter handle]}
    accounts = {#'Alliot-Marie': [None, 'MAlliotMarie', 'MAlliotMarie'],
               #'Arthaud': ['UCZsh-MrJftAOP_-ZgRgLScw', 'nathaliearthaud', 'n_arthaud'],
               #'Bayrou': [None, 'bayrou', 'bayrou'],
               #'Cheminade': ['UCCPw8MX-JcsiTzItY-qq1Fg', 'Jcheminade', 'Jcheminade'],
               #'Dupont-Aignan': ['UCfA5DnCDX3Ixy5QOAMGtBlA', 'nicolasdupontaignan', 'dupontaignan'],
               'Fillon': ['UCp1R4BFJrKw34PfUc3GDLkw', 'FrancoisFillon', 'francoisfillon'],
               'Hamon': ['UCcMryUp6ME3BvP2alkS1dKg', 'hamonbenoit', 'benoithamon'],
               #'Jadot': ['UCsUMhb2ygeTSS2mXLTIDHMQ', 'yannick.jadot', 'yjadot'],
               'Le Pen': ['UCU3z3px1_RCqYBwrs8LJVWg', 'MarineLePen', 'MLP_officiel'],
               'Macron': ['UCJw8np695wqWOaKVhFjkRyg', 'EmmanuelMacron', 'emmanuelmacron'],
               'Melenchon': ['UCk-_PEY3iC6DIGJKuoEe9bw', 'JLMelenchon', 'JLMelenchon'],
               #'Poutou': [None, 'poutou.philippe', 'PhilippePoutou']
    }

    # NOTE(review): these credentials are committed in clear text; they should
    # be rotated and loaded from the environment or a secrets store instead.
    app_id = "615202351999343"
    app_secret = "ea787efd843d1de746817ec6e9bf7e94"
    access_token = app_id + "|" + app_secret
    google_key = 'AIzaSyBkRrj_kFDUv-T76CJaI3Pd-g3v7UY4GMA'

    # Rows are collected first and the frame is built once at the end
    # (DataFrame.append no longer exists in recent pandas versions).
    names, records = [], []
    for candidate in accounts:
        print('-' * 20)
        youtube_id, facebook_id, twitter_id = accounts[candidate]

        stats = {}
        try: # Twitter: first three profile counters; the third is stored as followers
            print('Analyzing Twitter account', twitter_id)
            soup = BeautifulSoup(requests.get('https://twitter.com/' + twitter_id + '?lang=en').text, 'lxml')
            stats_tw = [int(tag.attrs['title'].replace(',', '').split(' ')[0])
                        for tag in soup.find_all(class_='ProfileNav-stat', limit=3) if 'title' in tag.attrs]
            # A page without exactly three counters would break the unpacking
            # below, so treat it as a scraping failure.
            if len(stats_tw) != 3:
                raise ValueError('expected 3 profile counters, got %d' % len(stats_tw))
        except Exception: # not a bare except: Ctrl-C must still interrupt the run
            stats_tw = ['-', '-', '-']
            print('Profil Twitter : une erreur est survenue...')

        _, _, stats['0_tw_followers'] = stats_tw

        if youtube_id is not None:
            print('Scanning Youtube Channel')
            try: # YouTube channel: [subscribers, total views, number of videos]
                stats_yt = YoutubePageData(youtube_id, google_key)
            except Exception:
                stats_yt = ['-', '-', '-']
                print('Page Youtube : une erreur est survenue...')
            try: # YouTube latest videos: [mean views, mean likes, mean dislikes]
                stats_yt2 = YoutubeVideosData(youtube_id, google_key)
            except Exception:
                stats_yt2 = ['-', '-', '-']
                print('Vidéos Youtube : une erreur est survenue...')
        else:
            print('No Youtube Channel')
            stats_yt, stats_yt2 = ['-', '-', '-'], ['-', '-', '-']

        stats['2_yt_subscribers'], stats['3_yt_views_count'], stats['x_yt_videos_count'] = stats_yt
        _, stats['x_yt_like_count'], stats['x_yt_dislike_count'] = stats_yt2

        try:
            # reaction rate: (likes + dislikes) per view, in percent
            stats['4_yt_reaction_rate'] = round((float(stats_yt2[1] + stats_yt2[2]) / stats_yt2[0]) * 100, 1)
            # satisfaction rate: share of likes among likes + dislikes, in percent
            stats['5_yt_satisfaction_rate'] = round((float(stats_yt2[1]) / (stats_yt2[2] + stats_yt2[1])) * 100, 1)
        except Exception: # placeholder values or a zero denominator
            stats['4_yt_reaction_rate'] = '-'
            stats['5_yt_satisfaction_rate'] = '-'

        try: # Facebook: [likes, people talking about this]
            stats_fb = FacebookPageData(facebook_id, access_token)
        except Exception:
            stats_fb = ['-', '-']
            print('Page Facebook : une erreur est survenue...')

        stats['6_fb_likes'], stats['7_fb_talking_about'] = stats_fb

        print()
        print(stats)

        # queue the candidate's row for the final dataframe
        names.append(candidate)
        records.append(stats)

    df = pd.DataFrame(records, index=names)
    df = df.sort_index(axis=0).fillna(value='-')

    return df

def save_metrics(df, timestamp):
    """Store each column of ``df`` in its own per-metric JSON history file.

    For every column ``metric`` of ``df`` the file ``data/<metric>.json``
    (``orient='split'``) holds one row per index entry of ``df`` and one
    column per save date. The values of ``df[metric]`` are written under the
    column ``timestamp``: an existing column for that date is overwritten,
    otherwise a new column is appended. A file that is missing or cannot be
    parsed is (re)created from the current values only.

    Args:
        df: DataFrame whose columns are the metrics to save.
        timestamp: Label of the column to write, normally a ``date``.

    Returns:
        None. The function writes files under ``data/`` (the directory must
        already exist) and prints one line per file written.
    """
    path = 'data/' # save path
    #path = '/var/www/html/metrics/data/'

    # read_json gives back the stored date labels as Timestamps, so the label
    # is normalised to a Timestamp too; otherwise a rerun on the same day may
    # not find its own column. Labels that are not date-like are kept as is.
    try:
        column = pd.Timestamp(timestamp)
    except (ValueError, TypeError):
        column = timestamp

    for metric in df:
        filename = path + metric + '.json'
        try:
            current_df = pd.read_json(filename, orient='split')
        except (ValueError, IOError, OSError):
            # No usable history yet. Depending on the pandas version a missing
            # file surfaces as ValueError or as FileNotFoundError (an OSError).
            current_df = df[metric].to_frame(name=column)
        else:
            if column in current_df:
                # same date saved again: overwrite that day's values
                current_df[column] = df[metric]
            else:
                current_df = pd.concat([current_df, df[metric].to_frame(name=column)], axis=1)

        current_df.to_json(filename, orient='split')
        print('Data saved as ' + filename)

In [30]:
# Ad-hoc check of YoutubeVideosData on a single channel (the id below is the
# one listed for 'Macron' in get_metrics).
# NOTE(review): these credentials duplicate the ones hard-coded inside
# get_metrics() and are committed in clear text; consider rotating them and
# loading them from the environment instead.
app_id = "615202351999343"
app_secret = "ea787efd843d1de746817ec6e9bf7e94"
access_token = app_id + "|" + app_secret
google_key = 'AIzaSyBkRrj_kFDUv-T76CJaI3Pd-g3v7UY4GMA'

# Only google_key is needed by this call; the Facebook token is not used here.
YoutubeVideosData('UCJw8np695wqWOaKVhFjkRyg', google_key)

Getting average metrics for the latest 10 videos of the channel


[5934.3, 187.9, 194.3]

In [37]:
# Display the stored Twitter follower history written by save_metrics
# (one row per candidate, one column per save date).
pd.read_json('data/0_tw_followers.json', orient='split')

Unnamed: 0,2017-02-15 00:00:00,2017-02-16 00:00:00,2017-02-17 00:00:00,2017-02-18 00:00:00,2017-02-19 00:00:00,2017-02-20 00:00:00,2017-02-21 00:00:00,2017-02-22 00:00:00,2017-02-23 00:00:00,2017-02-24 00:00:00,...,2017-03-07 00:00:00,2017-03-08 00:00:00,2017-03-09 00:00:00,2017-03-10 00:00:00,2017-03-11 00:00:00,2017-03-12 00:00:00,2017-03-13 00:00:00,2017-03-14 00:00:00,2017-03-15 00:00:00,2017-03-16 00:00:00
Fillon,424011,425149,426014,426789,427675,428638,429347,430143,430835,431585,...,448673,449698,450722,451641,452544,453580,454327,455009,455953,456524
Hamon,327730,328988,329775,330478,331323,331974,332387,332865,333374,333899,...,340218,340815,341421,342218,342576,342768,342946,343235,343684,344026
Le Pen,1274053,1275497,1276784,1278058,1279544,1281410,1284466,1287438,1289158,1290691,...,1313884,1315306,1316938,1318486,1319819,1321599,1323344,1324596,1326549,1328247
Macron,496473,499164,501489,503556,505678,508232,510271,513332,515402,517712,...,544300,546901,549529,551699,553740,556027,558547,560401,562852,564988
Melenchon,965153,966236,967289,968269,969236,970551,971523,972501,973566,974895,...,986502,987424,988388,989204,990033,991081,991975,992721,994265,995636


In [36]:
# Daily update: collect the current metrics and save them under today's date.
# NOTE(review): "today" is the UTC clock shifted by a fixed +1 hour, presumably
# to approximate French local time; a fixed offset ignores daylight saving
# time, so the date could be off around midnight in summer - confirm.
today = (datetime.utcnow() + timedelta(hours=1)).date()
print('Maj du', today)

save_metrics(get_metrics(), today)

Maj du 2017-03-16
--------------------
Analyzing Twitter account francoisfillon
Scanning Youtube Channel
Getting average metrics for the latest 10 videos of the channel
Facebook page : François Fillon

{'3_yt_views_count': 1101296, '7_fb_talking_about': 61460, '4_yt_reaction_rate': 3.3, 'x_yt_videos_count': 270, '6_fb_likes': 307408, '2_yt_subscribers': 4626, 'x_yt_like_count': 164.2, 'x_yt_dislike_count': 43.6, '0_tw_followers': 456524, '5_yt_satisfaction_rate': 79.0}
--------------------
Analyzing Twitter account MLP_officiel
Scanning Youtube Channel
Getting average metrics for the latest 10 videos of the channel
Facebook page : Marine Le Pen

{'3_yt_views_count': 2311994, '7_fb_talking_about': 142019, '4_yt_reaction_rate': 6.4, 'x_yt_videos_count': 166, '6_fb_likes': 1252148, '2_yt_subscribers': 15946, 'x_yt_like_count': 279.4, 'x_yt_dislike_count': 18.2, '0_tw_followers': 1328247, '5_yt_satisfaction_rate': 93.9}
--------------------
Analyzing Twitter account emmanuelmacron
Scanning