In [1]:
#!/usr/bin/python
# coding: utf-8

import json
import os
import sys
from contextlib import closing
from datetime import date, timedelta

import pandas as pd
import requests
from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    import urllib2 as ul # Python2
else:
    import urllib.request as ul # Python3

In [2]:
def FacebookPageData(page_id, access_token):
    """Fetch the audience counters of a Facebook page.

    Requests the ``name``, ``talking_about_count`` and ``fan_count`` fields
    of the page from the Graph API (v2.8), prints the page name and returns
    the two counters.

    Parameters
    ----------
    page_id : str
        Page identifier; used as the node of the request URL.
    access_token : str
        Token sent as the ``access_token`` query parameter.

    Returns
    -------
    list of int
        ``[fan_count, talking_about_count]``.

    Raises
    ------
    KeyError
        If the decoded response lacks ``name`` or one of the counters.
    Exception
        Any error raised by ``ul.urlopen`` or by the JSON decoding is
        propagated unchanged.
    """
    # construct the URL string
    base = 'https://graph.facebook.com/v2.8'
    node = '/' + page_id
    parameters = '/?access_token=%s&fields=name,talking_about_count,fan_count' % access_token
    url = base + node + parameters

    # retrieve data; closing() guarantees the response is closed even when
    # reading or decoding fails, whether or not the response object is
    # itself a context manager
    with closing(ul.urlopen(url)) as response:
        data = json.loads(response.read().decode('utf-8'))

    print('Facebook page :', data['name'])
    return [int(data[metric]) for metric in ['fan_count', 'talking_about_count']]

def YoutubePageData(page_id, access_token):
    """Fetch the channel-level statistics of a YouTube channel.

    Calls the ``channels`` endpoint of the YouTube Data API v3 with
    ``part=statistics`` and reads the statistics of the first returned item.

    Parameters
    ----------
    page_id : str
        Channel id, sent as the ``id`` query parameter.
    access_token : str
        API key, sent as the ``key`` query parameter.

    Returns
    -------
    list of int
        ``[subscriberCount, viewCount, videoCount]``.

    Raises
    ------
    IndexError
        If the response contains no item.
    KeyError
        If ``items``, ``statistics`` or one of the three counters is missing.
    Exception
        Any error raised by ``ul.urlopen`` or by the JSON decoding is
        propagated unchanged.
    """
    base = 'https://www.googleapis.com/youtube/v3/channels'
    parameters = '?part=statistics&id=' + page_id + '&key=' + access_token
    url = base + parameters

    # retrieve data; closing() guarantees the response is closed even when
    # reading or decoding fails
    with closing(ul.urlopen(url)) as response:
        data = json.loads(response.read().decode('utf-8'))
    statistics = data['items'][0]['statistics']

    return [int(statistics[metric]) for metric in ['subscriberCount', 'viewCount', 'videoCount']]

def YoutubeVideosData(page_id, access_token):
    """Average view/like/dislike counts over a channel's latest videos.

    First asks the ``search`` endpoint for the channel's most recent results
    (``order=date``, ``maxResults=10``), then asks the ``videos`` endpoint
    for the statistics of the videos found, and averages each counter.

    A counter is averaged over the videos whose statistics actually contain
    it; a counter that no video reports yields 0 instead of raising.

    Parameters
    ----------
    page_id : str
        Channel id, sent as the ``channelId`` query parameter.
    access_token : str
        API key, sent as the ``key`` query parameter.

    Returns
    -------
    list of int
        Rounded averages ``[viewCount, likeCount, dislikeCount]``.

    Raises
    ------
    IndexError
        If the search returns no video, or the statistics request returns
        no item (no average can be computed).
    KeyError
        If a response lacks the ``items`` / ``id`` / ``statistics`` fields.
    Exception
        Any error raised by ``ul.urlopen`` or by the JSON decoding is
        propagated unchanged.
    """
    metrics = ['viewCount', 'likeCount', 'dislikeCount']

    # retrieve list of the most recently published videos on the channel
    base = 'https://www.googleapis.com/youtube/v3/search'
    parameters = '?order=date&part=snippet&channelId=' + page_id + '&maxResults=10&key=' + access_token
    with closing(ul.urlopen(base + parameters)) as response:
        data = json.loads(response.read().decode('utf-8'))

    # keep only the search results that carry a video id
    videoIds = [e['id']['videoId'] for e in data['items'] if 'videoId' in e['id']]
    if not videoIds:
        # fail before issuing a statistics request with an empty id list
        raise IndexError('no recent video found for channel ' + page_id)

    base = 'https://www.googleapis.com/youtube/v3/videos'
    parameters = '?part=statistics&id=' + ','.join(videoIds) + '&key=' + access_token
    with closing(ul.urlopen(base + parameters)) as response:
        data = json.loads(response.read().decode('utf-8'))

    items = data['items']
    n = len(items)
    if n == 0:
        raise IndexError('no video statistics returned for channel ' + page_id)

    # Build the dictionary of average values for each metric over the
    # analysed videos. float() keeps the division exact under Python 2,
    # where int / int would floor before rounding.
    videoStats = {}
    for metric in metrics:
        values = [int(e['statistics'][metric]) for e in items if metric in e['statistics']]
        videoStats[metric] = int(round(sum(values) / float(len(values)))) if values else 0

    print('Getting average metrics for the latest', n, 'videos of the channel')

    return [videoStats[metric] for metric in metrics]

In [3]:
# {Candidate: [YouTube channel id, Facebook page, Twitter account]}
# A None channel id marks a candidate without a YouTube channel.
accounts = {'Alliot-Marie': [None, 'MAlliotMarie', 'MAlliotMarie'],
           'Arthaud': ['UCZsh-MrJftAOP_-ZgRgLScw', 'nathaliearthaud', 'n_arthaud'],
           'Bayrou': [None, 'bayrou', 'bayrou'],
           'Cheminade': ['UCCPw8MX-JcsiTzItY-qq1Fg', 'Jcheminade', 'Jcheminade'],
           'Dupont-Aignan': ['UCfA5DnCDX3Ixy5QOAMGtBlA', 'nicolasdupontaignan', 'dupontaignan'],
           'Fillon': ['UCp1R4BFJrKw34PfUc3GDLkw', 'FrancoisFillon', 'francoisfillon'],
           'Hamon': ['UCcMryUp6ME3BvP2alkS1dKg', 'hamonbenoit', 'benoithamon'],
           'Jadot': ['UCsUMhb2ygeTSS2mXLTIDHMQ', 'yannick.jadot', 'yjadot'],
           'Le Pen': ['UCU3z3px1_RCqYBwrs8LJVWg', 'MarineLePen', 'MLP_officiel'],
           'Macron': ['UCJw8np695wqWOaKVhFjkRyg', 'EnMarche', 'enmarchefr'],
           'Melenchon': ['UCk-_PEY3iC6DIGJKuoEe9bw', 'JLMelenchon', 'JLMelenchon'],
           'Poutou': [None, 'poutou.philippe', 'PhilippePoutou']}

# API credentials are read from the environment so that no secret is stored
# in the notebook. Set FACEBOOK_APP_ID, FACEBOOK_APP_SECRET and
# GOOGLE_API_KEY before running.
# NOTE(review): the keys previously committed in this cell should be
# considered compromised and revoked.
app_id = os.environ.get('FACEBOOK_APP_ID', '')
app_secret = os.environ.get('FACEBOOK_APP_SECRET', '')
# Facebook app access token: "<app id>|<app secret>"
access_token = app_id + "|" + app_secret
google_key = os.environ.get('GOOGLE_API_KEY', '')

if not (app_id and app_secret and google_key):
    print('Warning: FACEBOOK_APP_ID, FACEBOOK_APP_SECRET and/or GOOGLE_API_KEY '
          'are not set; the corresponding API requests will fail')

In [15]:
# Collect today's Twitter / YouTube / Facebook metrics for every candidate
# and save them as one JSON file named after the current date.
today = date.today()
path = 'data/' # save path
# path = '/var/www/html/duel/data/'

print('Maj du', today)

# One stats dict per candidate; the DataFrame is built once after the loop
# (growing it with DataFrame.append on every iteration is quadratic, and the
# method no longer exists in pandas >= 2.0).
candidates, records = [], []

for candidate in accounts:
    print('-' * 20)
    print(candidate)
    print('-' * 20)

    stats = {}
    try: # Twitter : [tweets, following, followers]
        print('Analyzing Twitter account', accounts[candidate][2])
        soup = BeautifulSoup(requests.get('https://twitter.com/' + accounts[candidate][2] + '?lang=en').text, 'lxml')
        stats_tw = [int(tag.attrs['title'].replace(',', '').split(' ')[0])
                    for tag in soup.find_all(class_='ProfileNav-stat', limit=3) if 'title' in tag.attrs]
    except Exception as err:
        print('Twitter data unavailable:', err)
        stats_tw = ['-', '-', '-']
    if len(stats_tw) != 3:
        # the page did not expose the three expected counters; fall back to
        # placeholders so the 3-way unpacking below cannot fail
        stats_tw = ['-', '-', '-']

    stats['1_tw_tweets'], _, stats['0_tw_followers'] = stats_tw

    if accounts[candidate][0] is not None:
        print('Scanning Youtube Channel')
        try: # Youtube [subscribers, total views, number of videos]
            stats_yt = YoutubePageData(accounts[candidate][0], google_key)
        except Exception as err:
            print('Youtube channel data unavailable:', err)
            stats_yt = ['-', '-', '-']
        try: # Youtube [average views, likes, dislikes over the latest videos (up to 10)]
            stats_yt2 = YoutubeVideosData(accounts[candidate][0], google_key)
        except Exception as err:
            print('Youtube videos data unavailable:', err)
            stats_yt2 = ['-', '-', '-']
    else:
        print('No Youtube Channel')
        stats_yt, stats_yt2 = [0, 0, 0], [0, 0, 0]

    stats['2_yt_subscribers'], stats['6_yt_views'], stats['7_yt_videos'] = stats_yt
    stats['3_yt_views_avg'], stats['4_yt_likes_avg'], stats['5_yt_dislikes_avg'] = stats_yt2

    try: # Facebook : [likes, people talking about this]
        stats_fb = FacebookPageData(accounts[candidate][1], access_token)
    except Exception as err:
        print('Facebook data unavailable:', err)
        stats_fb = ['-', '-']

    stats['8_fb_likes'], stats['9_fb_talking_about'] = stats_fb

    print()
    print(stats)

    # keep the candidate's row for the dataframe
    candidates.append(candidate)
    records.append(stats)

# one row per candidate, one column per metric
df = pd.DataFrame(records, index=candidates)

# save the data
df.sort_index(axis=0).sort_index(axis=1).to_json(path + str(today) + '.json', orient='split')

Maj du 2017-02-13
--------------------
Dupont-Aignan
--------------------
Analyzing Twitter account dupontaignan
Scanning Youtube Channel
Getting average metrics for the latest 9 videos of the channel
Facebook page : Nicolas Dupont-Aignan

Collected data
{'1_tw_tweets': 9204, '2_yt_subscribers': 3190, '3_yt_views_avg': 848, '8_fb_likes': 64364, '9_fb_talking_about': 8538, '6_yt_views': 1242335, '5_yt_dislikes_avg': 1, '0_tw_followers': 87484, '4_yt_likes_avg': 41, '7_yt_videos': 903}

--------------------
Fillon
--------------------
Analyzing Twitter account francoisfillon
Scanning Youtube Channel
Getting average metrics for the latest 10 videos of the channel
Facebook page : François Fillon

Collected data
{'1_tw_tweets': 15822, '2_yt_subscribers': 4011, '3_yt_views_avg': 8768, '8_fb_likes': 277757, '9_fb_talking_about': 136594, '6_yt_views': 922546, '5_yt_dislikes_avg': 232, '0_tw_followers': 422511, '4_yt_likes_avg': 221, '7_yt_videos': 270}

--------------------
Jadot
-------------

In [16]:
# Reload the snapshot written for today to check what was saved; reuse the
# same `path` the file was written with instead of a separate hard-coded folder.
a = pd.read_json(path + str(today) + '.json', orient='split')
a

Unnamed: 0,0_tw_followers,1_tw_tweets,2_yt_subscribers,3_yt_views_avg,4_yt_likes_avg,5_yt_dislikes_avg,6_yt_views,7_yt_videos,8_fb_likes,9_fb_talking_about
Alliot-Marie,26317,1604,0,0,0,0,0,0,11872,507
Arthaud,1787,594,172,192,10,0,10832,58,1754,383
Bayrou,548504,3996,0,0,0,0,0,0,69791,11147
Cheminade,3842,6577,856,1424,17,1,165292,135,5335,259
Dupont-Aignan,87484,9204,3190,848,41,1,1242335,903,64364,8538
Fillon,422511,15822,4011,8768,221,232,922546,270,277757,136594
Hamon,325997,7103,4272,4600,118,48,257552,110,121136,113317
Jadot,27433,22016,681,302,11,0,8105,35,24316,9609
Le Pen,1271937,12434,11599,1632,135,11,1549237,102,1198513,222481
Macron,57741,928,7066,1075,43,35,589388,90,96727,22963
