In [8]:
#!/usr/bin/python
# coding: utf-8

import json
import os
import sys
from contextlib import closing
from datetime import date, timedelta

import pandas as pd
import requests
from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    import urllib2 as ul # Python2
else:
    import urllib.request as ul # Python3

In [2]:
def FacebookPageData(page_id, access_token, api_version='v2.8'):
    """Fetch the fan count and "talking about" count of a Facebook page.

    Requests the ``name``, ``talking_about_count`` and ``fan_count`` fields of
    the Graph API node ``page_id`` and prints the page name.

    Parameters
    ----------
    page_id : str
        Page identifier, used as the Graph API node in the URL.
    access_token : str
        Value sent as the ``access_token`` query parameter.
    api_version : str, optional
        Version segment of the Graph API URL (default ``'v2.8'``).

    Returns
    -------
    list of int
        ``[fan_count, talking_about_count]``.

    Raises
    ------
    KeyError
        If the response lacks ``name`` or one of the two counters.
    Any error raised by ``urlopen`` or by JSON decoding is propagated.
    """
    # construct the URL string
    base = 'https://graph.facebook.com/' + api_version
    node = '/' + page_id
    parameters = '/?access_token=%s&fields=name,talking_about_count,fan_count' % access_token
    url = base + node + parameters

    # retrieve data; closing() releases the connection even when reading or
    # decoding fails, and works with both urllib2 and urllib.request responses
    with closing(ul.urlopen(url)) as response:
        data = json.loads(response.read().decode('utf-8'))

    print('Facebook page :', data['name'])
    return [int(data[metric]) for metric in ['fan_count', 'talking_about_count']]

def YoutubePageData(page_id, access_token):
    """Fetch the global statistics of a Youtube channel.

    Calls the ``channels`` endpoint of the Youtube Data API v3 with
    ``part=statistics`` for the channel ``page_id``.

    Parameters
    ----------
    page_id : str
        Channel id sent as the ``id`` query parameter.
    access_token : str
        API key sent as the ``key`` query parameter.

    Returns
    -------
    list of int
        ``[subscriberCount, viewCount, videoCount]``.

    Raises
    ------
    LookupError
        If the response contains no channel for ``page_id``.
    KeyError
        If one of the three counters is absent from the statistics.
    Any error raised by ``urlopen`` or by JSON decoding is propagated.
    """
    base = 'https://www.googleapis.com/youtube/v3/channels'
    parameters = '?part=statistics&id=' + page_id + '&key=' + access_token
    url = base + parameters

    # retrieve data; closing() releases the connection even when reading or
    # decoding fails, and works with both urllib2 and urllib.request responses
    with closing(ul.urlopen(url)) as response:
        data = json.loads(response.read().decode('utf-8'))

    # An unknown channel id yields no item: report it explicitly instead of
    # failing later with an opaque IndexError/KeyError.
    items = data.get('items') or []
    if not items:
        raise LookupError('No Youtube channel found for id %s' % page_id)
    statistics = items[0]['statistics']

    return [int(statistics[metric]) for metric in ['subscriberCount', 'viewCount', 'videoCount']]

def YoutubeVideosData(page_id, access_token, max_results=10):
    """Average the view/like/dislike counts of a channel's latest videos.

    First asks the ``search`` endpoint of the Youtube Data API v3 for the most
    recent results of channel ``page_id`` (ordered by date), then asks the
    ``videos`` endpoint for the statistics of the videos found.

    Parameters
    ----------
    page_id : str
        Channel id sent as the ``channelId`` query parameter.
    access_token : str
        API key sent as the ``key`` query parameter.
    max_results : int, optional
        Value of the ``maxResults`` search parameter (default 10).

    Returns
    -------
    list of int
        ``[viewCount, likeCount, dislikeCount]`` averages, rounded. Each
        average only covers the videos whose statistics contain that counter;
        a counter reported by no video yields 0.

    Raises
    ------
    LookupError
        If the search returns no video, or no statistics come back.
    Any error raised by ``urlopen`` or by JSON decoding is propagated.
    """
    metrics = ['viewCount', 'likeCount', 'dislikeCount']

    base = 'https://www.googleapis.com/youtube/v3/search'
    parameters = ('?order=date&part=snippet&channelId=' + page_id +
                  '&maxResults=' + str(max_results) + '&key=' + access_token)
    url = base + parameters

    # retrieve list of the most recently published videos on the channel;
    # closing() releases the connection even when reading or decoding fails
    with closing(ul.urlopen(url)) as response:
        data = json.loads(response.read().decode('utf-8'))
    # search results may also contain non-video entries, which have no videoId
    videoIds = [e['id']['videoId'] for e in data['items'] if 'videoId' in e['id']]
    if not videoIds:
        raise LookupError('No video found for Youtube channel %s' % page_id)

    base = 'https://www.googleapis.com/youtube/v3/videos'
    parameters = '?part=statistics&id=' + ','.join(videoIds) + '&key=' + access_token
    url = base + parameters

    with closing(ul.urlopen(url)) as response:
        data = json.loads(response.read().decode('utf-8'))
    items = data.get('items') or []
    n = len(items)
    if n == 0:
        raise LookupError('No statistics returned for the videos of channel %s' % page_id)

    # Build the dictionary of average values for each metric over the analysed
    # videos. A video does not necessarily report every counter, so a missing
    # one is skipped rather than raising KeyError. float() keeps a true
    # division on Python 2 as well.
    videoStats = {}
    for key in metrics:
        values = [int(e['statistics'][key]) for e in items if key in e.get('statistics', {})]
        videoStats[key] = int(round(sum(values) / float(len(values)))) if values else 0

    print('Getting average metrics for the latest', n, 'videos of the channel')

    return [videoStats[metric] for metric in metrics]

In [4]:
# Per candidate: [Youtube channel id (None when there is no channel),
#                 Facebook page, Twitter handle,
#                 short code used as the row label of the results table]
accounts = {'Alliot-Marie': [None, 'MAlliotMarie', 'MAlliotMarie', 'MAM'],
           'Arthaud': ['UCZsh-MrJftAOP_-ZgRgLScw', 'nathaliearthaud', 'n_arthaud', 'NAR'],
           'Bayrou': [None, 'bayrou', 'bayrou', 'FBA'],
           'Cheminade': ['UCCPw8MX-JcsiTzItY-qq1Fg', 'Jcheminade', 'Jcheminade', 'JCH'],
           'Dupont-Aignan': ['UCfA5DnCDX3Ixy5QOAMGtBlA', 'nicolasdupontaignan', 'dupontaignan', 'NDA'],
           'Fillon': ['UCp1R4BFJrKw34PfUc3GDLkw', 'FrancoisFillon', 'francoisfillon', 'FFI'],
           'Hamon': ['UCcMryUp6ME3BvP2alkS1dKg', 'hamonbenoit', 'benoithamon', 'BHA'],
           'Jadot': ['UCsUMhb2ygeTSS2mXLTIDHMQ', 'yannick.jadot', 'yjadot', 'YJA'],
           'Le Pen': ['UCU3z3px1_RCqYBwrs8LJVWg', 'MarineLePen', 'MLP_officiel', 'MLP'],
           'Macron': ['UCJw8np695wqWOaKVhFjkRyg', 'EnMarche', 'enmarchefr', 'EMA'],
           'Melenchon': ['UCk-_PEY3iC6DIGJKuoEe9bw', 'JLMelenchon', 'JLMelenchon', 'JLM'],
           'Poutou': [None, 'poutou.philippe', 'PhilippePoutou', 'PPO']}

# Credentials must not live in the notebook (it gets shared and versioned):
# they are read from the environment instead. The values that used to be
# written here should be considered leaked and be regenerated.
app_id = os.environ.get('FACEBOOK_APP_ID', '')
app_secret = os.environ.get('FACEBOOK_APP_SECRET', '')
# Facebook app access token format: "<app id>|<app secret>"
access_token = app_id + "|" + app_secret
google_key = os.environ.get('GOOGLE_API_KEY', '')

# Say so right away when a credential is missing, rather than letting every
# API call fail later on.
_missing_credentials = [variable for variable, value in [('FACEBOOK_APP_ID', app_id),
                                                         ('FACEBOOK_APP_SECRET', app_secret),
                                                         ('GOOGLE_API_KEY', google_key)]
                        if not value]
if _missing_credentials:
    print('Missing credentials, please set: ' + ', '.join(_missing_credentials))

In [13]:
today = date.today()
print(today)

path = 'data/' # save path
# path = '/var/www/html/duel/data/'

MISSING = '-' # placeholder stored when a metric could not be collected

# Column order of the results table
STAT_COLUMNS = ['tw_tweets', 'tw_following', 'tw_followers',
                'yt_subscribers', 'yt_views', 'yt_videos',
                'yt_views_avg', 'yt_likes_avg', 'yt_dislikes_avg',
                'fb_likes', 'fb_talking_about']


def _twitter_profile_stats(handle, timeout=30):
    """Scrape the leading numbers of the first three profile stats of a Twitter page."""
    # a timeout prevents one unresponsive request from hanging the whole run
    page = requests.get('https://twitter.com/' + handle + '?lang=en', timeout=timeout)
    soup = BeautifulSoup(page.text, 'lxml')
    # each stat tag carries a title such as "6,577 Tweets": keep the number
    return [int(tag.attrs['title'].replace(',', '').split(' ')[0])
            for tag in soup.find_all(class_='ProfileNav-stat', limit=3) if 'title' in tag.attrs]


def _fetch_or_placeholder(source, size, fetch, *args):
    """Return fetch(*args), or `size` placeholders if it fails or is wrong-sized."""
    try:
        values = fetch(*args)
        # a short result would break the unpacking done by the caller
        if len(values) != size:
            raise ValueError('expected %d values, got %d' % (size, len(values)))
        return values
    except Exception as exc: # not a bare except: Ctrl-C must still stop the run
        print(source, 'data unavailable:', repr(exc))
        return [MISSING] * size


print('Maj du', today)

# One dict of metrics per candidate; the dataframe is built once at the end
# (DataFrame.append is gone from recent pandas and was quadratic in a loop).
rows, labels = [], []

for candidate in accounts:
    youtube_id, facebook_page, twitter_handle, label = accounts[candidate]

    print('-' * 20)
    print(candidate)
    print('-' * 20)

    stats = {}

    # Twitter : [tweets, following, followers]
    print('Analyzing Twitter account', twitter_handle)
    stats_tw = _fetch_or_placeholder('Twitter', 3, _twitter_profile_stats, twitter_handle)

    stats['tw_tweets'], stats['tw_following'], stats['tw_followers'] = stats_tw

    if youtube_id is not None:
        print('Scanning Youtube Channel')
        # Youtube : [subscribers, total views, number of videos]
        stats_yt = _fetch_or_placeholder('Youtube channel', 3, YoutubePageData, youtube_id, google_key)
        # Youtube : [average views, average likes, average dislikes] of the latest videos
        stats_yt2 = _fetch_or_placeholder('Youtube videos', 3, YoutubeVideosData, youtube_id, google_key)
    else:
        print('No Youtube Channel')
        stats_yt, stats_yt2 = [0, 0, 0], [0, 0, 0]

    stats['yt_subscribers'], stats['yt_views'], stats['yt_videos'] = stats_yt
    stats['yt_views_avg'], stats['yt_likes_avg'], stats['yt_dislikes_avg'] = stats_yt2

    # Facebook : [likes, people talking about this]
    stats_fb = _fetch_or_placeholder('Facebook', 2, FacebookPageData, facebook_page, access_token)

    stats['fb_likes'], stats['fb_talking_about'] = stats_fb

    print()
    print('Collected data')
    print(stats)
    print()

    # keep the candidate's row for the dataframe
    rows.append(stats)
    labels.append(label)

df = pd.DataFrame(rows, index=labels, columns=STAT_COLUMNS)

# save the data
df.to_json(path + str(today) + '.json', orient='split')

2017-02-13
Maj du 2017-02-13
--------------------
Cheminade
--------------------
Analyzing Twitter account Jcheminade
Scanning Youtube Channel
Getting average metrics for the latest 10 videos of the channel
Facebook page : Jacques Cheminade

Collected data
{'tw_following': 137, 'tw_tweets': 6577, 'yt_dislikes_avg': 1, 'yt_views_avg': 1424, 'yt_videos': 135, 'yt_likes_avg': 17, 'yt_subscribers': 856, 'fb_likes': 5335, 'yt_views': 165292, 'fb_talking_about': 259, 'tw_followers': 3841}

--------------------
Melenchon
--------------------
Analyzing Twitter account JLMelenchon
Scanning Youtube Channel
Getting average metrics for the latest 10 videos of the channel
Facebook page : Jean-Luc Mélenchon

Collected data
{'tw_following': 5145, 'tw_tweets': 19152, 'yt_dislikes_avg': 385, 'yt_views_avg': 194596, 'yt_videos': 394, 'yt_likes_avg': 14601, 'yt_subscribers': 212168, 'fb_likes': 650067, 'yt_views': 13380101, 'fb_talking_about': 150175, 'tw_followers': 963393}

--------------------
Poutou


In [17]:
# Reload today's snapshot to check what was written. `path` is reused so that
# the file is read from the directory it was saved to, even when the save
# path is changed.
a = pd.read_json(path + str(today) + '.json', orient='split')
a

Unnamed: 0,tw_following,tw_tweets,yt_dislikes_avg,yt_views_avg,yt_videos,yt_likes_avg,yt_subscribers,fb_likes,yt_views,fb_talking_about,tw_followers
JCH,137,6577,1,1424,135,17,856,5335,165292,259,3841
JLM,5145,19152,385,194596,394,14601,212168,650067,13380101,150175,963393
PPO,198,2969,0,0,0,0,0,10515,0,1718,96684
EMA,633,928,34,1072,90,43,7061,96706,589388,22963,57707
NAR,65,594,0,192,58,10,172,1754,10832,383,1787
FFI,2849,15822,232,8753,270,219,4012,277704,922546,136594,422401
BHA,2043,7102,48,4581,110,117,4263,121055,257552,113317,325899
MLP,4088,12434,11,1626,102,134,11593,1198415,1549237,222481,1271806
MAM,2116,1604,0,0,0,0,0,11872,0,507,26323
FBA,2023,3996,0,0,0,0,0,69789,0,11147,548460
