## Projet InPoDa :
Dans le c√¢dre du TD11-12 nous devions travailler sur un programme permettant de fournir de la statistique sur des tweet r√©cup√©r√©s sous format *json*.

In [22]:
#############################
### Import des librairies ###
import json, re
from textblob import TextBlob
import tkinter as tk
from tkinter import filedialog
import matplotlib #pas encore utilis√©
#############################

In [23]:
###############################################################
###########################Fonctions###########################
def Tweet(data:dict):
    '''Prend un tweet sous format dictionnaire.\n 
    Proc√®de √† un tri des informations pour ne garder que l'essentiel.\n
    Renvoi un nouveau dictionnaire avec des donn√©es pr√™tes √† l'emploi. '''
    self = {}
    self['text'] = data['text']
    self['cleaned_text'] = data['text']
    self['author'] = data['author_id']
    self['hashtags'] = []
    self['urls'] = []
    self['mentions'] = []
    self['topics'] = []
    if 'entities' in data.keys():
        if 'hashtags' in data['entities']:
            for i in data['entities']['hashtags']:
                if i['tag'] not in self['hashtags']:
                    self['hashtags'].append(i['tag'])
        if 'urls' in data['entities']:
            for i in data['entities']['urls']:
                if i['url'] not in self['urls']: 
                    self['urls'].append(i['url'])
                    self['cleaned_text'] = self['cleaned_text'].replace(i['url'],'')
        if 'mentions' in data['entities']:
            for i in data['entities']['mentions']:
                if i['id'] not in self['mentions']:
                    self['mentions'].append(i['id'])
    if 'context_annotations' in data.keys():
        for i in data['context_annotations']:
            if i['entity']['name'] not in self['topics']:
                self['topics'].append(i['entity']['name'])
    self['cleaned_text'] = re.sub(r'#\w+|@\w+|(\r\n|\r|\n)|[^a-zA-Z√†√¢√ß√©√®√™√´√Æ√Ø√¥√ª√π√º√¶≈ì\d\s]','',self['cleaned_text'])
    self['cleaned_text'] = re.sub(r'^ +','',self['cleaned_text'])
    self['sentiment'] = TextBlob(self['cleaned_text']).sentiment
    return self
###############################################################

In [24]:
class InPoDa():
    def __init__(self, json_path):
        '''# Utilit√© :\n
        Permet d'initialiser une instance InPoDa selon un json
        contenant des tweets, vous pouvez ensuite appeler ses diff√©rentes m√©thodes
        pour acc√®der √† diff√©rent stats.'''
        with open(json_path) as f:
            self.json = json.load(f)
        self.TweetData = []
        for raw in self.json:
            self.TweetData.append(Tweet(raw))
        self.pub_per_users = {}
        self.pub_per_tag = {}
        self.pub_per_topic = {}
        for i in self.TweetData:
            if i['author'] in self.pub_per_users.keys():
                self.pub_per_users[i['author']] += 1
            else:
                self.pub_per_users[i['author']] = 1
            for tag in i['hashtags']:
                if tag in self.pub_per_tag.keys():
                    self.pub_per_tag[tag] += 1
                else:
                    self.pub_per_tag[tag] = 1
            for topic in i['topics']:
                if topic in self.pub_per_topic.keys():
                    self.pub_per_topic[topic] += 1
                else:
                    self.pub_per_topic[topic] = 1

    
    def top_k_hashtags(self,k):
        '''Retourne le top k des Hashtags les plus utilis√©s dans l'instance'''
        return sorted(self.pub_per_tag.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def top_k_user(self,k):
        '''Retourne le top k des utilisateurs les plus actifs dans l'instance'''
        return sorted(self.pub_per_users.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def top_k_mentionned_user(self,k):
        '''Retourne le top k des utilisateurs les plus mentionn√©s dans l'instance'''
        mentions = {}
        for tweet in self.TweetData:
            for user in tweet['mentions']:
                if user in mentions.keys():
                    mentions[user] += 1
                else:
                    mentions[user] = 1
        return sorted(mentions.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def top_k_topic(self,k):
        '''Retourne le top k des topics les plus mentionn√©s dans l'instance'''
        return sorted(self.pub_per_topic.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def get_user_tweets(self,user_id:str):
        '''Retourne tout les tweets d'un utilisateur existant dans l'instance'''
        output = []
        for tweet in self.TweetData:
            if tweet['author'] == user_id:
                output.append(tweet)
        return output
    
    def get_tweets_user_mentionned(self,user_id:str):
        '''Retourne tout les tweets o√π utilisateur existant dans l'instance est mentionn√©'''
        output = []
        for tweet in self.TweetData:
            for m_id in tweet['mentions']:
                if m_id == user_id:
                    output.append(tweet)
        return output
    
    def get_users_by_hashtag(self,tag:str):
        '''Retourne tout les utilisateurs de ce Hashtag'''
        output = {}
        for tweet in self.TweetData:
            for hashtag in tweet['hashtags']:
                if tag == hashtag:
                    if tweet['author'] not in output.keys():
                        output[tweet['author']] = 1
                    else:
                        output[tweet['author']] += 1
        return output
    
    def get_author_mentions(self,user_id:str):
        '''Retourne toutes les mentions que cet utilisateur a fait'''
        output = {}
        for tweet in self.TweetData:
            if tweet['author'] == user_id:
                for m in tweet['mentions']:
                    if m not in output.keys():
                        output[m] = 1
                    else:
                        output[m] += 1
        return output


## D√©monstration √† l'usage:

In [25]:
instance1 = InPoDa('versailles_tweets_100.json')

In [26]:
print(instance1.top_k_user(5))

[('1339914264522461187', 4), ('992904738516717570', 4), ('717025418', 2), ('3169236915', 2), ('372993152', 2)]


In [27]:
print(instance1.top_k_hashtags(5))

[('CIV', 2), ('twitter225', 1), ('SupportriceMazo', 1), ('domie', 1), ('jifa', 1)]


In [28]:
print(instance1.top_k_mentionned_user(5))

[('3200704501', 3), ('19811019', 2), ('4827016745', 1), ('254068589', 1), ('781489936184651776', 1)]


In [29]:
print(instance1.top_k_topic(5))

[('Tokyo 2020 Summer Olympics', 2), ('Annie Mac', 2), ('Max Gradel', 1), ('Eric Bailly', 1), ('Jungle Cruise', 1)]


In [30]:
print(instance1.pub_per_users)

{'1339914264522461187': 4, '717025418': 2, '992904738516717570': 4, '736523371': 1, '1471684208': 1, '3169236915': 2, '16267684': 1, '60117154': 1, '372993152': 2, '105241852': 1, '2357913366': 1}


In [31]:
print(instance1.pub_per_tag)

{'twitter225': 1, 'SupportriceMazo': 1, 'domie': 1, 'CIV': 2, 'jifa': 1, 'versailles': 1, 'nocturne': 1, 'appollon': 1}


In [32]:
print(instance1.pub_per_topic)

{'Tokyo 2020 Summer Olympics': 2, 'Max Gradel': 1, 'Eric Bailly': 1, 'Jungle Cruise': 1, 'Action & adventure films': 1, 'Annie Mac': 2, 'Yebba': 1, 'Jazz': 1}


In [33]:
print(instance1.get_user_tweets('992904738516717570'))

[{'text': '@isabelle170516 @leonna_julie @Steiner2502 Vous avez tt √† fait raison! le silence incompr√©hensible du gouver-noument et des merdias sur ce tr√®s important et dramatique sujet prouve de mani√®re irr√©futable\n leur implication √† ce plan  diabolique maquill√©!', 'cleaned_text': 'Vous avez tt √† fait raison le silence incompr√©hensible du gouvernoument et des merdias sur ce tr√®s important et dramatique sujet prouve de mani√®re irr√©futable leur implication √† ce plan  diabolique maquill√©', 'author': '992904738516717570', 'hashtags': [], 'urls': [], 'mentions': ['781489936184651776', '3200704501', '1246352652700659713'], 'topics': [], 'sentiment': Sentiment(polarity=0.4, subjectivity=1.0)}, {'text': '@LynLyna12 @leonna_julie La grande muette continue et continuera de le rester! √Ä part quelques irr√©ductibles √† la retraite?', 'cleaned_text': 'La grande muette continue et continuera de le rester  part quelques irr√©ductibles √† la retraite', 'author': '992904738516717570', 

In [34]:
print(instance1.get_tweets_user_mentionned('3200704501'))

[{'text': '@isabelle170516 @leonna_julie @Steiner2502 Vous avez tt √† fait raison! le silence incompr√©hensible du gouver-noument et des merdias sur ce tr√®s important et dramatique sujet prouve de mani√®re irr√©futable\n leur implication √† ce plan  diabolique maquill√©!', 'cleaned_text': 'Vous avez tt √† fait raison le silence incompr√©hensible du gouvernoument et des merdias sur ce tr√®s important et dramatique sujet prouve de mani√®re irr√©futable leur implication √† ce plan  diabolique maquill√©', 'author': '992904738516717570', 'hashtags': [], 'urls': [], 'mentions': ['781489936184651776', '3200704501', '1246352652700659713'], 'topics': [], 'sentiment': Sentiment(polarity=0.4, subjectivity=1.0)}, {'text': '@LynLyna12 @leonna_julie La grande muette continue et continuera de le rester! √Ä part quelques irr√©ductibles √† la retraite?', 'cleaned_text': 'La grande muette continue et continuera de le rester  part quelques irr√©ductibles √† la retraite', 'author': '992904738516717570', 

In [35]:
print(instance1.get_users_by_hashtag('CIV'))

{'1339914264522461187': 2}


In [36]:
print(instance1.get_author_mentions('992904738516717570'))

{'781489936184651776': 1, '3200704501': 3, '1246352652700659713': 1, '1355767640036438016': 1, '1071056487278104577': 1, '4216955975': 1}


## Interface Utilisateur du Programme :

In [6]:
def process():
    global instance1
    instance1 = InPoDa(var_path.get())
    var_infoproc.set(str(len(instance1.TweetData))+' tweets trouv√©s.')
    label_infoproc.grid(column=0,row=4)

def browse():
    var_path.set(filedialog.askopenfilename(title='Select your JSON'))

def view_tweet():
    instance1.TweetData

def user_request(m:str):
    if m == 'tku':
        instance1.top_k_user(int(var_entry.get()))
    elif m == 'tkmu':
        instance1.top_k_mentionned_user(int(var_entry.get()))
    elif m == 'tkh':
        instance1.top_k_hashtags(int(var_entry.get()))
    elif m == 'tkt':
        instance1.top_k_topic(int(var_entry.get()))
    elif m == 'gumt':
        instance1.get_tweets_user_mentionned(var_entry.get())
    elif m == 'guh':
        instance1.get_users_by_hashtag(var_entry.get())
    elif m == 'gam':
        instance1.get_author_mentions(var_entry.get())
    elif m == 'gut':
        instance1.get_user_tweets(var_entry.get())

root = tk.Tk()
root.title("InPoDa")

var_entry = tk.StringVar()
var_path = tk.StringVar(value='There is no path selected')
var_infoproc = tk.StringVar()

button_browse = tk.Button(root, text='Browse for JSON', command=browse)
label_path = tk.Label(root, textvariable=var_path)
entry = tk.Entry(root, textvariable = var_entry)
button_proc = tk.Button(root, text='Process', command=process)
label_infoproc = tk.Label(root,textvariable=var_infoproc)
button_view_tweet = tk.Button(root, text='View tweets', command=view_tweet)

button_topkUsers = tk.Button(root, text='Browse for JSON', command=lambda:user_request('tku'))
button_topkMentionnedUsers = tk.Button(root, text='Browse for JSON', command=lambda:user_request('tkmu'))
button_topkHashtags = tk.Button(root, text='Browse for JSON', command=lambda:user_request('tkh'))
button_topkTopics = tk.Button(root, text='Browse for JSON', command=lambda:user_request('tkt'))
button_getUserMentionnedTweet = tk.Button(root, text='Browse for JSON', command=lambda:user_request('gumt'))
button_getUsersHashtags = tk.Button(root, text='Browse for JSON', command=lambda:user_request('guh'))
button_getAuthorMentions = tk.Button(root, text='Browse for JSON', command=lambda:user_request('gam'))
button_getUserTweets = tk.Button(root, text='Browse for JSON', command=lambda:user_request('gut'))


button_browse.grid(column=0,row=0)
label_path.grid(column=0,row=1)
entry.grid(column=0,row=2)
button_proc.grid(column=0,row=3)

root.mainloop()