In [11]:
import json
from textblob import TextBlob
import matplotlib, re

In [12]:
class Tweet():
    def __init__(self,data:dict):
        self.text = data['text']
        self.cleaned_text = data['text']
        self.author = data['author_id']
        self.hashtags = []
        self.urls = []
        self.mentions = []
        if 'entities' in data.keys():
            if 'hashtags' in data['entities']:
                for i in data['entities']['hashtags']:
                    if i['tag'] not in self.hashtags:
                        self.hashtags.append(i['tag'])
            if 'urls' in data['entities']:
                for i in data['entities']['urls']:
                    if i['url'] not in self.urls:
                        self.urls.append(i['url'])
                        self.cleaned_text = self.cleaned_text.replace(i['url'],'')
            if 'mentions' in data['entities']:
                for i in data['entities']['mentions']:
                    if i['id'] not in self.mentions:
                        self.mentions.append(i['id'])
        self.cleaned_text = re.sub(r'#\w+|@\w+|[^a-zA-Zàâçéèêëîïôûùüæœ\d\s]','',self.cleaned_text)
        self.textblob = TextBlob(self.cleaned_text)
        self.sentiment = self.textblob.sentiment
        self.topics = list(self.textblob.noun_phrases)

In [13]:
class InPoDa():

    def __init__(self, json_path):
        with open(json_path) as f:
            self.json = json.load(f)
        self.TweetData = []
        for raw in self.json:
            self.TweetData.append(Tweet(raw))
        self.pub_per_users = {}
        self.pub_per_tag = {}
        self.pub_per_topic = {}
        for i in self.TweetData:
            if i.author in self.pub_per_users.keys():
                self.pub_per_users[i.author] += 1
            else:
                self.pub_per_users[i.author] = 1
            for tag in i.hashtags:
                if tag in self.pub_per_tag.keys():
                    self.pub_per_tag[tag] += 1
                else:
                    self.pub_per_tag[tag] = 1
            for topic in i.topics:
                if topic in self.pub_per_topic.keys():
                    self.pub_per_topic[topic] += 1
                else:
                    self.pub_per_topic[topic] = 1

    
    def top_k_hashtags(self,k):
        return sorted(self.pub_per_tag.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def top_k_user(self,k):
        return sorted(self.pub_per_users.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def top_k_mentionned_user(self,k):
        mentions = {}
        for tweet in self.TweetData:
            for user in tweet.mentions:
                if user in mentions.keys():
                    mentions[user] += 1
                else:
                    mentions[user] = 1
        return sorted(mentions.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def top_k_topic(self,k):
        return sorted(self.pub_per_topic.items(), key=lambda x:x[1],reverse=True)[:k]
    
    def get_user_tweets(self,user_id:str):
        output = []
        for tweet in self.TweetData:
            if tweet.author == user_id:
                output.append(tweet)
        return output
    
    def get_tweets_user_mentionned(self,user_id:str):
        output = []
        for tweet in self.TweetData:
            for m_id in tweet.mentions:
                if m_id == user_id:
                    output.append(tweet)
        return output
    
    def get_users_by_hashtag(self,tag:str):
        output = {}
        for tweet in self.TweetData:
            for hashtag in tweet.hashtags:
                if tag == hashtag:
                    if tweet.author not in output.keys():
                        output[tweet.author] = 1
                    else:
                        output[tweet.author] += 1
        return output
    
    def get_author_mentions(self,user_id:str):
        output = {}
        for tweet in self.TweetData:
            if tweet.author == user_id:
                for m in tweet.mentions:
                    if m not in output.keys():
                        output[m] = 1
                    else:
                        output[m] += 1
        return output


In [14]:
instance1 = InPoDa('versailles_tweets_100.json')

In [16]:
print(instance1.top_k_user(5))
print(instance1.top_k_hashtags(5))
print(instance1.top_k_mentionned_user(5))
print(instance1.top_k_topic(5))
print(instance1.pub_per_users)
print(instance1.pub_per_tag)
print(instance1.pub_per_topic)
print(instance1.get_user_tweets('992904738516717570'))
print(instance1.get_tweets_user_mentionned('3200704501'))
print(instance1.get_users_by_hashtag('CIV'))
print(instance1.get_author_mentions('992904738516717570'))

[('1339914264522461187', 4), ('992904738516717570', 4), ('717025418', 2), ('3169236915', 2), ('372993152', 2)]
[('CIV', 2), ('twitter225', 1), ('SupportriceMazo', 1), ('domie', 1), ('jifa', 1)]
[('3200704501', 3), ('19811019', 2), ('4827016745', 1), ('254068589', 1), ('781489936184651776', 1)]
[('versailles', 4), ('goumin', 1), ('des lphants joueurs', 1), ('mme fatigue mme', 1), ('mes tontons vous avez fait votre part', 1)]
{'1339914264522461187': 4, '717025418': 2, '992904738516717570': 4, '736523371': 1, '1471684208': 1, '3169236915': 2, '16267684': 1, '60117154': 1, '372993152': 2, '105241852': 1, '2357913366': 1}
{'twitter225': 1, 'SupportriceMazo': 1, 'domie': 1, 'CIV': 2, 'jifa': 1, 'versailles': 1, 'nocturne': 1, 'appollon': 1}
{'goumin': 1, 'des lphants joueurs': 1, 'mme fatigue mme': 1, 'mes tontons vous avez fait votre part': 1, 'jo': 1, 'final au moins': 1, 'bravo': 1, 'sommeil l sera compliqu est limin des': 1, 'jo ahi': 1, 'peut faire': 1, 'juillet journe internationale': 