In [1]:
import pandas as pd
import numpy as np
from apyori import apriori
import unidecode

In [2]:
def removeBadCharacters(word):
    """Strip punctuation symbols from ``word`` and fold it to plain ASCII.

    Parameters
    ----------
    word : str
        A single token (the caller passes whitespace-split, lower-cased words).

    Returns
    -------
    str
        ``word`` without any of the characters ``. , ? ! @ $ & * ( ) \\ / ' "``
        and with the remaining characters transliterated by
        ``unidecode.unidecode`` (e.g. accented letters lose their accents).
        The result may be an empty string when ``word`` held only symbols.
    """
    # The backslash is escaped explicitly: the previous literal used "\/",
    # an invalid escape sequence that triggers a SyntaxWarning on recent
    # Python versions. The set of removed characters is unchanged.
    bad_symbols = ".,?!@$&*()\\/'\""

    # Remove the unwanted symbols.
    clean_word = word.translate(str.maketrans("", "", bad_symbols))

    # Transliterate the remaining text to ASCII (drops accents/diacritics).
    clean_word = unidecode.unidecode(clean_word)

    return clean_word

In [3]:
# Words that are dropped from every tweet before mining association rules.
# Tokens are compared against this list AFTER removeBadCharacters() has
# stripped punctuation and removed accents, so every accented entry also needs
# its ASCII-folded form in the list to have any effect.
STOPWORDS = ['lei','rouanet','?','faz','.','assim','a','eles','ate','via','pelo','-','ela','ta','era',
             'quem','eu','tá','foi','pela','ser',',','tem','só','sem','vai',
             'é','e','ne','né','isso','onde','estão','esta','está','ver','pq','essa','vez','nas',
             'mas','qual','porque','ele','ter','pois','este','vez','q',
             'para','já','aos','pode','outro',
             'pra','desse','alguns','meio','entre','das',
             'podem','esse','seu','também','são','quando','de', 
             'que','em','os','as','da','como','dos','ou','se','um',
             'uma','para','na','ao','mais','por','não','ainda','muito','sua','a',
             'vc','voce','o','a','você','ai','aí','vocês','vcs','fazer','usou',"do","com","nao",
             # ASCII-folded forms of the accented entries above that were
             # previously missing ('só', 'estão', 'já', 'também', 'são', 'vocês').
             'so','estao','ja','tambem','sao','voces']

In [4]:
# Load the tweets CSV and parse the 'dia' column into datetimes so the rows
# can be split by calendar year.
# NOTE(review): pd.to_datetime uses its default parsing here — confirm it
# matches the date format actually stored in the CSV.
tweets_data = pd.read_csv('lei_rouanet_final.csv')
tweets_data['dia'] = pd.to_datetime(tweets_data['dia'])

# Select by calendar year instead of comparing against "dd/mm/yyyy" strings:
# those strings are parsed implicitly (day/month order is ambiguous) and an
# upper bound of "31/12/YYYY" means midnight, which would drop any row on
# Dec 31 that carries a time of day.
tweet_year = tweets_data['dia'].dt.year

# One array of tweet texts per year (single .loc lookup, no chained indexing).
tweets_2019 = tweets_data.loc[tweet_year == 2019, "tweet"].values
tweets_2020 = tweets_data.loc[tweet_year == 2020, "tweet"].values
tweets_2021 = tweets_data.loc[tweet_year == 2021, "tweet"].values

In [5]:
# Number of tweets selected for 2019.
len(tweets_2019)

9559

In [6]:
# Number of tweets selected for 2020.
len(tweets_2020)

3921

In [7]:
# Number of tweets selected for 2021.
len(tweets_2021)

2985

In [8]:
import math

def _cleanTweetWords(tweet):
    """Return the distinct, cleaned, non-stopword words of one tweet, in first-seen order."""
    kept_words = {}
    for raw_word in tweet.lower().split():
        word = removeBadCharacters(raw_word).strip()
        # Tokens made only of punctuation become "" after cleaning; they carry
        # no meaning and must not reach apriori as an item.
        if word and word not in STOPWORDS:
            # De-duplicate AFTER cleaning so "rouanet" and "rouanet," collapse
            # into one item; a dict keeps insertion order.
            kept_words[word] = None
    return list(kept_words)


def getAssociationRules(vetor_tweets, min_support, min_confidence, min_lift, min_length, max_length):
    """Mine word association rules from tweets with apriori and print them.

    Each tweet is lower-cased, split on whitespace, cleaned with
    ``removeBadCharacters`` and stripped of ``STOPWORDS``; the remaining
    distinct words form one transaction.

    Parameters
    ----------
    vetor_tweets : iterable
        Tweet texts. Entries that are not ``str`` (e.g. NaN from an empty CSV
        cell) are skipped instead of raising on ``.lower()``.
    min_support, min_confidence, min_lift, max_length
        Forwarded unchanged to ``apriori`` as keyword arguments.
    min_length : int
        Only item sets with at least this many words are printed, and within
        them only rules whose left-hand side has at most ``min_length // 2``
        words.

    Returns
    -------
    None
        Results are written to stdout.
    """
    # One list of cleaned words per tweet.
    tweets_lists = [_cleanTweetWords(tweet) for tweet in vetor_tweets if isinstance(tweet, str)]

    # Run apriori.
    association_rules = apriori(tweets_lists, min_support=min_support, min_confidence=min_confidence,
                                min_lift=min_lift, max_length=max_length)

    max_base_size = min_length // 2
    separator = "----------------------------------------------------------------------------------\n"

    # Print the result.
    for item in association_rules:

        # Skip item sets that are smaller than requested.
        if len(item.items) < min_length:
            continue

        print("\n")

        print("Palavras Associadas: ", list(item.items), " Support: ", item.support)

        print(separator)

        for statistic in item.ordered_statistics:
            if len(statistic.items_base) <= max_base_size:
                print("{} -> {}".format(list(statistic.items_base), list(statistic.items_add)))
                print("Confidence: ", statistic.confidence)
                print("Lift: {}\n".format(statistic.lift))

        print(separator)

    print("Concluido!")

In [9]:
# Association rules for the 2020 tweets (item sets of at least 9 words).
getAssociationRules(
    tweets_2020,
    min_support=0.017,
    min_confidence=0.7,
    min_lift=2,
    min_length=9,
    max_length=10,
)



Palavras Associadas:  ['boca', '15', 'fraudes', 'livre', 'apura', 'federal', 'policia', 'anos', 'operacao']  Support:  0.01734251466462637
----------------------------------------------------------------------------------

['15'] -> ['boca', 'fraudes', 'livre', 'apura', 'federal', 'policia', 'anos', 'operacao']
Confidence:  0.7640449438202247
Lift: 44.056179775280896

['apura'] -> ['boca', '15', 'fraudes', 'livre', 'federal', 'policia', 'anos', 'operacao']
Confidence:  0.8831168831168832
Lift: 50.184076792772444

['fraudes'] -> ['boca', '15', 'livre', 'apura', 'federal', 'policia', 'anos', 'operacao']
Confidence:  0.7391304347826088
Lift: 42.619565217391305

['livre'] -> ['boca', '15', 'fraudes', 'apura', 'federal', 'policia', 'anos', 'operacao']
Confidence:  0.7391304347826088
Lift: 42.619565217391305

['operacao'] -> ['boca', '15', 'fraudes', 'livre', 'apura', 'federal', 'policia', 'anos']
Confidence:  0.7010309278350516
Lift: 40.422680412371136

['policia'] -> ['boca', '15', 'frau

## Operação "Boca Livre" da Polícia Federal apura 15 anos de fraudes na Lei Rouanet

In [10]:
# Association rules for the 2021 tweets (item sets of at least 4 words).
getAssociationRules(
    tweets_2021,
    min_support=0.018,
    min_confidence=0.7,
    min_lift=1,
    min_length=4,
    max_length=10,
)



Palavras Associadas:  ['projetos', 'mario', 'passaporte', 'frias']  Support:  0.023450586264656615
----------------------------------------------------------------------------------

['passaporte', 'frias'] -> ['projetos', 'mario']
Confidence:  0.7216494845360825
Lift: 25.953297726990435

['projetos', 'frias'] -> ['mario', 'passaporte']
Confidence:  0.7777777777777778
Lift: 26.086142322097377

['mario', 'passaporte'] -> ['projetos', 'frias']
Confidence:  0.7865168539325842
Lift: 26.086142322097377

['projetos', 'mario'] -> ['frias', 'passaporte']
Confidence:  0.8433734939759036
Lift: 25.953297726990435

----------------------------------------------------------------------------------



Palavras Associadas:  ['vacina', 'mario', 'passaporte', 'frias']  Support:  0.019765494137353432
----------------------------------------------------------------------------------

['vacina', 'frias'] -> ['mario', 'passaporte']
Confidence:  0.8309859154929576
Lift: 27.870707390409873

['mario', 'vaci

## Mario Frias proíbe passaporte da vacina em projetos da Lei Rouanet

In [11]:
# Association rules for the 2019 tweets (item sets of at least 4 words).
getAssociationRules(
    tweets_2019,
    min_support=0.015,
    min_confidence=0.7,
    min_lift=1,
    min_length=4,
    max_length=10,
)



Palavras Associadas:  ['miami', 'doria', 'bia', 'exposicao']  Support:  0.01757506015273564
----------------------------------------------------------------------------------

['bia'] -> ['exposicao', 'doria', 'miami']
Confidence:  0.9545454545454545
Lift: 54.3125

['doria'] -> ['exposicao', 'bia', 'miami']
Confidence:  0.8842105263157893
Lift: 50.310526315789474

['exposicao'] -> ['doria', 'miami', 'bia']
Confidence:  0.9438202247191011
Lift: 53.70224719101124

['miami'] -> ['doria', 'exposicao', 'bia']
Confidence:  0.9438202247191011
Lift: 52.76010250344963

['doria', 'bia'] -> ['exposicao', 'miami']
Confidence:  0.9599999999999999
Lift: 54.62285714285714

['bia', 'exposicao'] -> ['doria', 'miami']
Confidence:  0.982456140350877
Lift: 55.900584795321635

['bia', 'miami'] -> ['doria', 'exposicao']
Confidence:  1.0
Lift: 55.900584795321635

['doria', 'exposicao'] -> ['bia', 'miami']
Confidence:  0.982456140350877
Lift: 55.900584795321635

['doria', 'miami'] -> ['bia', 'exposicao']
Co

## Bia Doria usou a Lei Rouanet para fazer exposição em Miami