In [2]:
#%pip install tweepy

In [1]:
import tweepy
import pandas as pd

client = tweepy.Client(bearer_token="AAAAAAAAAAAAAAAAAAAAAIHQcQEAAAAAltFqxFhiitQ6AT8R9LP8mX%2F9y8w%3DSTDzL94lipFbvKOqaLMNTFQKx2Tf0Y3fODsrwq0bjSkggdF5fY")

tweets = client.search_recent_tweets("(#bitcoin OR #Bitcoin OR #BTC OR #btc) -is:retweet lang:en",max_results=100,).data
tweets_df = pd.DataFrame([tweet.data for tweet in tweets])
tweets_df.drop(columns=['id','edit_history_tweet_ids'],inplace=True)
tweets_df.head()

Unnamed: 0,text
0,#FIS : SHORT TERM SIGNAL:\n\nBuy within 6mins ...
1,@Ashcryptoreal Check the latest post bout ape ...
2,@yogimechcool Daily Ultra Activity: 0\n\nDaily...
3,#Bitcoin two elliot wave analysis has common p...
4,💰 I bought your bags!\n\n➕ Bought : SHIBUSDT\n...


In [2]:
import re

def cleanTweets(txt:str) -> str:
    """This function takes a string as input and returns a cleaned version of the 
    string by removing hashtags, newlines, URLs, @ symbols, and emojis. It also
    converts the string to lowercase.

    Args:
        txt (str): The input string to be cleaned.

    Returns:
        str: The cleaned version of the input string.
    """
    txt = str(txt)
    txt = re.sub(r'#','',txt)
    txt = re.sub(r'\n','',txt)
    txt = re.sub(r'https?:\/\/\S+','',txt)
    txt = re.sub(r'@','',txt)
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002500-\U00002BEF"  # chinese char
                               u"\U00002702-\U000027B0"
                               u"\U00002702-\U000027B0"
                               u"\U000024C2-\U0001F251"
                               u"\U0001f926-\U0001f937"
                               u"\U00010000-\U0010ffff"
                               u"\u2640-\u2642"
                               u"\u2600-\u2B55"
                               u"\u200d"
                               u"\u23cf"
                               u"\u23e9"
                               u"\u231a"
                               u"\ufe0f"  # dingbats
                               u"\u3030"
                               "]+", flags=re.UNICODE)
    
    return emoji_pattern.sub(r'', txt).lower()

In [3]:
tweets_df.text = tweets_df.text.apply(cleanTweets)
tweets_df.head(10)

Unnamed: 0,text
0,fis : short term signal:buy within 6mins max: ...
1,ashcryptoreal check the latest post bout ape c...
2,yogimechcool daily ultra activity: 0daily bitc...
3,bitcoin two elliot wave analysis has common pr...
4,i bought your bags! bought : shibusdt buy pri...
5,join me at bybit and earn exclusive rewards w...
6,current bitcoin price is $17257 btc crypto
7,bscgemsalert hzmcoin_bitcoin_arabhzm coin the ...
8,"if the bitcoin is the crypto gold, then elysiu..."
9,pamela commons btc cecilia beard


# FinBERT

In [6]:
#%pip install -U torch transformers

In [7]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import tqdm

# create a tokenizer object
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

# fetch the pretrained model 
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

def sentim_analyzer(df, tokenizer:AutoTokenizer, model:AutoModelForSequenceClassification):
    ''' Given a df that contains a column 'headline' with article healine texts, it runs inference on the healine with the 'model' (FinBert) 
       and inserts output sentiment features into the dataframe in the respective columns (Positive_sentim, Negative_sentim, Neutral_sentim)
       
        Parameters :
          df : A dataframe that contains headlines in a column called 'headline' . 
          tokenizer(AutoTokenizer) : A pre-processing tokenizer object from Hugging Face lib. 
          model (AutoModelForSequenceClassification) : A hugging face transformer model.     
          
          returns df : The initial dataframe with the 3 sentiment features as columns for each headline'''
    
    for i in df.index :
        try:
            headline = df.loc[i, 'text']
        except:
            return print(' \'headline\' column might be missing from dataframe')
        # Pre-process input phrase
        input = tokenizer(headline, padding = True, truncation = True, return_tensors='pt')
        # Estimate output
        output = model(**input)
        # Pass model output logits through a softmax layer.
        predictions =  torch.nn.functional.softmax(output.logits, dim=-1)
        df.loc[i, 'Positive'] = predictions[0][0].tolist()
        df.loc[i, 'Negative'] = predictions[0][1].tolist()
        df.loc[i, 'Neutral']  = predictions[0][2].tolist()
    return df

2023-01-09 11:05:30.763646: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-09 11:05:32.117326: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-01-09 11:05:40.369000: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-01-09 11:05:40.372630: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

In [8]:
tweets_df = sentim_analyzer(tweets_df, tokenizer, model)
tweets_df['Sentiment']=tweets_df.apply(lambda x: 'Positive' if x['Positive']>x['Negative'] else 'Negative', axis=1)
tweets_df.drop(columns=['Positive','Negative','Neutral'],inplace=True)
tweets_df.Sentiment.value_counts()

Positive    87
Negative    13
Name: Sentiment, dtype: int64

# Affinn

In [9]:
from afinn import Afinn

afinn=Afinn()

def classify_tweet(tweet_text:str)->str:
    score = afinn.score(tweet_text)
    return 'Negative' if score<0 else 'Positive' if score>0 else 'Neutral'

tweets_df['afinn'] = tweets_df.text.apply(classify_tweet) 
tweets_df.head(15)

Unnamed: 0,text,Sentiment,afinn
0,"block 771103 9th jan 2023, 10:04:17 gmt 1,8...",Positive,Negative
1,"price update: 1 bitcoin = $17,237.29 cryptocu...",Positive,Neutral
2,chaindustry virtually everything but most impo...,Positive,Positive
3,"""14 years later: unbelievable impact of satosh...",Positive,Positive
4,"current bitcoin price:usd $17,250.05gbp £14,41...",Positive,Neutral
5,"block 771,103 was just mined. it had 1,843 tra...",Negative,Negative
6,crypto bitcoin altcoin nfts btc eth xrpbitcoin...,Positive,Neutral
7,btc+2.46rr banked easilycryptocurrency crypto ...,Positive,Neutral
8,polyxusdt polyx signal 1 last signal: n/a bef...,Positive,Neutral
9,andrewgriuk eyes on $bc bcthe shill 2 earn dap...,Positive,Neutral


# Textblob

In [4]:
from afinn import Afinn

afinn=Afinn()

def get_sentiment_2(tweet:str) -> str:
    """Get the sentiment of a tweet using the afinn library.

    Args:
        tweet (str): The tweet text.

    Returns:
        str: "Positive" if the sentiment is positive, "Negative" if the sentiment is negative.
    """
    score = afinn.score(tweet)
    return "Negative" if score < 0 else "Positive"

tweets_df['Sentiment_afinn'] = tweets_df['text'].apply(get_sentiment_2)

In [5]:
tweets_df.Sentiment_afinn.value_counts().to_dict()

{'Positive': 87, 'Negative': 13}

In [None]:
dict_1 = {'Positive': 87, 'Negative': 13}
dict_2 = {'Positive': 90, 'Negative': 15}

In [21]:
from torch.nn.functional import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# create a tokenizer object
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

# fetch the pretrained model 
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")


def get_sentiment_3(tweet:str) -> str:
    """Get the sentiment of a tweet using the FinBert model.

    Args:
        tweet (str): The tweet text.

    Returns:
        str: "Positive" if the sentiment is positive,
            "Negative" if the sentiment is negative.
    """
    # Pre-process input phrase
    input = tokenizer(tweet, padding = True, truncation = True, return_tensors='pt')
    # Estimate output
    output = model(**input)
    # Pass model output logits through a softmax layer.
    predictions = softmax(output.logits, dim=-1)
    
    return "Negative" if predictions[0][1].tolist() > predictions[0][0].tolist() else "Positive" 
    

tweets_df['Sentiment_finbert'] = tweets_df['text'].apply(get_sentiment_3)

In [22]:
from textblob import TextBlob

def get_sentiment(tweet:str) -> str:
    """Get the sentiment of a tweet using the TextBlob library.

    Args:
        tweet (str): The tweet text.

    Returns:
        str: "Positive" if the sentiment is positive, "Negative" if the sentiment is negative.
    """
    score = TextBlob(tweet).sentiment.polarity
    return "Negative" if score < 0 else "Positive"

tweets_df['Sentiment_textblob'] = tweets_df['text'].apply(get_sentiment)

In [24]:
tweets_df

Unnamed: 0,text,Sentiment_afinn,Sentiment_finbert,Sentiment_textblob
0,cointelegraph the scenario of bitcoin going t...,Positive,Positive,Positive
1,the ethereal will be used for e-commerce and o...,Positive,Positive,Positive
2,hakitoken facts - nft staking- marketplace...,Positive,Positive,Positive
3,ethereum price update: eth $1322.34 usdbitcoin...,Positive,Positive,Positive
4,tripdawg blockchain musicgood morning... strea...,Positive,Positive,Positive
...,...,...,...,...
95,cryptotea_ do well to understand that every di...,Positive,Positive,Positive
96,this zesty faucet from _bitcoiner is making me...,Positive,Positive,Positive
97,cryptokaleo don’t get to optimistic from this ...,Negative,Negative,Negative
98,cryptogems555 saylor yeg0rpetrov the scenario ...,Positive,Positive,Positive


In [10]:
from textblob import TextBlob

def getSubjectivity(txt):
    return TextBlob(txt).sentiment.subjectivity
tweets_df['subjectivity'] = tweets_df.text.apply(getSubjectivity)
def getPolarity(txt):
    return TextBlob(txt).sentiment.polarity
tweets_df['polarity'] = tweets_df.text.apply(getPolarity)

tweets_df['textblob'] = tweets_df.polarity.apply(lambda score:  "Negative" if score<0 else "Positive")
tweets_df.drop(columns=['polarity'],inplace=True)

tweets_df.head()

Unnamed: 0,text,Sentiment,afinn,subjectivity,textblob
0,"block 771103 9th jan 2023, 10:04:17 gmt 1,8...",Positive,Negative,0.0,Positive
1,"price update: 1 bitcoin = $17,237.29 cryptocu...",Positive,Neutral,0.0,Positive
2,chaindustry virtually everything but most impo...,Positive,Positive,0.55,Positive
3,"""14 years later: unbelievable impact of satosh...",Positive,Positive,0.42,Positive
4,"current bitcoin price:usd $17,250.05gbp £14,41...",Positive,Neutral,0.4,Positive


In [14]:

tweets_df.head(20)

Unnamed: 0,text,Sentiment,afinn,subjectivity,textblob
0,"block 771103 9th jan 2023, 10:04:17 gmt 1,8...",Positive,Negative,0.0,Positive
1,"price update: 1 bitcoin = $17,237.29 cryptocu...",Positive,Neutral,0.0,Positive
2,chaindustry virtually everything but most impo...,Positive,Positive,0.55,Positive
3,"""14 years later: unbelievable impact of satosh...",Positive,Positive,0.42,Positive
4,"current bitcoin price:usd $17,250.05gbp £14,41...",Positive,Neutral,0.4,Positive
5,"block 771,103 was just mined. it had 1,843 tra...",Negative,Negative,0.0,Positive
6,crypto bitcoin altcoin nfts btc eth xrpbitcoin...,Positive,Neutral,0.9,Positive
7,btc+2.46rr banked easilycryptocurrency crypto ...,Positive,Neutral,0.0,Positive
8,polyxusdt polyx signal 1 last signal: n/a bef...,Positive,Neutral,0.355556,Negative
9,andrewgriuk eyes on $bc bcthe shill 2 earn dap...,Positive,Neutral,0.0,Positive


In [15]:
tweets_df.iloc[4].text

'current bitcoin price:usd $17,250.05gbp £14,414.00euro €16,804.07bitcoin btc $btc btcusd btcgbp btceur crypto cryptocurrency'