In [2]:
import os
import requests
import re

import logging

from textblob import TextBlob
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer


from transformers import pipeline
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

import finnhub
from dotenv import load_dotenv
from pathlib import Path    
import numpy as np
import pandas as pd
import datetime
from bs4 import BeautifulSoup
import sys
import time
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
# Extend the import search path with the parent directory so the `src` package
# imported below can be found (assumes the notebook's working directory sits
# one level below the directory that contains `src` -- TODO confirm).
sys.path.append('../')
# Silence pandas' SettingWithCopyWarning for the whole notebook.
warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning)

# Self-created modules
from src.utils import path as path_yq

# Load variables from a .env file into the process environment, if one exists.
load_dotenv()

# API keys are read from the environment; each is None when the variable is unset.
SUBJECTIVITY_API_KEY = os.environ.get('SUBJECTIVITY_API_KEY')
POLARITY_API_KEY = os.environ.get('POLARITY_API_KEY')
INTENSITY_API_KEY = os.environ.get('INTENSITY_API_KEY')
FINNHUB_API_KEY = os.environ.get('FINNHUB_API_KEY')

# Backtest window, kept both as ISO dates and as compact strings used in file names.
BT_START_DATE = '2023-11-01'
BT_START_STR = '20231101'
BT_END_DATE = '2024-01-31'
BT_END_STR = '20240131'

cur_dir = Path.cwd()
root_dir = path_yq.get_root_dir(cur_dir)

# logging.basicConfig opens the log file immediately and fails if its directory
# is missing (e.g. on a fresh checkout), so make sure the directory exists first.
log_dir = root_dir.joinpath('logs')
log_dir.mkdir(parents=True, exist_ok=True)

logging.basicConfig(filename=log_dir.joinpath('trading_system.log'),
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.DEBUG)



In [3]:
# Names of the text columns to process: the three cleaned columns first,
# followed by their '_lemma' counterparts in the same order.
cols = [
    f'{base}{suffix}'
    for suffix in ('', '_lemma')
    for base in ('cln_hdl', 'cln_smr', 'cln_news')
]

import ast

def convert_data(row):
    """Turn the text of a Python literal back into the value it describes.

    The value is handed to ``ast.literal_eval``. Whatever that call produces
    (a list, a number, ...) is returned as is. When the value cannot be
    evaluated, the original ``row`` is returned unchanged.

    Args:
        row: A single cell value, typically a string such as ``"['a', 'b']"``.

    Returns:
        The evaluated literal, or ``row`` itself when evaluation fails.
    """
    try:
        return ast.literal_eval(row)
    except (ValueError, SyntaxError):
        # Not a valid Python literal (e.g. ordinary free text or a non-string
        # value): keep the cell exactly as it was.
        return row
    except Exception as e:
        # Any other failure is reported, and the cell is still left untouched.
        print(f'Exception: {e}')
        return row

# Cleaned input file for the backtest window, located under <root>/data/proc.
cleaned_path = root_dir / 'data' / 'proc' / f'BA_cln_{BT_START_STR}_{BT_END_STR}.csv'
df7 = pd.read_csv(cleaned_path, index_col=False)

# Every column listed in `cols` is passed cell by cell through convert_data, so
# cells holding the text form of a Python literal (e.g. a list) become that value.
for col in cols:
    converted = df7[col].apply(convert_data)
    df7[col] = converted

# pol: polarity, sub: subjectivity
def sentic_api(text, KEY, max_attempt=5, timeout=120):
    """Send ``text`` to the Sentic endpoint identified by ``KEY`` and return its reply.

    The request goes to ``https://sentic.net/api/en/{KEY}.py?text=<text>``.
    A failed request (timeout, connection problem, HTTP error status, ...) is
    retried until ``max_attempt`` attempts have been made.

    Args:
        text: The text to analyse. Must be a string.
        KEY: The API key; it selects which Sentic endpoint is called.
        max_attempt: Maximum number of requests to try before giving up.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        The response body decoded as text with surrounding whitespace removed,
        or ``None`` when ``text`` is not a string or every attempt failed.
    """
    # Local import so this block does not rely on a new file-level import.
    from urllib.parse import quote

    if not isinstance(text, str):
        print(f"sentic_api expects a string, got {type(text).__name__}.")
        return None

    # Percent-encode the text: characters such as '&', '#' or '+' would
    # otherwise be read as URL syntax and cut off or alter the submitted text.
    url = f'https://sentic.net/api/en/{KEY}.py?text=' + quote(text, safe='')

    for attempt in range(1, max_attempt + 1):
        try:
            response = requests.get(url, timeout=timeout)
            # Treat HTTP error statuses as failures so an error page is never
            # returned to the caller as if it were a label.
            response.raise_for_status()
            # Decode the raw bytes and strip whitespace instead of slicing the
            # repr of the bytes object, which dropped characters whenever the
            # body did not end in exactly one newline.
            return response.content.decode('utf-8', errors='replace').strip()
        except requests.exceptions.Timeout:
            if attempt < max_attempt:
                print(f"Sentic request timed out. Attempt {attempt} of {max_attempt}.")
            else:
                print("No more retries.")
        except Exception as e:
            # Any other failure (connection error, HTTP error status, ...).
            if attempt < max_attempt:
                print(f"Attempt {attempt} of {max_attempt}. Error: {e}")
            else:
                print("No more retries.")

    return None

def sentic_anal_pol(text_list):
    """Compute a signed polarity score for every text in ``text_list``.

    Each text is sent to ``sentic_api`` twice: once with ``POLARITY_API_KEY``
    to obtain the category and once with ``INTENSITY_API_KEY`` to obtain the
    intensity, which is divided by 100. The score is ``+intensity`` for
    "POSITIVE", ``-intensity`` for "NEGATIVE" and ``0`` for "NEUTRAL".

    Args:
        text_list: A list of strings. A bare string is treated as a single
            text. ``None`` or any value without a length (such as the NaN of
            an empty CSV cell) yields an empty result.

    Returns:
        A list with exactly one entry per input text, in input order. An
        entry is ``None`` when the category is not recognised or when scoring
        that text raised an exception.
    """
    if isinstance(text_list, str):
        # Iterating a string would send one request per character.
        text_list = [text_list]
    elif text_list is None or not hasattr(text_list, '__len__'):
        # Missing or scalar cell: there is nothing to score.
        return []

    polarity_list = []
    total = len(text_list)
    for i in range(total):
        try:
            polarity_cat = sentic_api(text_list[i], POLARITY_API_KEY) # Polarity category (positive, negative)
            intensity = float(sentic_api(text_list[i], INTENSITY_API_KEY)) / 100
            if polarity_cat == "POSITIVE":
                score = intensity
            elif polarity_cat == "NEGATIVE":
                score = -intensity
            elif polarity_cat == "NEUTRAL":
                score = 0
            else:
                print(f"Unknown polarity: {polarity_cat}")
                # Keep a placeholder so the scores stay aligned with the texts.
                score = None
            polarity_list.append(score)

            print(f"{(i + 1)}/{total} sentic_anal_pol completed.")
        except Exception as e:
            polarity_list.append(None)
            print(f"sentic_anal_pol exception for text: {text_list[i]}, {e}")

    print(f"sentic_anal_pol completed.")

    return polarity_list

def batch_sentic_anal(df):
    """Add a Sentic polarity column to ``df`` for every column named in ``cols``.

    For each name in the module-level ``cols`` list, ``sentic_anal_pol`` is
    applied to every cell of that column and the result is stored in a new
    column called ``<name>_pol_stc``. ``df`` is modified in place and written
    to the module-level ``stm_path`` after each column is finished.

    Args:
        df: DataFrame containing the columns listed in ``cols``.

    Returns:
        None.
    """
    for col in cols:
        polarity_scores = df[col].apply(sentic_anal_pol)
        df[f'{col}_pol_stc'] = polarity_scores
        # Subjectivity scoring is currently switched off:
        # df[f'{col}_sub_stc'] = df[col].apply(sentic_anal_sub)
        # Write after each column so completed work is already on disk.
        df.to_csv(stm_path, index=False)

def batch_anal(df):
    """Run the Sentic batch analysis on ``df`` and save the result.

    Calls ``batch_sentic_anal(df)``, prints a completion message and then
    writes ``df`` to the module-level ``stm_path`` as CSV without the index.

    Args:
        df: DataFrame to analyse; it is passed on to ``batch_sentic_anal``.

    Returns:
        None.
    """
    batch_sentic_anal(df)
    print('batch_sentic_anal completed.')

    # Final write once all analyses have finished.
    df.to_csv(stm_path, index=False)




In [5]:
# TODO: Change columns, and pathname, and df7
# Restrict this run to the lemmatised columns; batch_sentic_anal reads `cols`
# and `stm_path` from module scope when it is called.
cols = [f'cln_{part}_lemma' for part in ('hdl', 'smr', 'news')]
stm_path = root_dir / 'data' / 'proc' / f'BA_stm_stc2_{BT_START_STR}_{BT_END_STR}.csv'

# Only the first two rows of df7 are analysed here.
batch_anal(df7.head(2))

1/1 sentic_anal_pol completed.
sentic_anal_pol completed.
1/1 sentic_anal_pol completed.
sentic_anal_pol completed.
1/3 sentic_anal_pol completed.
2/3 sentic_anal_pol completed.
3/3 sentic_anal_pol completed.
sentic_anal_pol completed.
1/3 sentic_anal_pol completed.
2/3 sentic_anal_pol completed.
3/3 sentic_anal_pol completed.
sentic_anal_pol completed.
1/19 sentic_anal_pol completed.
2/19 sentic_anal_pol completed.
3/19 sentic_anal_pol completed.
4/19 sentic_anal_pol completed.
5/19 sentic_anal_pol completed.
6/19 sentic_anal_pol completed.
7/19 sentic_anal_pol completed.
8/19 sentic_anal_pol completed.
9/19 sentic_anal_pol completed.
10/19 sentic_anal_pol completed.
11/19 sentic_anal_pol completed.
12/19 sentic_anal_pol completed.
13/19 sentic_anal_pol completed.
14/19 sentic_anal_pol completed.
15/19 sentic_anal_pol completed.
16/19 sentic_anal_pol completed.
17/19 sentic_anal_pol completed.
18/19 sentic_anal_pol completed.
19/19 sentic_anal_pol completed.
sentic_anal_pol completed.