In [None]:
import csv
import os
from datetime import datetime

from elasticsearch import Elasticsearch
from flask import Flask, render_template, request
from textblob import TextBlob



In [None]:

# Connection settings are read from the environment so that host details and
# credentials do not have to live in the notebook. The fallbacks reproduce the
# previous hard-coded local values so an unconfigured run behaves as before.
# NOTE(review): export ES_USER / ES_PASSWORD outside the notebook and remove the
# fallback credentials before sharing or committing this file.
es = Elasticsearch(
    [{
        'host': os.environ.get("ES_HOST", "localhost"),
        'port': int(os.environ.get("ES_PORT", "9200")),
        'scheme': os.environ.get("ES_SCHEME", "http"),
    }],
    http_auth=(os.environ.get("ES_USER", "vijay"), os.environ.get("ES_PASSWORD", "password")),
)
# Left as the cell's last expression so the notebook displays the cluster info.
es.info()



In [None]:
# Load the financial CSV into a list of dicts, one per data row, keyed by the
# column names taken from the file's header row.
tech_companies_financial_data_documents = list()

# newline='' hands line-ending handling to the csv module, which is required for
# quoted fields that contain embedded newlines to be parsed correctly.
with open("data.csv", "r", encoding='utf-8', newline='') as file:

    reader = csv.reader(file)

    # An empty file yields no header row; fall back to no columns (and no rows).
    headers = next(reader, [])

    for row in reader:

        # csv.reader yields an empty list for a blank line; skip it rather than
        # failing on the first column lookup.
        if not row:
            continue

        # Rows shorter than the header still raise IndexError, as before.
        tech_companies_financial_data_documents.append(
            {header: row[position] for position, header in enumerate(headers)}
        )


# Preview only the first few records.
print(tech_companies_financial_data_documents[:10])

    

In [None]:
def _load_headline_documents(path, news_organization):
    """Read a headlines CSV into a list of dicts tagged with their source.

    Args:
        path: Path of the CSV file; its first row is used as the column names.
        news_organization: Value stored under the 'News Organization' key of
            every returned document.

    Returns:
        A list with one dict per non-blank data row. Each dict holds the
        'News Organization' entry followed by one entry per header column
        (a header column with the same name overrides the tag).

    Raises:
        IndexError: If a non-blank row has fewer fields than the header row.
    """
    documents = list()

    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing embedded newlines are parsed correctly.
    with open(path, "r", encoding='utf-8', newline='') as file:

        reader = csv.reader(file)

        # An empty file yields no header row; treat it as having no columns.
        headers = next(reader, [])

        for row in reader:

            # csv.reader yields an empty list for a blank line; skip it.
            if not row:
                continue

            data = {'News Organization': news_organization}

            for position, header in enumerate(headers):
                data[header] = row[position]

            documents.append(data)

    return documents


cnbc_documents = _load_headline_documents("cnbc_headlines.csv", 'CNBC')
guardian_documents = _load_headline_documents("guardian_headlines.csv", 'The Guardian')
reuters_documents = _load_headline_documents("reuters_headlines.csv", 'Reuters')


# Preview the first few records of each source.
print(cnbc_documents[:10])
print()

print(guardian_documents[:10])
print()

print(reuters_documents[:10])
print()

In [None]:
def index_documents(index, id, document):
    """Index a single document through the module-level ``es`` client.

    Args:
        index: Name of the index to write to.
        id: Identifier to store the document under. The name shadows the
            builtin ``id`` but is kept so keyword callers keep working.
        document: The document passed to ``es.index``.

    Returns:
        The response returned by ``es.index`` (previously it was assigned to an
        unused local and discarded, so callers could not inspect the outcome).
    """
    return es.index(index=index, id=id, document=document)


In [None]:
# Index every financial record under a sequential id that starts at 1.
for doc_id, document in enumerate(tech_companies_financial_data_documents, start=1):
    es.index(index="tech_company_index", id=doc_id, document=document)



In [None]:
# Index each outlet's headlines into its own index; ids restart at 1 per index.
for index_name, documents in (
    ("cnbc_index", cnbc_documents),
    ("guardian_index", guardian_documents),
    ("reuters_index", reuters_documents),
):
    for doc_id, document in enumerate(documents, start=1):
        es.index(index=index_name, id=doc_id, document=document)

In [None]:
# Sanity check: fetch the document stored under id 1 and show its source,
# followed by a blank line.
resp = es.get(index="tech_company_index", id=1)
source_document = resp['_source']
print(source_document, end="\n\n")

In [None]:
# Sanity check: show the source of the id-1 document from each news index,
# each followed by a blank line.
for index_name in ("cnbc_index", "guardian_index", "reuters_index"):
    resp = es.get(index=index_name, id=1)
    print(resp['_source'], end="\n\n")

In [None]:
# Queries submitted so far, oldest first; shared by the search helpers and the
# Flask view defined in later cells.
search_history = list()

In [None]:
def analyze_search_history(history):
    """Return the most frequent earlier query in ``history``.

    The final entry of ``history`` is left out of the count; only the entries
    before it are tallied.

    Args:
        history: Sequence of hashable query terms, oldest first.

    Returns:
        The term that occurs most often among ``history[:-1]``. When several
        terms share the top count, the one that appeared first wins. Returns
        ``None`` when ``history`` has fewer than two entries.
    """
    if len(history) < 2:
        return None

    tallies = {}
    for term in history[:-1]:
        tallies[term] = tallies.get(term, 0) + 1

    # max() keeps the first key among equal counts, and dicts preserve
    # insertion order, so ties resolve to the earliest-seen term.
    return max(tallies, key=tallies.get)

In [None]:
def _sentiment_label(description):
    """Map a description to 'positive', 'negative', 'neutral' or 'Not Applicable'.

    The label follows the sign of TextBlob's polarity score; a missing or empty
    description gets 'Not Applicable'.
    """
    if not description:
        return 'Not Applicable'

    polarity = TextBlob(description).sentiment.polarity

    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'


def _format_news_hit(source):
    """Render one hit's ``_source`` dict as a ``<br>``-separated result string."""
    news_organization = source.get('News Organization')
    headline = source.get('Headlines')
    published_time = source.get('Time')
    description = source.get('Description')
    sentiment_label = _sentiment_label(description)

    formatted_result = f"'News Organization': {news_organization}\nHeadline: {headline}\nTime: {published_time}\nDescription: {description}\nSentiment: {sentiment_label}\n\n"

    # The replacement runs over the whole string, so newlines inside the field
    # values are converted as well.
    # NOTE(review): field values are inserted without HTML escaping; confirm how
    # the template renders these strings before relying on them being safe.
    return formatted_result.replace('\n', '<br>')


def search_query(query, datasets_to_search, index_names):
    """Search the selected news indices and return formatted result strings.

    The most frequent earlier query from ``search_history`` (if any) is
    appended to ``query`` before searching. Each index is asked for up to 20
    documents whose 'Headlines' or 'Description' field matches the query text.

    Args:
        query: Search text; ``None`` is treated as an empty string.
        datasets_to_search: One entry per selected source; only used to pair up
            with ``index_names``.
        index_names: Elasticsearch index names, parallel to
            ``datasets_to_search``.

    Returns:
        A list of ``<br>``-separated strings, one per hit, in index order. The
        list is empty when no source is selected.
    """
    # Nothing selected means nothing to search; skip the history lookup too.
    if not datasets_to_search or not index_names:
        return []

    # A missing form field arrives as None; normalise it so the concatenation
    # below cannot raise TypeError.
    query = query or ""

    most_frequent_term = analyze_search_history(search_history)

    if most_frequent_term:

        query += " " + most_frequent_term

    print(query)

    body = {
        "size": 20,
        "query": {
            "bool": {
                "should": [
                    {"match": {"Headlines": query}},
                    {"match": {"Description": query}}
                ],
                "minimum_should_match": 1
            }
        }
    }

    results = []

    # zip stops at the shorter list, so mismatched lengths cannot cause an
    # IndexError on index_names.
    for _dataset, index_name in zip(datasets_to_search, index_names):

        resp = es.search(index=index_name, body=body)

        results.extend(_format_news_hit(hit["_source"]) for hit in resp['hits']['hits'])

    return results


In [None]:
# Lower-case financial terms mapped to the synonyms used for query expansion.
financial_synonyms = {
    "stock": ["shares", "equity", "stock market", "securities"],
    "bond": ["debt securities", "fixed income", "notes", "debt"],
    "market": ["marketplace", "trading place", "exchange"],
    "investment": ["investing", "capital placement", "asset allocation"],
    "economy": ["economic system", "financial system", "market system"],
    "currency": ["money", "cash", "legal tender", "fiat"],
    "bank": ["banking institution", "lender", "financial institution"],
    "trade": ["trading", "buying and selling", "commerce"],
    "inflation": ["price rise", "economic inflation", "monetary inflation"],
    "mortgage": ["home loan", "property loan", "loan for real estate"],
    "recession": ["economic downturn", "depression", "slowdown"],
    "portfolio": ["investment mix", "asset mix", "asset collection"],
    "dividend": ["profit sharing", "payout", "share of profits"],
    "risk": ["uncertainty", "exposure", "financial risk"],
    "credit": ["borrowing", "lending", "credit line"],
    "tax": ["taxation", "levy", "duty"],
    "profit": ["earnings", "gain", "financial gain"],
    "loss": ["deficit", "financial loss", "shortfall"],
    "interest": ["interest rate", "borrowing cost", "lending rate"],
    "commodity": ["raw material", "basic good", "tradeable item"],
    "acquisition": ["takeover", "purchase", "buyout"],
    "bankruptcy": ["insolvency", "financial failure", "liquidation"]
}



def expand_query(query):
    """Expand each word of ``query`` with its financial synonyms.

    Args:
        query: Whitespace-separated search text. ``None`` or an empty string
            yields an empty result.

    Returns:
        The original words, each followed by its synonyms from
        ``financial_synonyms`` when it has any, joined with " OR ". Words are
        kept as typed; the synonym lookup ignores letter case.
    """
    if not query:
        return ""

    expanded_terms = list()

    for word in query.split():

        expanded_terms.append(word)

        # Keys are lower-case, so normalise the word before looking it up;
        # otherwise "Stock" or "TAX" would never be expanded.
        expanded_terms.extend(financial_synonyms.get(word.lower(), ()))

    return " OR ".join(expanded_terms)

In [None]:
def search_tech_companies(query):
    """Search the financial index by company and return formatted results.

    Args:
        query: Text matched against the 'Company' field. ``None`` or an empty
            string returns no results without contacting Elasticsearch.

    Returns:
        A list of at most 20 ``<br>``-separated strings, one per hit, each
        listing the Company, Date, Close/Last, Volume, Open, High and Low
        values of the hit's source document.
    """
    # A missing form field arrives as None, which is not valid match text.
    if not query:
        return []

    max_results = 20

    body = {
        "size": max_results,
        "query": {
            "match": {"Company": query}
        }
    }

    resp = es.search(index="tech_company_index", body=body)

    results = []

    # "size" already limits the response; the slice keeps the cap explicit.
    for hit in resp['hits']['hits'][:max_results]:

        source = hit["_source"]

        company = source.get('Company')
        date = source.get('Date')
        last_trade = source.get('Close/Last')
        volume = source.get('Volume')

        # Named *_price so the builtin open() is not shadowed.
        open_price = source.get('Open')
        high_price = source.get('High')
        low_price = source.get('Low')

        formatted_result = f"Company: {company}\nDate: {date}\nClose/Last: {last_trade}\nVolume: {volume}\nOpen: {open_price}\nHigh: {high_price}\nLow: {low_price}\n\n"

        results.append(formatted_result.replace('\n', '<br>'))

    return results


In [None]:
app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def homepage():
    """Render the search page and handle every form posted to it.

    On POST the view reacts to whichever button names are present in the form:
    'news' / 'historical_data' reveal the matching search form, 'news_search' /
    'historical_data_search' run a search, 'show_history' displays the stored
    queries and 'clear_history' empties them. Non-empty queries are appended to
    the module-level ``search_history`` before the search runs.

    Returns:
        The rendered 'test.html' template with the results and the UI flags.
    """
    results = []

    search_executed = False

    show_search_history = False

    clear_search_history = False

    show_news_form = False

    checked_state = {'cnbc' : False, 'the_guardian' : False, 'reuters' : False}

    show_historical_data_form = False

    if request.method == 'POST':

        if 'news' in request.form:

            show_news_form = True

        elif 'news_search' in request.form:

            # A missing field comes back as None; use '' so the search helpers
            # always receive a string.
            query = request.form.get('search_query') or ''
            selected_sentiment = request.form.get('sentiment')

            if query:

                search_history.append(query)

            search_executed = True

            datasets_to_search = list()
            index_names = list()

            # (checkbox field, loaded documents, index name) per news source.
            news_sources = (
                ('cnbc', cnbc_documents, "cnbc_index"),
                ('the_guardian', guardian_documents, "guardian_index"),
                ('reuters', reuters_documents, "reuters_index"),
            )

            for field_name, documents, index_name in news_sources:

                if request.form.get(field_name):

                    datasets_to_search.append(documents)
                    index_names.append(index_name)
                    checked_state[field_name] = True


            results = search(query, datasets_to_search, index_names)


            # Every result string ends with "Sentiment: <label><br><br>", so
            # compare against that suffix. A plain substring test would also
            # match the word inside a headline or description. A missing
            # 'sentiment' field is treated like 'all' instead of raising.
            if selected_sentiment and selected_sentiment != 'all':

                sentiment_suffix = f"Sentiment: {selected_sentiment}<br><br>"

                results = [result for result in results if result.endswith(sentiment_suffix)]


        if 'historical_data' in request.form:

            show_historical_data_form = True

        elif "historical_data_search" in request.form:

            query = request.form.get("historical_search_query") or ''

            if query:

                search_history.append(query)

            search_executed = True

            results = search_tech_companies(query)


        if 'show_history' in request.form:

            show_search_history = True


        if 'clear_history' in request.form:

            clear_search_history = True
            # Cleared in place so the module-level list object stays the same.
            search_history.clear()

    return render_template('test.html', results=results, checked_state=checked_state, search_executed=search_executed, show_news_form=show_news_form, 
                           show_historical_data_form=show_historical_data_form, search_history=search_history, show_search_history=show_search_history, 
                           clear_search_history=clear_search_history)

def search(query, datasets_to_search, index_names):
    """Forward a news search to ``search_query`` and return its result as-is.

    Args:
        query: Search text entered by the user.
        datasets_to_search: One entry per selected news source.
        index_names: Elasticsearch index names, parallel to
            ``datasets_to_search``.

    Returns:
        Whatever ``search_query`` returns for the same arguments.
    """
    matches = search_query(query, datasets_to_search, index_names)
    return matches


# Start Flask's built-in server with debug mode off. The call does not return
# until the server is stopped, so this cell keeps running while the app is up.
app.run(debug=False)
