# INVESTOR INTELLIGENCE AGENT - COMPANY INFORMATION RETRIEVAL

## TASK:

1. Retrieve Background of Company
2. Retrieve Stock Price of Company (Image?)
3. Retrieve Financial Metrics of Company
4. Time series analysis (Revenue, profit margins)


In [None]:
# Please uncomment and press shift+enter to install some requirements
# !pip install rpa 
# !pip install yfinance

---

In [None]:
# For the newer version of TagUI use `import rpa as t` instead: same functions, just a different package name
import tagui as t
import re
import yfinance as yf
from datetime import datetime
import datetime

In [None]:
# Close the TagUI session held by `t`.
# NOTE(review): presumably run to clear a session left open by an earlier run
# before extract_company_info_yfinance() calls t.init() again - confirm.
t.close()

In [None]:
# Snap and save Stock Price Chart
def chart_snapshot(ticker, chart_duration):
    """Save one screenshot of the price chart per requested duration.

    For every entry in ``chart_duration`` the button whose text contains that
    entry is clicked, then the chart canvas is captured to
    ``<ticker>/<duration>_chart_<YYYYMMDD_HHMM>.png``.

    Args:
        ticker: Ticker symbol; used as the folder part of the output path.
        chart_duration: Iterable of duration labels (e.g. "1d", "1y").

    Returns:
        None.

    NOTE(review): assumes an active TagUI session already showing the quote
    page, and that the ``<ticker>/`` folder can be written to - confirm.
    """
    chart_xpath = '//canvas[contains(@aria-label,"trendArea chart")]'

    for period in chart_duration:
        # Switch the chart to this duration before capturing it.
        t.click('//button[contains(text(), "' + period + '")]')
        # Timestamp is taken per capture so each file name reflects its own snap time.
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
        target_file = ticker + '/' + period + '_chart_' + stamp + '.png'
        t.snap(chart_xpath, target_file)

# # Example usage:
# chart_duration = ["1d", "1y"] # Options of "1d", "5d", "1m", "6m", "YTD", "1y", "5y", and "Max". Default set to 1 day and 1 year charts
# chart_snapshot("GOOG", chart_duration)  # first argument is the ticker symbol, used as the output folder name

In [None]:
# Extract Company Information from Yahoo Finance

def _parse_price(text):
    """Convert a displayed price such as "1,234.56" to a float."""
    # Thousands separators would make float() raise, so drop them first.
    return float(text.replace(",", ""))


def _age_from_year_born(year_born):
    """Return the age implied by a 4-digit birth-year string, or "N/A"."""
    if not year_born.isdigit() or len(year_born) != 4:
        return "N/A"
    return datetime.datetime.now().year - int(year_born)


def extract_company_info_yfinance(company, chart_duration):
    """Scrape summary, profile and dividend data for a company from Yahoo Finance.

    Starts a TagUI session, searches for ``company`` on sg.finance.yahoo.com,
    reads the summary page, saves chart screenshots via ``chart_snapshot``,
    then reads the Company Profile and Statistics tabs. The TagUI session is
    not closed here; the caller is responsible for calling ``t.close()``.

    Args:
        company: Search text typed into the Yahoo Finance search bar.
        chart_duration: Duration labels forwarded to ``chart_snapshot``.

    Returns:
        A 22-tuple, in this order:
        (live_price, stock_market, ticker, currency, prev_close, open_price,
        trade_daily_volume, day_price_range, day_low, day_high, market_cap,
        pe_ratio_ttm, eps_ttm, Company_Full, industry, employee_count,
        fwd_ann_div_rate, fwd_ann_div_yield, trail_ann_div_rate,
        trail_ann_div_yield, about_company, key_personnel).
        ``live_price`` is a float; ``key_personnel`` is a list of 5 rows, each
        ``[name, title, pay, exercised, year_born, age]`` where ``age`` is an
        int or "N/A"; the remaining fields are the strings read from the page.
        If anything fails, the error is printed and a 22-tuple of ``None`` is
        returned so the result always has the same shape.
    """
    result_length = 22  # number of fields in the success tuple below

    try:
        # Step 1: to access yahoo finance webpage and key in company of interest
        t.init(visual_automation = True) # visual automation if keyboard automation required in subsequent code
        t.url('https://sg.finance.yahoo.com/') # go to yahoo finance website
        t.click('//*[@id="yfin-usr-qry"]') # click on search bar.
        t.type('//*[@id="yfin-usr-qry"]',  company+'[enter]') # search for company.
        # t.click('//*[contains(@data-id,"result-quotes-0")]') # click the top return company. if above enter does not work

        # Step 2: scrape from main summary page
        header = '//*[@id="quote-header-info"]'
        live_price = _parse_price(t.read(header + '//*[contains(@data-field,"regularMarketPrice")]')) # live stock exchange price
        # Title looks like "Name (TICKER.EXCHANGE)"; keep the symbol without the exchange suffix.
        ticker = t.read(header + '//h1').split("(")[1].split(".")[0].split(")")[0]
        # One line of header text carries both the exchange name and "Currency in XXX"; read it once.
        exchange_line = t.read(header + '//div[contains(@class,"Fz(12px)")]/span')
        stock_market = exchange_line.split("-")[0].split(" ")[0]
        currency = exchange_line.split("Currency in ")[1].strip()  # Remove any leading or trailing spaces

        left_table = '//*[contains(@data-test,"left-summary-table")]//table/tbody'
        right_table = '//*[contains(@data-test,"right-summary-table")]//table/tbody'
        prev_close = t.read(left_table + '/tr[1]/td[2]')
        open_price = t.read(left_table + '/tr[2]/td[2]')
        trade_daily_volume = t.read(left_table + '/tr[7]/td[2]')
        day_price_range = t.read(left_table + '/tr[5]/td[2]')
        range_parts = day_price_range.split(" - ")
        day_low = range_parts[0]
        day_high = range_parts[1]
        market_cap = t.read(right_table + '/tr[1]/td[2]')
        pe_ratio_ttm = t.read(right_table + '/tr[3]/td[2]')
        eps_ttm = t.read(right_table + '/tr[4]/td[2]')
        chart_snapshot(ticker, chart_duration)

        # Step 3: scrape from Company Profile page
        t.click('//*[contains(@data-test,"COMPANY_PROFILE")]') # click the company profile tab.
        profile = '//*[@id="Col1-0-Profile-Proxy"]'
        Company_Full = t.read('//div[@id="Main"]//div/h3') # full company name; confirms the match since the search text may be partial
        industry = t.read(profile + '//span[contains(@class,"Fw(600)")][2]') # read industry of company
        employee_count = t.read(profile + '//span[contains(@class,"Fw(600)")][3]') # read number of full-time employees of company.
        about_company = t.read(profile + '//*[contains(@class,"quote-sub-section")]/p') # read company description

        # Top 5 key personnel: Name, Title, Pay, Exercised, Year Born, plus a derived Age
        key_personnel = []
        for i in range(1, 6):  # Rows (personnel 1 to 5)
            personnel_data = [
                t.read('//table/tbody/tr[' + str(i) + ']/td[' + str(j) + ']')
                for j in range(1, 6)  # Columns (name, title, pay, exercised, year born)
            ]
            # Year born is the last column read; age is appended as a sixth column.
            personnel_data.append(_age_from_year_born(personnel_data[-1]))
            key_personnel.append(personnel_data)

        # Step 4: Scrape from Statistics Tab page
        t.click('//*[contains(@data-test,"STATISTICS")]') # click the statistics tab.
        stat_row = '//tr[td/span[contains(text(), "{}")]]/td[2]'
        fwd_ann_div_rate = t.read(stat_row.format("Forward annual dividend rate"))
        fwd_ann_div_yield = t.read(stat_row.format("Forward annual dividend yield"))
        trail_ann_div_rate = t.read(stat_row.format("Trailing annual dividend rate"))
        trail_ann_div_yield = t.read(stat_row.format("Trailing annual dividend yield"))

        return (
            live_price, stock_market, ticker, currency, prev_close, open_price, trade_daily_volume, day_price_range,
            day_low, day_high, market_cap, pe_ratio_ttm, eps_ttm, Company_Full, industry, employee_count,
            fwd_ann_div_rate, fwd_ann_div_yield, trail_ann_div_rate, trail_ann_div_yield, about_company,
            key_personnel
        )

    except Exception as e:
        # Notebook-level boundary: report the problem and hand back a placeholder
        # with the same length as a successful result.
        print(f"Error occurred: {str(e)}")
        return (None,) * result_length

In [None]:
## Export to Excel as Stock Portfolio
import datetime
from openpyxl import Workbook, load_workbook

# Function to create or load workbook and add data
def update_excel(username, company_info):
    """Append one timestamped company snapshot to the user's stock-profile workbook.

    The workbook is ``<username>_stock_profile.xlsx`` in the current working
    directory. It is loaded if it exists; otherwise a new workbook is created
    and a 52-column header row is written first.

    Args:
        username: Used to build the workbook file name.
        company_info: Sequence of at least 22 items as returned by
            ``extract_company_info_yfinance``: 21 single-valued fields followed
            by the key-personnel list (rows of 6 values each).

    Raises:
        ValueError: If ``company_info`` is None, has fewer than 22 items, or
            has no key-personnel list (e.g. the placeholder returned after a
            failed extraction). Nothing is written in that case.
    """
    scalar_count = 21       # single-valued fields preceding the key-personnel list
    personnel_slots = 5     # key-personnel entries the header provides columns for
    personnel_fields = 6    # name, title, pay, exercised, year born, age
    row_width = 1 + scalar_count + personnel_slots * personnel_fields  # date + fields = 52

    # Validate before touching the file so a failed extraction never creates or alters it.
    if (
        company_info is None
        or len(company_info) <= scalar_count
        or company_info[scalar_count] is None
    ):
        raise ValueError(
            "company_info must contain 21 fields followed by the key personnel list"
        )

    # Define file path based on username
    file_path = f"{username}_stock_profile.xlsx"

    try:
        # Load existing workbook
        workbook = load_workbook(file_path)
        worksheet = workbook.active
    except FileNotFoundError:
        # If workbook doesn't exist, create a new one
        workbook = Workbook()
        worksheet = workbook.active
        # Add headers to the first row
        header = ["Date", "Live Price", "Stock Market", "Ticker Symbol", "Currency", "Previous Close",
                  "Open Price", "Trade Daily Volume", "Day Price Range", "Day Low", "Day High", "Market Cap",
                  "P/E Ratio TTM", "EPS TTM", "Company Name", "Industry", "Employee Count",
                  "Fwd Annual Dividend Rate", "Fwd Annual Dividend Yield", "Trailing Annual Dividend Rate", "Trailing Annual Dividend Yield",
                  "About Company"]
        # Add headers for key personnel
        for i in range(1, personnel_slots + 1):
            header.extend([f"Key Personnel {i} Name", f"Key Personnel {i} Title", f"Key Personnel {i} Pay",
                           f"Key Personnel {i} Exercised", f"Key Personnel {i} Year Born", f"Key Personnel {i} Age"])
        worksheet.append(header)

    # Timestamp first, then the 21 single-valued fields in header order
    row_data = [datetime.datetime.now(), *company_info[:scalar_count]]
    # Each key-personnel row contributes its values as consecutive columns
    for personnel in company_info[scalar_count]:
        row_data.extend(personnel)

    # Pad with empty strings up to the header width when fewer than 5 key personnel are provided
    row_data.extend([''] * (row_width - len(row_data)))

    worksheet.append(row_data)

    # Save the workbook
    workbook.save(file_path)


In [None]:
# Example usage:
username = "ky"  # Define username
company = 'google' # User input company of interest
chart_duration = ["1d", "1y"] # set as default. Options of "1d", "5d", "1m", "6m", "YTD", "1y", "5y", and "Max".

company_info = extract_company_info_yfinance(company, chart_duration) # stores extracted information
# The first slot is the live price; it is None when retrieval failed.
# Compare against None explicitly so a price of 0.0 is not mistaken for a failure.
if company_info[0] is not None:
    # Unpack in the same order the extractor returns its fields:
    # dividend figures come before the description and key personnel.
    (
        live_price, stock_market, ticker_symbol, currency, prev_close, open_price, trade_daily_volume, day_price_range,
        day_low, day_high, market_cap, pe_ratio_ttm, eps_ttm, Company_Full, industry, employee_count,
        fwd_ann_div_rate, fwd_ann_div_yield, trail_ann_div_rate, trail_ann_div_yield, about_company,
        key_personnel
    ) = company_info
    # Process retrieved data
    # Only export when there is real data to write.
    update_excel(username, company_info)
else:
    print("Company information retrieval failed.")

In [None]:
## Export to Excel as Stock Portfolio
import datetime
from openpyxl import Workbook, load_workbook

# Function to create or load workbook and add data
def update_excel(username, company_info):
    """Append a timestamped row of company data to ``<username>_stock_profile.xlsx``.

    An existing workbook is opened and extended; when the file is missing a
    new workbook is created and given a 7-column header (Date, Company,
    Live Price, Stock Market, Ticker Symbol, CEO, Company Description).

    Args:
        username: Used to build the workbook file name.
        company_info: Iterable of values written after the timestamp, one per
            column.

    NOTE: this cell defines ``update_excel`` a second time in this notebook;
    running it replaces the earlier definition that writes the wider
    key-personnel layout.
    """
    xlsx_name = f"{username}_stock_profile.xlsx"

    try:
        # Reuse the workbook from a previous run when it is present.
        book = load_workbook(xlsx_name)
        sheet = book.active
    except FileNotFoundError:
        # First run for this user: start a fresh workbook with a header row.
        book = Workbook()
        sheet = book.active
        column_titles = ["Date", "Company", "Live Price", "Stock Market", "Ticker Symbol", "CEO", "Company Description"]
        sheet.append(column_titles)

    # Timestamp goes in the first column, followed by the supplied values.
    sheet.append([datetime.datetime.now(), *company_info])

    book.save(xlsx_name)

In [None]:
# # Extract Company Information from Google Finance

# def extract_company_info_gfinance(company):
#     try:
#         t.init(visual_automation = True) # visual automation if keyboard automation required in subsequent code
#         t.url('https://www.google.com/finance/?hl=en') # go to google finance website
#         t.click('//c-wiz[2]//input[2]') # click on search bar and wait. Did not work when use type directly as first word to be searched is always missing. 
#         t.wait(0.1) # introduced wait 0.1sec as per above comment to allow time before typing search
#         t.type('//c-wiz[2]//input[2]',  company+'[enter]') # search for company. enter does not work somehow
#         t.click('//*[@class="MkjOTb SAq8ff"][1]') # click the top return company. 

#         Company_Full = t.read('//*[contains(@class,"zzDege")]') # read full name of company. act as confirmation to user's request as the search name used may not be complete
#         live_price = t.read('//*[contains(@class,"YMlKec fxKbKc")]') # reads live stock exchange price
#         live_price = float(live_price.replace('$', ''))
#         Trade = t.read('//div[@class="PdOqHc"]') # reads stock market and ticker symbol used
#         stock_market = Trade.split(" • ")[1] # extracts stock market name
#         ticker = Trade.split(" • ")[0][slice(4,len(Trade.split(" • ")[0]))] # extracts ticker symbol used
#         ceo = t.read('//span[@class="w4txWc oJeWuf"]/div[2]//a[@class="tBHE4e"]') # reads CEO of company
#         about_company = t.read('//*[@class="bLLb2d"]')
        
#         currency = t.read('//*[@class="yNnsfe PFjsMe"]')
#         p_e_ratio = t.read('//*[@class="gyFHrc"][6]//*[@class="P6K39c"]')
        
    
#         return Company_Full, live_price, stock_market, ticker, ceo, about_company
    
#     except Exception as e:
#         print(f"Error occurred: {str(e)}")
#         return None, None, None, None, None, None

In [None]:
# # Snap and save Stock Price Chart
# chart_duration = ["1day", "1year"] # Options of "1day", "5day", "1month", "6month", "ytd", "1year", "5year", and "max". Default set to 1 day and 1 year charts
# for duration in chart_duration:
#     t.click('//*[@id="' + duration + 'Tab"]') # click duration
#     t.snap('//*[@class="ushogf"]',ticker_symbol + '/' + duration + '_chart_'+datetime.datetime.now().strftime("%Y%m%d_%H%M")+'.png') # snap  chart

# # Working Code derived to click 1 year tab and snap  1 year chart before using array of chart_duration
# # t.click('//*[@id="1yearTab"]') # click to display 1 year chart
# # t.snap('//*[@class="ushogf"]',ticker_symbol+'/'+'1Y_chart_'+datetime.datetime.now().strftime("%Y%m%d_%H%M")+'.png') # snap 1 year chart
