In [None]:
from yahooquery import Ticker
import yfinance as yf
import pandas as pd
from datetime import datetime, date, timedelta
import warnings
from collections import defaultdict
import time


warnings.filterwarnings("ignore", category=FutureWarning, module="yahooquery")

def get_raw_yahoo(api, symbol_list, start_date):
    """Download daily OHLCV history for a list of symbols from Yahoo Finance.

    Args:
        api: Backend selector. "yahooquery" uses ``yahooquery.Ticker``; any
            other value falls through to ``yfinance.download``.
        symbol_list: Ticker symbols to request in a single call.
        start_date: First date to fetch; passed to the backend unchanged.

    Returns:
        A DataFrame with the columns date, symbol, open, high, low, close and
        volume, where ``date`` is parsed from the first 10 characters of the
        backend's date value. If anything fails, the error is printed and an
        empty DataFrame with the same columns is returned instead of raising.
    """
    output_columns = ["date", "symbol", "open", "high", "low", "close", "volume"]

    try:
        # To avoid time zone issues, end_date is set to tomorrow's date.
        end_date = date.today() + timedelta(days=1)

        if api == "yahooquery":
            ticker = Ticker(symbol_list)
            hist_data = ticker.history(start=start_date, end=end_date, interval="1d")
            # Turn the index levels into ordinary columns.
            hist_data = hist_data.reset_index()
        else:
            hist_data = yf.download(
                tickers=symbol_list,
                start=start_date,
                end=end_date,
                interval="1d",
                group_by="ticker",  # Keep data grouped by ticker if multiple symbols are provided
                auto_adjust=False,  # Keep original values without adjustment
            )

            # Move the outer column level into the rows, then align the
            # column names with the yahooquery branch.
            hist_data = hist_data.stack(level=0, future_stack=True).reset_index()
            hist_data = hist_data.rename(
                columns={
                    "Date": "date",
                    "Ticker": "symbol",
                    "Open": "open",
                    "High": "high",
                    "Low": "low",
                    "Close": "close",
                    "Volume": "volume",
                }
            )

        # Take an explicit copy of the column subset: assigning into the bare
        # selection triggers pandas' SettingWithCopyWarning on pandas < 3.
        hist_data = hist_data[output_columns].copy()

        # The date can arrive as 2012-01-01 or as a full timestamp with a UTC
        # offset. Mixed formats cause problems, so keep only the leading
        # YYYY-MM-DD part before parsing.
        date_text = hist_data["date"].astype(str).str.slice(0, 10)
        hist_data["date"] = pd.to_datetime(date_text)

        return hist_data

    except Exception as e:
        # Best-effort contract: report the failure and hand back an empty
        # frame with the expected columns so callers can still concatenate.
        print(f"An error occurred: {e}, {symbol_list}, {start_date}")
        return pd.DataFrame(columns=output_columns)

In [None]:
# symbol_list=['DBMF', 'DGS']
# get_raw_yahoo('yahooquery',symbol_list, '2024-11-15')

In [47]:
from collections import defaultdict
import time
import random

def get_raw_yahoo_by_looping_groups(api, group_date_symbol_list):
    """Fetch history group by group and stack the results into one DataFrame.

    Args:
        api: Backend selector forwarded to ``get_raw_yahoo``.
        group_date_symbol_list: Iterable of
            ``(group_id, group_start_date, symbol)`` triples. Symbols that
            share the same ``(group_id, group_start_date)`` are requested
            together in one ``get_raw_yahoo`` call.

    Returns:
        The per-group DataFrames concatenated with a fresh index. For empty
        input, an empty DataFrame with the columns date, symbol, open, high,
        low, close and volume.
    """
    rows = list(group_date_symbol_list)
    if not rows:
        # Nothing to fetch. Without this guard max() raises on empty input
        # (and pd.concat() would too on an empty list).
        return pd.DataFrame(columns=["date", "symbol", "open", "high", "low", "close", "volume"])

    # Only used for the "n/total" progress messages.
    largest_group_id = max(group_id for group_id, _, _ in rows)

    # Collect the symbols of each (group_id, group_start_date) key.
    grouped_symbol_list = defaultdict(list)
    for group_id, group_start_date, symbol in rows:
        grouped_symbol_list[(group_id, group_start_date)].append(symbol)

    warnings.filterwarnings("ignore", category=FutureWarning, module="yahooquery")

    stacked_hist_panda_df = []
    for (group_id, group_start_date), group_symbol_list in grouped_symbol_list.items():
        print(f"Working on {group_id}/{largest_group_id}")
        hist_group_panda_df = get_raw_yahoo(api, group_symbol_list, group_start_date)
        print(f"{len(hist_group_panda_df)} records in {group_id}/{largest_group_id}.")
        stacked_hist_panda_df.append(hist_group_panda_df)

        # Random pause between requests.
        sleep_time = random.randint(1, 5)
        print(f"Sleeping for {sleep_time} seconds...")
        time.sleep(sleep_time)

    return pd.concat(stacked_hist_panda_df, ignore_index=True)

In [48]:
from yahooquery import Ticker
import yfinance as yf
import pandas as pd
from datetime import datetime, date, timedelta
import warnings
from collections import defaultdict
import time


warnings.filterwarnings("ignore", category=FutureWarning, module="yahooquery")

def get_raw_yahoo(api, symbol_list, start_date):
    """Fetch daily open/high/low/close/volume rows for several symbols.

    Args:
        api: "yahooquery" selects ``yahooquery.Ticker``; every other value
            selects ``yfinance.download``.
        symbol_list: Ticker symbols requested together.
        start_date: Start of the requested range, handed to the backend as is.

    Returns:
        A DataFrame with exactly the columns date, symbol, open, high, low,
        close and volume; ``date`` is parsed from the leading YYYY-MM-DD part
        of the backend's value. On any exception the error is printed and an
        empty DataFrame with those columns is returned.
    """
    wanted_columns = ["date", "symbol", "open", "high", "low", "close", "volume"]

    try:
        # To avoid time zone issues, end_date is set to tomorrow's date.
        end_date = date.today() + timedelta(days=1)

        if api == "yahooquery":
            hist_data = Ticker(symbol_list).history(
                start=start_date, end=end_date, interval="1d"
            )
            # Turn the index levels into ordinary columns.
            hist_data = hist_data.reset_index()
        else:
            hist_data = yf.download(
                tickers=symbol_list,
                start=start_date,
                end=end_date,
                interval="1d",
                group_by="ticker",  # Keep data grouped by ticker if multiple symbols are provided
                auto_adjust=False,  # Keep original values without adjustment
            )

            # Move the outer column level into the rows and lower-case the
            # column names so both backends produce the same schema.
            hist_data = hist_data.stack(level=0, future_stack=True).reset_index()
            hist_data = hist_data.rename(
                columns={
                    "Date": "date",
                    "Ticker": "symbol",
                    "Open": "open",
                    "High": "high",
                    "Low": "low",
                    "Close": "close",
                    "Volume": "volume",
                }
            )

        # Work on an explicit copy of the selected columns; writing into the
        # bare selection raises SettingWithCopyWarning on pandas < 3.
        hist_data = hist_data[wanted_columns].copy()

        # Dates may be plain (2012-01-01) or full timestamps with an offset.
        # Keep the first 10 characters so every row parses the same way.
        hist_data["date"] = pd.to_datetime(
            hist_data["date"].astype(str).str.slice(0, 10)
        )

        return hist_data

    except Exception as e:
        # Include the request context so a failed group can be identified
        # when this is called for many groups in a loop.
        print(f"An error occurred: {e}, {symbol_list}, {start_date}")
        return pd.DataFrame(columns=wanted_columns)

In [49]:
import sys
import traceback
import pandas as pd
from yahooquery import Ticker

# Symbols requested by the get_market_data() call further down in this cell.
symbol_list = ["OXSQ", "C"]

def get_market_data(symbol_list):
    """Fetch pre-, regular- and post-market quote fields for several symbols.

    Args:
        symbol_list: Ticker symbols passed to ``yahooquery.Ticker``.

    Returns:
        A DataFrame with one row per symbol: a ``symbol`` column followed by
        the time, price, change and change-percent columns for each of the
        three sessions. A field absent from the payload is filled with the
        string "NULL". Returns ``None`` (after printing the formatted
        traceback) if the fetch or the conversion raises.
    """
    # Output column -> key in the yahooquery price payload.
    field_map = {
        "pre_market_time": "preMarketTime",
        "pre_market_price": "preMarketPrice",
        "pre_market_change": "preMarketChange",
        "pre_market_change_percent": "preMarketChangePercent",
        "regular_market_time": "regularMarketTime",
        "regular_market_price": "regularMarketPrice",
        "regular_market_change": "regularMarketChange",
        "regular_market_change_percent": "regularMarketChangePercent",
        "post_market_time": "postMarketTime",
        "post_market_price": "postMarketPrice",
        "post_market_change": "postMarketChange",
        "post_market_change_percent": "postMarketChangePercent",
    }

    try:
        prices = Ticker(symbol_list).price

        market_data = {}
        for symbol, details in prices.items():
            # A symbol that could not be resolved may come back as a non-dict
            # value (e.g. an error message string). Treat it as "no fields"
            # so one bad symbol does not discard the whole batch.
            if not isinstance(details, dict):
                details = {}
            market_data[symbol] = {
                column: details.get(source_key, "NULL")
                for column, source_key in field_map.items()
            }

        # Rows are keyed by symbol; expose that key as a regular column.
        market_panda_df = pd.DataFrame.from_dict(market_data, orient="index")
        market_panda_df.index.name = "symbol"
        return market_panda_df.reset_index()

    except Exception:
        except_message = repr(traceback.format_exception(*sys.exc_info()))
        message = f"Error(-2): Unable to fetch data. <symbol_list: {symbol_list}> <Exception: {except_message}>"
        print(message)
        return None

# Run the fetch for the symbols above and show the result; a failed fetch
# yields None, in which case nothing is printed here.
market_panda_df = get_market_data(symbol_list)

if market_panda_df is not None:
    print("Pre-Market Data DataFrame:", market_panda_df, sep="\n")


Pre-Market Data DataFrame:
  symbol      pre_market_time  pre_market_price  pre_market_change  \
0   OXSQ  2025-01-28 08:29:44            2.6503          -0.009700   
1      C  2025-01-28 08:29:56           81.5500           0.480003   

   pre_market_change_percent  regular_market_time  regular_market_price  \
0                  -0.003647  2025-01-28 15:00:00                  2.64   
1                   0.005921  2025-01-28 15:00:02                 79.94   

   regular_market_change  regular_market_change_percent     post_market_time  \
0                  -0.02                      -0.007519  2025-01-28 18:03:20   
1                  -1.13                      -0.013938  2025-01-28 18:58:05   

   post_market_price  post_market_change  post_market_change_percent  
0             2.6500            0.010000                    0.003788  
1            79.7601           -0.179901                   -0.002250  


In [53]:

def get_market_records(symbol_list: list) -> pd.DataFrame:
    """
    Fetch instant market data for a list of symbols.

    Args:
        symbol_list (list): List of stock symbols.

    Returns:
        pd.DataFrame: One row per symbol with a ``symbol`` column followed by
        the pre-, regular- and post-market time, price, change and
        change-percent columns. A field absent from the payload is filled
        with the string "NULL". If an exception occurs, the error and
        traceback are printed and a DataFrame with no rows and no columns is
        returned.
    """
    # Output column -> key in the yahooquery price payload.
    field_map = {
        "pre_market_time": "preMarketTime",
        "pre_market_price": "preMarketPrice",
        "pre_market_change": "preMarketChange",
        "pre_market_change_percent": "preMarketChangePercent",
        "regular_market_time": "regularMarketTime",
        "regular_market_price": "regularMarketPrice",
        "regular_market_change": "regularMarketChange",
        "regular_market_change_percent": "regularMarketChangePercent",
        "post_market_time": "postMarketTime",
        "post_market_price": "postMarketPrice",
        "post_market_change": "postMarketChange",
        "post_market_change_percent": "postMarketChangePercent",
    }

    try:
        # Fetch data using yahooquery
        prices = Ticker(symbol_list).price

        market_data = {}
        for symbol, details in prices.items():
            # A symbol that could not be resolved may come back as a non-dict
            # value (e.g. an error message string). Treat it as "no fields"
            # so a single bad symbol does not wipe out the whole batch.
            if not isinstance(details, dict):
                details = {}
            market_data[symbol] = {
                column: details.get(source_key, "NULL")
                for column, source_key in field_map.items()
            }

        # Convert the dictionary to a Pandas DataFrame keyed by symbol, then
        # expose that key as a regular "symbol" column.
        market_panda_df = pd.DataFrame.from_dict(market_data, orient="index")
        market_panda_df.index.name = "symbol"
        return market_panda_df.reset_index()

    except Exception as e:
        # Handle exceptions and print error message
        print(f"An error occurred: {e}")
        traceback.print_exc()
        return pd.DataFrame()  # Return an empty DataFrame on error

# Ad-hoc check of get_market_records() with two symbols.
symbol_list=["C","BAC"]
# Last expression of the cell, so the notebook displays the returned DataFrame.
get_market_records(symbol_list)

Unnamed: 0,symbol,pre_market_time,pre_market_price,pre_market_change,pre_market_change_percent,regular_market_time,regular_market_price,regular_market_change,regular_market_change_percent,post_market_time,post_market_price,post_market_change,post_market_change_percent
0,C,2025-01-28 08:29:56,81.55,0.480003,0.005921,2025-01-28 15:00:02,79.94,-1.13,-0.013938,2025-01-28 18:58:05,79.7601,-0.179901,-0.00225
1,BAC,2025-01-28 08:29:10,47.075,-0.014999,-0.000319,2025-01-28 15:00:02,46.84,-0.25,-0.005309,2025-01-28 18:59:31,46.83,-0.009998,-0.000213


In [51]:
def get_market_records_consolidated(group_symbol_list: list) -> pd.DataFrame:
    """
    Fetch instant market data group by group and consolidate the results.

    Args:
        group_symbol_list (list): List of ``(group_id, symbol)`` pairs.
            Symbols sharing a ``group_id`` are requested together in one
            ``get_market_records`` call.

    Returns:
        pd.DataFrame: The per-group DataFrames concatenated with a fresh
        index. For empty input, or if an exception occurs (the error and
        traceback are printed), an empty DataFrame with the market columns
        is returned.
    """
    market_columns = [
        "symbol",
        "pre_market_time",
        "pre_market_price",
        "pre_market_change",
        "pre_market_change_percent",
        "regular_market_time",
        "regular_market_price",
        "regular_market_change",
        "regular_market_change_percent",
        "post_market_time",
        "post_market_price",
        "post_market_change",
        "post_market_change_percent",
    ]

    try:
        pairs = list(group_symbol_list)
        if not pairs:
            # Nothing to fetch: return the empty schema directly instead of
            # going through a ValueError from max() and a traceback dump.
            return pd.DataFrame(columns=market_columns)

        # Only used for the "n/total" progress messages.
        largest_group_id = max(group_id for group_id, _ in pairs)

        # Group symbols by group_id.
        grouped_symbols = defaultdict(list)
        for group_id, symbol in pairs:
            grouped_symbols[group_id].append(symbol)

        stacked_market_group_panda_dfs = []
        for group_id, group_symbols in grouped_symbols.items():
            print(f"Processing group {group_id}/{largest_group_id} with {len(group_symbols)} symbols...")
            market_group_panda_df = get_market_records(group_symbols)
            print(f"Retrieved {len(market_group_panda_df)} records for group {group_id}/{largest_group_id}.")
            stacked_market_group_panda_dfs.append(market_group_panda_df)

            # Random pause between requests.
            sleep_time = random.randint(1, 5)
            print(f"Sleeping for {sleep_time} seconds...")
            time.sleep(sleep_time)

        # Combine all DataFrames into one
        return pd.concat(stacked_market_group_panda_dfs, ignore_index=True)

    except Exception as e:
        # Handle exceptions and return an empty DataFrame
        print(f"An error occurred: {e}")
        traceback.print_exc()
        return pd.DataFrame(columns=market_columns)



In [52]:
# Sample input: (group_id, symbol) pairs forming two groups of two symbols each.
group_symbol_list=[(1,'AADR'), (1,'AAXJ'), (2,'ACWI'), (2,'ACWV')]
# Last expression of the cell, so the notebook displays the consolidated DataFrame.
get_market_records_consolidated(group_symbol_list)

Processing group 1/2 with 2 symbols...
Retrieved 2 records for group 1/2.
Sleeping for 4 seconds...
Processing group 2/2 with 2 symbols...
Retrieved 2 records for group 2/2.
Sleeping for 1 seconds...


Unnamed: 0,symbol,pre_market_time,pre_market_price,pre_market_change,pre_market_change_percent,regular_market_time,regular_market_price,regular_market_change,regular_market_change_percent,post_market_time,post_market_price,post_market_change,post_market_change_percent
0,AADR,,,,,2025-01-28 15:00:00,73.8983,0.8983,0.012305,,,,
1,AAXJ,,,,,2025-01-28 15:00:01,72.21,0.610001,0.00852,2025-01-28 15:00:08,72.21,0.0,0.0
2,ACWI,2025-01-28 07:47:03,120.6,0.0,0.0,2025-01-28 15:00:01,121.4,0.800003,0.006634,2025-01-28 18:51:40,121.52,0.119995,0.000988
3,ACWV,2025-01-28 08:13:38,110.05,-1.98,-0.017674,2025-01-28 15:00:00,111.56,-0.470001,-0.004195,,,,
