In [79]:
from yahooquery import Ticker
import yfinance as yf
import pandas as pd
from datetime import datetime, date, timedelta
import warnings
from collections import defaultdict
import time


# Silence FutureWarning messages whose originating module name matches "yahooquery".
warnings.filterwarnings("ignore", category=FutureWarning, module="yahooquery")

def get_raw_yahoo(api, symbol_list, start_date):
    """Download daily price history for ``symbol_list`` from ``start_date`` up to today.

    Args:
        api: ``"yahooquery"`` uses ``yahooquery.Ticker(...).history``; any other
            value falls through to ``yfinance.download``.
        symbol_list: Ticker symbols requested together in one call.
        start_date: First date to request; passed unchanged to the chosen API.

    Returns:
        pd.DataFrame: The columns ``date``, ``symbol``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``, with ``date`` parsed from the first
        10 characters of the API's date value. If anything raises, the error is
        printed and an empty frame with the same columns is returned.
    """
    columns_to_select = ["date", "symbol", "open", "high", "low", "close", "volume"]

    try:
        # To avoid time zone issue, end_date is set to the date of tomorrow
        end_date = date.today() + timedelta(days=1)

        if api == "yahooquery":
            hist_data = (
                Ticker(symbol_list)
                .history(start=start_date, end=end_date, interval="1d")
                .reset_index()
            )
        else:
            raw_download = yf.download(
                tickers=symbol_list,
                start=start_date,
                end=end_date,
                interval="1d",
                group_by="ticker",  # Keep data grouped by ticker if multiple symbol_list are provided
                auto_adjust=False,  # Keep original values without adjustment
            )

            # Stack column level 0 into the row index so each row carries its
            # own ticker, then rename to the lower-case schema used downstream.
            # NOTE(review): assumes the download has two-level columns with the
            # ticker on level 0 (what group_by="ticker" requests) - confirm for
            # single-symbol calls.
            hist_data = (
                raw_download.stack(level=0, future_stack=True)
                .reset_index()
                .rename(columns={"Date": "date", "Ticker": "symbol", "Open": "open", "High": "high",
                                 "Low": "low", "Close": "close", "Volume": "volume"})
            )

        # .copy() detaches the selection from the raw frame; without it the
        # assignment to "date" below writes into a derived slice and pandas
        # emits SettingWithCopyWarning.
        hist_data = hist_data[columns_to_select].copy()

        # date could be 2012-1-1 or 2012-1-1 12:33:55.000 +5:00:00. Such mixed formats will cause problems,
        # so convert to string, keep the left 10 chars, and parse that.
        hist_data["date"] = pd.to_datetime(hist_data["date"].astype(str).str.slice(0, 10))

        return hist_data

    except Exception as e:
        # Best effort: report the failing request and hand back an empty,
        # correctly-shaped frame so callers can still concatenate results.
        print(f"An error occurred: {e}, {symbol_list}, {start_date}")
        return pd.DataFrame(columns=columns_to_select)

In [None]:
# symbol_list=['DBMF', 'DGS']
# get_raw_yahoo('yahooquery',symbol_list, '2024-11-15')

In [47]:
from collections import defaultdict
import time
import random

def get_raw_yahoo_by_looping_groups(api, group_date_symbol_list):
    """Fetch price history one group at a time and return a single combined frame.

    Args:
        api: Forwarded unchanged to ``get_raw_yahoo``.
        group_date_symbol_list: Iterable of ``(group_id, group_start_date, symbol)``
            tuples. Symbols sharing the same ``(group_id, group_start_date)``
            are requested together in one ``get_raw_yahoo`` call.

    Returns:
        pd.DataFrame: All non-empty group results concatenated with a fresh
        index. When the input is empty, or every group came back empty, an
        empty frame with the columns ``date``, ``symbol``, ``open``, ``high``,
        ``low``, ``close``, ``volume`` is returned.
    """
    expected_columns = ["date", "symbol", "open", "high", "low", "close", "volume"]

    # Materialise once: the rows are scanned twice (max + grouping), and this
    # also lets an empty input be detected before max()/pd.concat() raise on it.
    group_rows = list(group_date_symbol_list)
    if not group_rows:
        return pd.DataFrame(columns=expected_columns)

    # Only used for the "n/N" progress messages below.
    largest_group_id = max(row[0] for row in group_rows)

    # Collect the symbols of each (group_id, group_start_date) key, keeping
    # the order in which keys first appear.
    grouped_symbol_list = defaultdict(list)
    for group_id, group_start_date, symbol in group_rows:
        grouped_symbol_list[(group_id, group_start_date)].append(symbol)

    warnings.filterwarnings("ignore", category=FutureWarning, module="yahooquery")

    group_frames = []
    for (group_id, group_start_date), group_symbol_list in grouped_symbol_list.items():
        print(f"Working on {group_id}/{largest_group_id}")
        hist_group_panda_df = get_raw_yahoo(api, group_symbol_list, group_start_date)
        print(f"{len(hist_group_panda_df)} records in {group_id}/{largest_group_id}.")
        group_frames.append(hist_group_panda_df)

        # Random 1-5 second pause after every group (presumably to space out
        # requests to the remote API).
        sleep_time = random.randint(1, 5)
        print(f"Sleeping for {sleep_time} seconds...")
        time.sleep(sleep_time)

    # Groups that failed come back as empty frames; leaving them out of the
    # concat avoids pandas' FutureWarning about empty entries affecting dtypes.
    non_empty_frames = [frame for frame in group_frames if not frame.empty]
    if not non_empty_frames:
        return pd.DataFrame(columns=expected_columns)

    return pd.concat(non_empty_frames, ignore_index=True)

In [48]:
from yahooquery import Ticker
import yfinance as yf
import pandas as pd
from datetime import datetime, date, timedelta
import warnings
from collections import defaultdict
import time


# Silence FutureWarning messages whose originating module name matches "yahooquery".
warnings.filterwarnings("ignore", category=FutureWarning, module="yahooquery")

# NOTE(review): get_raw_yahoo is also defined in an earlier cell of this
# notebook; when the cells run top to bottom this later definition is the one
# that stays bound. Consider keeping a single definition.
def get_raw_yahoo(api, symbol_list, start_date):
    """Download daily price history for ``symbol_list`` from ``start_date`` up to today.

    Args:
        api: ``"yahooquery"`` uses ``yahooquery.Ticker(...).history``; any other
            value falls through to ``yfinance.download``.
        symbol_list: Ticker symbols requested together in one call.
        start_date: First date to request; passed unchanged to the chosen API.

    Returns:
        pd.DataFrame: The columns ``date``, ``symbol``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``, with ``date`` parsed from the first
        10 characters of the API's date value. If anything raises, the error is
        printed and an empty frame with the same columns is returned.
    """
    columns_to_select = ["date", "symbol", "open", "high", "low", "close", "volume"]

    try:
        # To avoid time zone issue, end_date is set to the date of tomorrow
        end_date = date.today() + timedelta(days=1)

        if api == "yahooquery":
            hist_data = (
                Ticker(symbol_list)
                .history(start=start_date, end=end_date, interval="1d")
                .reset_index()
            )
        else:
            raw_download = yf.download(
                tickers=symbol_list,
                start=start_date,
                end=end_date,
                interval="1d",
                group_by="ticker",  # Keep data grouped by ticker if multiple symbol_list are provided
                auto_adjust=False,  # Keep original values without adjustment
            )

            # Stack column level 0 into the row index so each row carries its
            # own ticker, then rename to the lower-case schema used downstream.
            # NOTE(review): assumes the download has two-level columns with the
            # ticker on level 0 (what group_by="ticker" requests) - confirm for
            # single-symbol calls.
            hist_data = (
                raw_download.stack(level=0, future_stack=True)
                .reset_index()
                .rename(columns={"Date": "date", "Ticker": "symbol", "Open": "open", "High": "high",
                                 "Low": "low", "Close": "close", "Volume": "volume"})
            )

        # .copy() detaches the selection from the raw frame; without it the
        # assignment to "date" below writes into a derived slice and pandas
        # emits SettingWithCopyWarning.
        hist_data = hist_data[columns_to_select].copy()

        # date could be 2012-1-1 or 2012-1-1 12:33:55.000 +5:00:00. Such mixed formats will cause problems,
        # so convert to string, keep the left 10 chars, and parse that.
        hist_data["date"] = pd.to_datetime(hist_data["date"].astype(str).str.slice(0, 10))

        return hist_data

    except Exception as e:
        # Best effort: report which request failed (symbols and start date, in
        # the same format as the other definition of this function) and hand
        # back an empty, correctly-shaped frame.
        print(f"An error occurred: {e}, {symbol_list}, {start_date}")
        return pd.DataFrame(columns=columns_to_select)

In [82]:

def get_market_quote(symbol_list: list) -> pd.DataFrame:
    """
    Fetch instant market data for a list of symbols.

    Args:
        symbol_list (list): List of stock symbols.

    Returns:
        pd.DataFrame: One row per symbol, with ``symbol`` first followed by the
        exchange, market-state and pre/regular/post market fields. A field that
        is absent from the API response is filled with the string ``"NULL"``.
        On any error the message and traceback are printed and an empty
        DataFrame with the same columns is returned.
    """
    # Local import: none of the import cells visible in this notebook bring
    # `traceback` into scope, so the error handler below would otherwise fail
    # with NameError and hide the real exception.
    import traceback

    # (output column, key in the yahooquery price payload), in output order.
    quote_fields = (
        ("exchange_name", "exchangeName"),
        ("current_market_state", "marketState"),
        ("pre_market_time", "preMarketTime"),
        ("pre_market_price", "preMarketPrice"),
        ("pre_market_change", "preMarketChange"),
        ("pre_market_change_percent", "preMarketChangePercent"),
        ("regular_market_time", "regularMarketTime"),
        ("regular_market_price", "regularMarketPrice"),
        ("regular_market_change", "regularMarketChange"),
        ("regular_market_change_percent", "regularMarketChangePercent"),
        ("post_market_time", "postMarketTime"),
        ("post_market_price", "postMarketPrice"),
        ("post_market_change", "postMarketChange"),
        ("post_market_change_percent", "postMarketChangePercent"),
    )
    output_columns = ["symbol"] + [column for column, _ in quote_fields]

    try:
        # Fetch data using yahooquery
        prices = Ticker(symbol_list).price

        records = []
        for symbol, details in prices.items():
            if not isinstance(details, dict):
                # presumably an error message returned in place of the quote
                # for a symbol that could not be resolved - verify against
                # yahooquery. Skip it so one bad symbol does not discard the
                # whole batch.
                print(f"Skipping {symbol}: {details}")
                continue

            record = {"symbol": symbol}
            for column, source_key in quote_fields:
                record[column] = details.get(source_key, "NULL")
            records.append(record)

        # Passing `columns` keeps the schema stable even when no record survived.
        return pd.DataFrame(records, columns=output_columns)

    except Exception as e:
        # Handle exceptions and print error message
        print(f"An error occurred: {e}")
        traceback.print_exc()
        return pd.DataFrame(columns=output_columns)  # Empty, but with the expected schema

# symbol_list=["C","AAXJ"]
# get_market_quote(symbol_list)

In [83]:
def get_market_quote_consolidated(group_symbol_list: list) -> pd.DataFrame:
    """
    Fetch market quotes group by group and consolidate them into one DataFrame.

    Args:
        group_symbol_list (list): List of ``(group_id, symbol)`` tuples. Symbols
            sharing a ``group_id`` are sent to ``get_market_quote`` together.

    Returns:
        pd.DataFrame: The non-empty per-group quote frames concatenated with a
        fresh index. For empty input, when every group came back empty, or on
        any error (which is printed with its traceback), an empty DataFrame
        with the quote columns is returned.
    """
    # Local import: none of the import cells visible in this notebook bring
    # `traceback` into scope, so the error handler below would otherwise fail
    # with NameError and hide the real exception.
    import traceback

    quote_columns = [
        "symbol", "exchange_name", "current_market_state",
        "pre_market_time", "pre_market_price", "pre_market_change", "pre_market_change_percent",
        "regular_market_time", "regular_market_price", "regular_market_change", "regular_market_change_percent",
        "post_market_time", "post_market_price", "post_market_change", "post_market_change_percent",
    ]

    # Nothing to fetch: max() below would raise on an empty list.
    if not group_symbol_list:
        return pd.DataFrame(columns=quote_columns)

    try:
        # Only used for the "n/N" progress messages below.
        largest_group_id = max(row[0] for row in group_symbol_list)

        # Group symbols by group_id, keeping first-seen order of the groups.
        grouped_symbols = defaultdict(list)
        for group_id, symbol in group_symbol_list:
            grouped_symbols[group_id].append(symbol)

        group_frames = []
        for group_id, group_symbols in grouped_symbols.items():
            print(f"Processing group {group_id}/{largest_group_id} with {len(group_symbols)} symbols...")
            group_panda_df = get_market_quote(group_symbols)
            print(f"Retrieved {len(group_panda_df)} records for group {group_id}/{largest_group_id}.")
            group_frames.append(group_panda_df)

            # Random 1-5 second pause after every group (presumably to space
            # out requests to the remote API).
            sleep_time = random.randint(1, 5)
            print(f"Sleeping for {sleep_time} seconds...")
            time.sleep(sleep_time)

        # Leave out empty group results so they cannot influence the dtypes of
        # the combined frame.
        non_empty_frames = [frame for frame in group_frames if not frame.empty]
        if not non_empty_frames:
            return pd.DataFrame(columns=quote_columns)

        # Combine all DataFrames into one
        return pd.concat(non_empty_frames, ignore_index=True)

    except Exception as e:
        # Handle exceptions and return an empty DataFrame
        print(f"An error occurred: {e}")
        traceback.print_exc()
        return pd.DataFrame(columns=quote_columns)



In [85]:
# group_symbol_list=[(1,'C'), (1,'AAXJ'), (2,'ACWI'), (2,'ACWV')]
# get_market_quote_consolidated(group_symbol_list)

Processing group 1/2 with 2 symbols...
Retrieved 2 records for group 1/2.
Sleeping for 4 seconds...
Processing group 2/2 with 2 symbols...
Retrieved 2 records for group 2/2.
Sleeping for 3 seconds...


Unnamed: 0,symbol,exchange_name,current_market_state,pre_market_time,pre_market_price,pre_market_change,pre_market_change_percent,regular_market_time,regular_market_price,regular_market_change,regular_market_change_percent,post_market_time,post_market_price,post_market_change,post_market_change_percent
0,C,NYSE,POSTPOST,2025-01-29 08:29:59,79.78,-0.160004,-0.002002,2025-01-29 15:00:01,80.63,0.689995,0.008631,2025-01-29 18:59:21,81.1,0.470001,0.005829
1,AAXJ,NasdaqGM,POSTPOST,,,,,2025-01-29 15:00:01,72.23,0.020004,0.000277,,,,
2,ACWI,NasdaqGM,POSTPOST,2025-01-29 07:55:53,121.47,0.07,0.000577,2025-01-29 15:00:02,121.04,-0.360001,-0.002965,2025-01-29 16:00:04,122.8,1.76,0.014541
3,ACWV,Cboe US,POSTPOST,,,,,2025-01-29 15:00:00,111.62,0.060005,0.000538,2025-01-29 15:07:12,108.14,-3.49,-0.031264


In [89]:
# Scratch check: print the Python type of every element of the first tuple in x.
x = [(1, '2004-01-01'), (1, 'AAXJ'), (2, 'ACWI'), (2, 'ACWV')]
for element in x[0]:
    print(type(element))


<class 'int'>
<class 'str'>
