In [36]:
from yahooquery import Ticker
import yfinance as yf
import pandas as pd
from datetime import datetime, date, timedelta
import warnings
from collections import defaultdict
import time


# Suppress FutureWarning messages attributed to the yahooquery module.
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    module="yahooquery",
)

def get_raw_yahoo(api, symbol_list, start_date):
    """Fetch daily OHLCV history for a batch of symbols from Yahoo Finance.

    Args:
        api: Backend selector. ``"yahooquery"`` fetches through
            ``yahooquery.Ticker``; any other value fetches through
            ``yfinance.download``.
        symbol_list: Ticker symbols requested together in one call.
        start_date: Start of the history window, passed unchanged to the
            backend as its ``start`` argument.

    Returns:
        A DataFrame with the columns ``date``, ``symbol``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``. ``date`` is parsed from the first
        ten characters of the backend's date value. If anything raises, the
        error is printed and an empty DataFrame with the same columns is
        returned (best-effort behaviour).
    """
    columns_to_select = ["date", "symbol", "open", "high", "low", "close", "volume"]

    try:
        # To avoid time zone issues, end_date is set to tomorrow's date.
        end_date = date.today() + timedelta(days=1)

        if api == "yahooquery":
            data = Ticker(symbol_list)
            hist_data = data.history(start=start_date, end=end_date, interval="1d")
            # NOTE(review): presumably (symbol, date) arrive as index levels
            # here; reset_index turns them into ordinary columns - confirm.
            hist_data = hist_data.reset_index()
        else:
            hist_data = yf.download(
                tickers=symbol_list,
                start=start_date,
                end=end_date,
                interval="1d",
                group_by="ticker",  # Keep data grouped by ticker if multiple symbols are provided
                auto_adjust=False,  # Keep original values without adjustment
            )

            # Move the outer column level (assumed to be the ticker, given
            # group_by="ticker" - confirm) into the rows so that each row is
            # one (date, ticker) observation, then align the column names
            # with the yahooquery naming.
            hist_data = hist_data.stack(level=0, future_stack=True).reset_index()
            hist_data = hist_data.rename(
                columns={
                    "Date": "date",
                    "Ticker": "symbol",
                    "Open": "open",
                    "High": "high",
                    "Low": "low",
                    "Close": "close",
                    "Volume": "volume",
                }
            )

        # Keep only the required columns. The explicit copy makes the result
        # an independent frame, so the "date" assignments below do not write
        # into a derived selection (which triggers SettingWithCopyWarning).
        hist_data = hist_data[columns_to_select].copy()

        # date could be 2012-1-1 or 2012-1-1 12:33:55.000 +5:00:00. Such mixed
        # formats cause problems, so convert to string and keep only the
        # leftmost 10 characters before parsing.
        hist_data["date"] = pd.to_datetime(
            hist_data["date"].astype(str).str.slice(0, 10)
        )

        return hist_data

    except Exception as e:
        # Deliberate best effort: report the problem and hand back an empty
        # frame with the expected columns so callers can still concatenate.
        print(f"An error occurred: {e}")
        return pd.DataFrame(columns=columns_to_select)

In [35]:
# symbol_list=['IVA', 'TBLT', 'TRNO', 'IONM', 'EGRX', 'OCAX', 'MBNKP', 'BRKHU', 'BNIX', 'SRDX']
# get_raw_yahoo('yahooquery',symbol_list, '2024-11-15')

Unnamed: 0,date,symbol,open,high,low,close,volume
0,2024-11-15,IVA,2.750000,2.750000,2.631000,2.720000,12500
1,2024-11-18,IVA,2.640000,2.730000,2.630000,2.730000,19900
2,2024-11-19,IVA,2.630000,2.767000,2.630000,2.740000,17700
3,2024-11-20,IVA,2.700000,2.740000,2.630000,2.740000,6700
4,2024-11-21,IVA,2.640000,2.670000,2.510000,2.660000,14200
...,...,...,...,...,...,...,...
362,2025-01-03,SRDX,39.750000,39.980000,39.500000,39.520000,167500
363,2025-01-06,SRDX,39.509998,40.209999,39.509998,39.709999,224100
364,2025-01-07,SRDX,39.709999,40.070000,38.820000,38.980000,221400
365,2025-01-08,SRDX,38.950001,39.150002,38.669998,39.000000,236500


In [3]:
from collections import defaultdict
import time

def get_raw_yahoo_by_looping_groups(api, group_date_symbol_list, pause_seconds=3):
    """Fetch history group by group and return one combined DataFrame.

    Args:
        api: Backend selector forwarded unchanged to ``get_raw_yahoo``.
        group_date_symbol_list: Iterable of
            ``(group_id, group_start_date, symbol)`` triples. Symbols that
            share the same ``(group_id, group_start_date)`` pair are fetched
            together in one ``get_raw_yahoo`` call.
        pause_seconds: Seconds to sleep after each group's fetch. Defaults
            to 3.

    Returns:
        The per-group frames concatenated with a fresh index. When there are
        no groups, an empty DataFrame with the columns ``date``, ``symbol``,
        ``open``, ``high``, ``low``, ``close`` and ``volume`` is returned
        instead of letting ``pd.concat`` raise on an empty list.
    """
    # Collect the symbols belonging to each (group_id, group_start_date) key.
    grouped_symbols = defaultdict(list)
    for group_id, group_start_date, symbol in group_date_symbol_list:
        grouped_symbols[(group_id, group_start_date)].append(symbol)

    warnings.filterwarnings("ignore", category=FutureWarning, module="yahooquery")

    hist_data_frames = []
    for (_group_id, group_start_date), group_symbols in grouped_symbols.items():
        hist_data_frames.append(get_raw_yahoo(api, group_symbols, group_start_date))
        # Pause after every group before issuing the next request.
        time.sleep(pause_seconds)

    if not hist_data_frames:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the expected columns instead.
        return pd.DataFrame(
            columns=["date", "symbol", "open", "high", "low", "close", "volume"]
        )

    return pd.concat(hist_data_frames, ignore_index=True)