In [6]:
%pip install yfinance pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yfinance as yf
import pandas as pd
import time
from datetime import datetime, timedelta
def fetch_historical_data(ticker,start_date,end_date,retries=3,delay=1):
    """Fetch historical stock data from Yahoo Finance, retrying on failure.

    Args:
        ticker (str): Stock ticker symbol.
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.
        retries (int): Maximum number of attempts. Defaults to 3.
        delay (int | float): Seconds to sleep between attempts. Defaults to 1.

    Returns:
        pd.DataFrame | None: The frame returned by ``Ticker.history`` as soon
        as an attempt yields a non-empty result. ``None`` when every attempt
        either raised or came back empty (or when ``retries`` is 0).
    """
    for attempt in range(retries):
        is_last_attempt = attempt >= retries - 1
        try:
            # Look up the ticker and request its price history for the window
            hist = yf.Ticker(ticker).history(start=start_date, end=end_date)

            if not hist.empty:
                return hist

            print(f"No data found for {ticker} between {start_date} and {end_date}.")
            if is_last_attempt:
                # Out of attempts: report "no data" as None so callers can rely
                # on a single `is not None` check instead of also testing .empty
                return None
            print(f"Retrying...({attempt + 1}/{retries})")

        except Exception as e:
            # Deliberately broad: any failure while fetching is treated as
            # retryable and reported rather than propagated
            print(f"❌ Error fetching {ticker} data: {str(e)}")

            if is_last_attempt:
                print(f"All retry attempts failed for {ticker}. No data returned.")
                return None
            print(f"Retrying in {delay} seconds...")

        # Reached only when another attempt remains
        time.sleep(delay)

    return None



In [8]:
# Request AAPL history for 2023-11-01 through 2024-11-30; as the cell's last
# expression, the returned value is rendered by Jupyter as the cell output.
fetch_historical_data(ticker='AAPL', start_date='2023-11-01', end_date='2024-11-30')

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-11-01 00:00:00-04:00,169.177288,172.372854,168.306663,172.115631,56934900,0.0,0.0
2023-11-02 00:00:00-04:00,173.649140,175.885045,173.589782,175.677292,77334800,0.0,0.0
2023-11-03 00:00:00-04:00,172.382789,174.935291,171.502276,174.767090,79829200,0.0,0.0
2023-11-06 00:00:00-05:00,174.499967,177.517445,174.331781,177.319580,63841300,0.0,0.0
2023-11-07 00:00:00-05:00,177.270114,180.495375,177.062361,179.881989,70530000,0.0,0.0
...,...,...,...,...,...,...,...
2024-11-22 00:00:00-05:00,227.033174,229.681201,227.033174,228.835022,38168300,0.0,0.0
2024-11-25 00:00:00-05:00,230.417878,232.199812,228.705621,231.821518,90152800,0.0,0.0
2024-11-26 00:00:00-05:00,232.279457,234.509377,232.279457,234.001663,45986200,0.0,0.0
2024-11-27 00:00:00-05:00,233.414333,234.628841,232.757301,233.872253,33498400,0.0,0.0


In [4]:
# Smoke test: fetch a short late-October 2024 window and summarise what came back
result = fetch_historical_data('AAPL', '2024-10-25', '2024-10-30')
print(f"Result type: {type(result)}")

# The fetcher signals failure with None, so handle that case first
if result is None:
    print("No data returned")
else:
    print(f"Shape: {result.shape}")
    print(result.head(2))

Result type: <class 'pandas.core.frame.DataFrame'>
Shape: (3, 7)
                                 Open        High         Low       Close  \
Date                                                                        
2024-10-25 00:00:00-04:00  228.454277  231.914797  228.285230  230.114929   
2024-10-28 00:00:00-04:00  232.014242  233.416339  231.248547  232.093781   

                             Volume  Dividends  Stock Splits  
Date                                                          
2024-10-25 00:00:00-04:00  38802300        0.0           0.0  
2024-10-28 00:00:00-04:00  36087100        0.0           0.0  


In [6]:
%pip install Flask

import sys
import os

# Try to locate a parent folder that contains the 'Data' package and add it to sys.path
def ensure_data_package_in_path(start_dir=None):
	"""Search ``start_dir`` and its ancestors for a folder containing ``Data``.

	Starting at ``start_dir`` (the current working directory when omitted),
	each directory on the way up is checked for a ``Data`` sub-directory.
	The first directory that has one is put at the front of ``sys.path``
	(unless it is already listed) so that ``import Data...`` can resolve.

	Args:
		start_dir (str | None): Directory to start from. ``None`` means
			``os.getcwd()``.

	Returns:
		bool: ``True`` if a directory containing ``Data`` was found,
		``False`` once the top of the path has been reached without a match.
	"""
	cur = os.path.abspath(os.getcwd() if start_dir is None else start_dir)
	while True:
		if os.path.isdir(os.path.join(cur, 'Data')):
			if cur not in sys.path:
				sys.path.insert(0, cur)
			return True
		parent = os.path.dirname(cur)
		# A path whose dirname is itself is the top of its tree. Testing this
		# instead of comparing against abspath(os.sep) guarantees termination
		# for other Windows drives, UNC shares and '//' roots, none of which
		# ever equal the current drive's root.
		if parent == cur:
			return False
		cur = parent

# ensure_data_package_in_path() starts at the current working directory and walks
# through every ancestor directory, so a single call already covers all parent
# levels; retrying from '..', '../..', etc. would only re-check the same folders.
if not ensure_data_package_in_path():
	raise ModuleNotFoundError(
		"Could not find 'Data' directory in parent folders. Ensure the 'Data' package (with HistoricalDataAPI.py) "
		"exists in a parent directory of this notebook."
	)

from Data.HistoricalDataAPI import app

# Bind to loopback only: debug=True turns on the interactive debugger, which can
# execute arbitrary code and must not be reachable from other machines.
# use_reloader=False because the auto-reloader restarts the process, which does
# not work from inside a notebook kernel.
app.run(debug=True, host='127.0.0.1', port=5000, use_reloader=False)

Note: you may need to restart the kernel to use updated packages.


ModuleNotFoundError: Could not find 'Data' directory in parent folders. Ensure the 'Data' package (with HistoricalDataAPI.py) exists in a parent directory of this notebook.

## Test Complete Data Pipeline

Now let's test the complete pipeline, including JSON conversion.

In [9]:
# Test 1: Import and test YahooData module
import json
import os
import sys

# Folder that holds YahooData.py. The default is a machine-specific absolute
# path; set the FINRUS_DATA_DIR environment variable to override it elsewhere.
SERVER_DATA_DIR = os.environ.get(
    'FINRUS_DATA_DIR',
    r'C:\Users\duway\OneDrive\Documentos\GitHub\web103_Capstone_FinRus\server\Data',
)

# Guarded so that re-running this cell does not stack duplicate sys.path entries
if SERVER_DATA_DIR not in sys.path:
    sys.path.insert(0, SERVER_DATA_DIR)

from YahooData import fetch_historical_data, fetch_multiple_tickers, get_recent_data

# Test single ticker
banner = "=" * 60
print(banner)
print("Test 1: Single Ticker Fetch")
print(banner)

result = fetch_historical_data('AAPL', '2024-10-25', '2024-10-30')

# A fetch counts as successful only when the payload carries metadata['success']
if result and result.get('metadata', {}).get('success'):
    metadata = result['metadata']
    print("✅ SUCCESS!")
    print(f"   Ticker: {result['ticker']}")
    print(f"   Data points: {metadata['data_points']}")
    print(f"   Cached: {metadata.get('cached', False)}")
    print("\n📊 Sample data (first 2 days):")
    for day in result['data'][:2]:
        print(f"   {day['Date']}: Close=${day['Close']}, Volume={day['Volume']:,}")

    # Test JSON serialization
    json_str = json.dumps(result, indent=2)
    print(f"\n✅ JSON serialization successful ({len(json_str):,} chars)")
else:
    error = result.get('metadata', {}).get('error', 'Unknown error') if result else 'No result'
    print("❌ FAILED:", error)

# NOTE(review): the shape of a failure payload is not visible here; .get() keeps
# this trailing line from raising KeyError should such a payload lack 'ticker'.
print("\n" + result.get('ticker', '') if result else "")

2025-11-11 14:59:48,917 - YahooData - INFO - Fetching data for AAPL from 2024-10-25 to 2024-10-30
2025-11-11 14:59:48,942 - YahooData - INFO - üíæ Cached data for AAPL
2025-11-11 14:59:48,944 - YahooData - INFO - ‚úÖ Successfully fetched 3 data points for AAPL


Test 1: Single Ticker Fetch
‚úÖ SUCCESS!
   Ticker: AAPL
   Data points: 3
   Cached: False

üìä Sample data (first 2 days):
   2024-10-25: Close=$230.11, Volume=38,802,300
   2024-10-28: Close=$232.09, Volume=36,087,100

‚úÖ JSON serialization successful (989 chars)

AAPL


In [10]:
# Test 2: Multiple Tickers
banner = "=" * 60
print(banner)
print("Test 2: Multiple Tickers Fetch")
print(banner)

tickers = ['AAPL', 'GOOGL', 'MSFT']
results = fetch_multiple_tickers(tickers, '2024-10-28', '2024-10-30')

# Tally per-ticker outcomes; a ticker passes only when metadata['success'] is truthy
success_count = 0
for ticker, result in results.items():
    if result and result.get('metadata', {}).get('success'):
        success_count += 1
        print(f"✅ {ticker}: {result['metadata']['data_points']} data points")
    else:
        error = result.get('metadata', {}).get('error', 'Unknown') if result else 'No result'
        print(f"❌ {ticker}: {error}")

print(f"\n📊 Success Rate: {success_count}/{len(tickers)} tickers")

2025-11-11 15:00:35,930 - YahooData - INFO - Processing ticker 1/3: AAPL
2025-11-11 15:00:35,931 - YahooData - INFO - Fetching data for AAPL from 2024-10-28 to 2024-10-30
2025-11-11 15:00:35,931 - YahooData - INFO - Fetching data for AAPL from 2024-10-28 to 2024-10-30


Test 2: Multiple Tickers Fetch


2025-11-11 15:00:36,207 - YahooData - INFO - üíæ Cached data for AAPL
2025-11-11 15:00:36,209 - YahooData - INFO - ‚úÖ Successfully fetched 2 data points for AAPL
2025-11-11 15:00:36,209 - YahooData - INFO - ‚úÖ Successfully fetched 2 data points for AAPL
2025-11-11 15:00:36,311 - YahooData - INFO - Processing ticker 2/3: GOOGL
2025-11-11 15:00:36,313 - YahooData - INFO - Fetching data for GOOGL from 2024-10-28 to 2024-10-30
2025-11-11 15:00:36,311 - YahooData - INFO - Processing ticker 2/3: GOOGL
2025-11-11 15:00:36,313 - YahooData - INFO - Fetching data for GOOGL from 2024-10-28 to 2024-10-30
2025-11-11 15:00:36,624 - YahooData - INFO - üíæ Cached data for GOOGL
2025-11-11 15:00:36,624 - YahooData - INFO - ‚úÖ Successfully fetched 2 data points for GOOGL
2025-11-11 15:00:36,624 - YahooData - INFO - üíæ Cached data for GOOGL
2025-11-11 15:00:36,624 - YahooData - INFO - ‚úÖ Successfully fetched 2 data points for GOOGL
2025-11-11 15:00:36,727 - YahooData - INFO - Processing ticker 3/

‚úÖ AAPL: 2 data points
‚úÖ GOOGL: 2 data points
‚úÖ MSFT: 2 data points

üìä Success Rate: 3/3 tickers


In [None]:
# Test 3: Start Flask API Server in Notebook
banner = "=" * 60
print(banner)
print("Test 3: Flask API Server")
print(banner)
print("Starting Flask API server...")
print("Once running, you can test endpoints at:")
for endpoint in (
    "/api/health",
    "/api/stock/AAPL?days_back=7",
    "/api/stock/multiple?tickers=AAPL,GOOGL",
):
    print(f"  - http://localhost:5000{endpoint}")
print("\nPress Ctrl+C to stop the server")
print(banner)

# Import and run Flask app
import os
import sys

# Folder that holds HistoricalDataAPI.py. The default is a machine-specific
# absolute path; set the FINRUS_DATA_DIR environment variable to override it.
SERVER_DATA_DIR = os.environ.get(
    'FINRUS_DATA_DIR',
    r'C:\Users\duway\OneDrive\Documentos\GitHub\web103_Capstone_FinRus\server\Data',
)

# Guarded so that re-running this cell does not stack duplicate sys.path entries
if SERVER_DATA_DIR not in sys.path:
    sys.path.insert(0, SERVER_DATA_DIR)

from HistoricalDataAPI import app

# Run the Flask app
# Note: This will block the notebook cell until stopped
app.run(debug=True, host='127.0.0.1', port=5000, use_reloader=False)