In [3]:
import requests
import pandas as pd
from datetime import datetime
import os

def get_stock_data(symbol, function='TIME_SERIES_DAILY', output_size='compact'):
    """
    Fetch stock data from Alpha Vantage API
    
    Parameters:
    symbol (str): Stock symbol (e.g., 'IBM', 'AAPL')
    function (str): Alpha Vantage API function to use
    output_size (str): 'compact' returns latest 100 datapoints, 'full' returns all available data
    
    Returns:
    pandas.DataFrame: Stock data
    """
    # Load API key from environment variable
    api_key = os.getenv('ALPHA_VANTAGE_API_KEY')
    
    if not api_key:
        raise ValueError("Please set ALPHA_VANTAGE_API_KEY environment variable")
    
    base_url = 'https://www.alphavantage.co/query'
    params = {
        'function': function,
        'symbol': symbol,
        'apikey': api_key,
        'outputsize': output_size
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        data = response.json()
        
        # Extract time series data
        if function == 'TIME_SERIES_DAILY':
            time_series = data.get('Time Series (Daily)', {})
        else:
            raise ValueError(f"Function {function} not implemented in this example")
        
        # Convert to DataFrame
        df = pd.DataFrame.from_dict(time_series, orient='index')
        
        # Clean up column names and convert to numeric
        df.columns = [col.split('. ')[1] for col in df.columns]
        for col in df.columns:
            df[col] = pd.to_numeric(df[col])
            
        df.index = pd.to_datetime(df.index)
        df = df.sort_index()
        
        return df
        
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

# Example usage
if __name__ == "__main__":

    os.environ['ALPHA_VANTAGE_API_KEY'] = 'YOUR_API_KEY' 
    

    symbol = 'AAPL'  # Example stock symbol
    df = get_stock_data(symbol)
    
    if df is not None:
        print(f"\nLatest data for {symbol}:")
        print(df.tail())
        
        # Basic statistics
        print("\nSummary statistics:")
        print(df['close'].describe())


Latest data for AAPL:
               open    high      low   close    volume
2025-02-03  229.990  231.83  225.700  228.01  73063301
2025-02-04  227.250  233.13  226.650  232.80  45067301
2025-02-05  228.530  232.67  228.270  232.47  39664989
2025-02-06  231.285  233.80  230.425  233.22  29925349
2025-02-07  232.600  234.00  227.260  227.63  39707224

Summary statistics:
count    100.000000
mean     234.085400
std        9.828695
min      216.320000
25%      227.267500
50%      231.595000
75%      240.245000
max      259.020000
Name: close, dtype: float64




In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

def prepare_trading_data(symbol, lookback=10, prediction_days=1):
    """
    Prepare stock data for KNN trading model using only price data
    
    Parameters:
    symbol (str): Stock symbol
    lookback (int): Number of days to look back for features
    prediction_days (int): Number of days ahead to predict
    
    Returns:
    X: Feature matrix
    y: Labels
    """
    # Get the stock data using our previous function
    df = get_stock_data(symbol, output_size='full')
    
    # Calculate returns and price changes
    df['returns'] = df['close'].pct_change()
    df['high_low_ratio'] = df['high'] / df['low']
    df['close_open_ratio'] = df['close'] / df['open']
    df['volume_change'] = df['volume'].pct_change()
    
    feature_columns = []
    
    # Create lagged features
    for i in range(1, lookback + 1):
        # Price returns
        df[f'return_lag_{i}'] = df['returns'].shift(i)
        feature_columns.append(f'return_lag_{i}')
        
        # Daily volatility (high/low ratio)
        df[f'hl_ratio_lag_{i}'] = df['high_low_ratio'].shift(i)
        feature_columns.append(f'hl_ratio_lag_{i}')
        
        # Daily price movement (close/open ratio)
        df[f'co_ratio_lag_{i}'] = df['close_open_ratio'].shift(i)
        feature_columns.append(f'co_ratio_lag_{i}')
        
        # Volume changes
        df[f'volume_change_lag_{i}'] = df['volume_change'].shift(i)
        feature_columns.append(f'volume_change_lag_{i}')
    
    # Add rolling standard deviations for volatility
    windows = [5, 10, 20]
    for window in windows:
        df[f'return_std_{window}'] = df['returns'].rolling(window=window).std()
        feature_columns.append(f'return_std_{window}')
    
    # Create target variable (1 if price goes up after prediction_days, 0 otherwise)
    df['target'] = (df['close'].shift(-prediction_days) > df['close']).astype(int)
    
    # Drop rows with NaN values
    df = df.dropna()
    
    # Prepare features and target
    X = df[feature_columns]
    y = df['target']
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y, df

def train_knn_model(X, y, n_neighbors=5):
    """
    Train a KNN model for trading
    """
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )
    
    # Create and train the model
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train, y_train)
    
    # Calculate accuracy
    train_accuracy = knn.score(X_train, y_train)
    test_accuracy = knn.score(X_test, y_test)
    
    return knn, X_train, X_test, y_train, y_test, train_accuracy, test_accuracy

def analyze_predictions(df, y_test, y_pred, test_start_idx):
    """
    Analyze the model's predictions
    """
    # Create a DataFrame with actual returns and predictions
    analysis_df = df.iloc[test_start_idx:].copy()
    analysis_df['actual_direction'] = y_test
    analysis_df['predicted_direction'] = y_pred
    
    # Calculate returns based on predictions
    analysis_df['next_day_return'] = analysis_df['close'].pct_change().shift(-1)
    analysis_df['strategy_return'] = analysis_df['next_day_return'] * analysis_df['predicted_direction']
    
    # Calculate cumulative returns
    analysis_df['cumulative_strategy_return'] = (1 + analysis_df['strategy_return']).cumprod()
    analysis_df['cumulative_market_return'] = (1 + analysis_df['next_day_return']).cumprod()
    
    return analysis_df

# Example usage
if __name__ == "__main__":
    symbol = 'AAPL'
    
    # Prepare data
    X, y, df = prepare_trading_data(symbol)
    
    # Train model
    model, X_train, X_test, y_train, y_test, train_acc, test_acc = train_knn_model(X, y)
    
    print(f"Train accuracy: {train_acc:.2f}")
    print(f"Test accuracy: {test_acc:.2f}")
    
    # Get predictions for test set
    y_pred = model.predict(X_test)
    
    # Analyze results
    test_start_idx = len(X) - len(X_test)
    analysis_df = analyze_predictions(df, y_test, y_pred, test_start_idx)
    
    # Print performance metrics
    strategy_return = analysis_df['cumulative_strategy_return'].iloc[-1] - 1
    market_return = analysis_df['cumulative_market_return'].iloc[-1] - 1
    
    print(f"\nStrategy Return: {strategy_return:.2%}")
    print(f"Market Return: {market_return:.2%}")

Train accuracy: 0.69
Test accuracy: 0.48

Strategy Return: nan%
Market Return: nan%


In [5]:
df.to_csv("data.csv")

In [1]:
import requests
import pandas as pd
from datetime import datetime
import time

def get_alpha_vantage_data(symbol, api_key, function='TIME_SERIES_DAILY', output_size='full'):
    """
    Fetch stock data from Alpha Vantage API
    
    Parameters:
    symbol (str): Stock symbol (e.g., 'AAPL' for Apple)
    api_key (str): Your Alpha Vantage API key
    function (str): Type of time series data to fetch
    output_size (str): 'compact' for latest 100 datapoints, 'full' for all available data
    
    Returns:
    pandas.DataFrame: Historical stock data
    """
    base_url = 'https://www.alphavantage.co/query'
    params = {
        'function': function,
        'symbol': symbol,
        'apikey': api_key,
        'outputsize': output_size
    }
    
    response = requests.get(base_url, params=params)
    data = response.json()
    
    # Check for error messages
    if "Error Message" in data:
        raise ValueError(f"API Error: {data['Error Message']}")
    
    # Extract time series data
    if function == 'TIME_SERIES_DAILY':
        time_series_key = 'Time Series (Daily)'
    elif function == 'TIME_SERIES_WEEKLY':
        time_series_key = 'Weekly Time Series'
    else:
        time_series_key = 'Time Series (Daily)'  # Default
    
    # Convert to DataFrame
    df = pd.DataFrame(data[time_series_key]).T
    
    # Convert string values to float
    for col in df.columns:
        df[col] = pd.to_numeric(df[col])
    
    # Rename columns
    df.columns = ['open', 'high', 'low', 'close', 'volume']
    
    # Add date as a column
    df.index = pd.to_datetime(df.index)
    df['date'] = df.index
    
    return df

def create_dataset(symbols, api_key, save_path='stock_data.csv'):
    """
    Create a dataset for multiple stock symbols
    
    Parameters:
    symbols (list): List of stock symbols
    api_key (str): Alpha Vantage API key
    save_path (str): Path to save the CSV file
    """
    all_data = []
    
    for symbol in symbols:
        print(f"Fetching data for {symbol}...")
        try:
            df = get_alpha_vantage_data(symbol, api_key)
            df['symbol'] = symbol
            all_data.append(df)
            time.sleep(12)  # Alpha Vantage has a limit of 5 API calls per minute for free tier
        except Exception as e:
            print(f"Error fetching data for {symbol}: {e}")
    
    # Combine all data
    if all_data:
        combined_df = pd.concat(all_data)
        combined_df.to_csv(save_path)
        print(f"Dataset saved to {save_path}")
        return combined_df
    else:
        print("No data was fetched")
        return None

# Example usage
if __name__ == "__main__":
    API_KEY = 'TKZMZK2F3VMKJ58C'  # Replace with your actual API key
    symbols = ['AAPL', 'MSFT', 'GOOGL', 'AMZN']  # Add more symbols as needed
    
    dataset = create_dataset(symbols, API_KEY, 'stock_data.csv')

Fetching data for AAPL...
Fetching data for MSFT...
Fetching data for GOOGL...
Fetching data for AMZN...
Dataset saved to stock_data.csv
