# Alpaca BarSet Data Exploration and Visualization

This notebook provides comprehensive methods to explore and visualize Alpaca BarSet data.

In [1]:
# Import required libraries
import os
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest
from alpaca.data.timeframe import TimeFrame
from plotly.subplots import make_subplots

# Set up display options
# Never truncate columns or wrap on width; cap printed rows at 100.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

In [2]:
# API credentials
# Secrets must never be stored in the notebook itself: read the key pair from
# the environment instead. Export APCA_API_KEY_ID and APCA_API_SECRET_KEY
# before starting the kernel.
# SECURITY: the key pair that used to be hardcoded in this cell has been
# exposed and should be revoked/rotated in the Alpaca dashboard.
API_KEY = os.environ.get("APCA_API_KEY_ID")
API_SECRET = os.environ.get("APCA_API_SECRET_KEY")

# Fail fast with an actionable message rather than an opaque auth error later.
if not API_KEY or not API_SECRET:
    raise RuntimeError(
        "Alpaca credentials not found. Set the APCA_API_KEY_ID and "
        "APCA_API_SECRET_KEY environment variables before running this notebook."
    )

# Initialize client
data_client = StockHistoricalDataClient(API_KEY, API_SECRET)

In [3]:
# Get historical data.
# The window is the 30 days that end 15 days before "now"; it stops short of
# today because very recent bars can run into subscription issues.
window_length = timedelta(days=30)
end_date = datetime.now() - timedelta(days=15)
start_date = end_date - window_length

# Daily bars for a single symbol over [start_date, end_date]
request_params = StockBarsRequest(
    symbol_or_symbols=["AAPL"],
    timeframe=TimeFrame.Day,
    start=start_date,
    end=end_date,
)

fetch_message = f"Fetching data from {start_date.date()} to {end_date.date()}..."
print(fetch_message)
# NOTE: despite its name, `df` holds the object returned by get_stock_bars
# (a BarSet), not a pandas DataFrame; later cells convert it via `df.df`.
df = data_client.get_stock_bars(request_params)
print("Data fetched successfully!")

Fetching data from 2025-08-04 to 2025-09-03...
Data fetched successfully!


## 1. Exploring the BarSet Structure

In [4]:
# Method 1: Access as dictionary
banner = "=" * 60
print(banner)
print("METHOD 1: ACCESS AS DICTIONARY")
print(banner)
print(f"\nType of df: {type(df)}")
print(f"Available symbols: {list(df.data.keys())}")

# Indexing the BarSet by ticker yields the bars stored for that symbol
aapl_bars = df["AAPL"]
print(f"\nNumber of bars for AAPL: {len(aapl_bars)}")
print(f"Type of bars list: {type(aapl_bars)}")

# Only inspect the first element when at least one bar came back
if aapl_bars:
    bar_type = type(aapl_bars[0])
else:
    bar_type = 'No bars'
print(f"Type of individual bar: {bar_type}")

METHOD 1: ACCESS AS DICTIONARY

Type of df: <class 'alpaca.data.models.bars.BarSet'>
Available symbols: ['AAPL']

Number of bars for AAPL: 21
Type of bars list: <class 'list'>
Type of individual bar: <class 'alpaca.data.models.bars.Bar'>


In [5]:
# Method 2: Explore individual bar attributes
print("=" * 60)
print("METHOD 2: INDIVIDUAL BAR ATTRIBUTES")
print("=" * 60)

if aapl_bars:
    first_bar = aapl_bars[0]
    print("\nFirst Bar Details:")
    print(f"  Timestamp: {first_bar.timestamp}")
    print(f"  Open: ${first_bar.open:.2f}")
    print(f"  High: ${first_bar.high:.2f}")
    print(f"  Low: ${first_bar.low:.2f}")
    print(f"  Close: ${first_bar.close:.2f}")
    # volume arrives as a float; ",.0f" keeps the thousands separators without
    # the misleading trailing ".0" on what is a share count.
    print(f"  Volume: {first_bar.volume:,.0f}")

    # NOTE(review): vwap and trade_count appear to be optional on the Bar
    # model - confirm against the alpaca-py docs. Print "n/a" instead of
    # raising a TypeError when either is missing.
    vwap_text = f"${first_bar.vwap:.2f}" if first_bar.vwap is not None else "n/a"
    trade_count_text = (
        f"{first_bar.trade_count:,.0f}" if first_bar.trade_count is not None else "n/a"
    )
    print(f"  VWAP: {vwap_text}")
    print(f"  Trade Count: {trade_count_text}")

    # Show all available attributes (public names only)
    print("\nAll available bar attributes:")
    attributes = [attr for attr in dir(first_bar) if not attr.startswith('_')]
    print(f"  {', '.join(attributes)}")

METHOD 2: INDIVIDUAL BAR ATTRIBUTES

First Bar Details:
  Timestamp: 2025-08-05 04:00:00+00:00
  Open: $203.40
  High: $205.34
  Low: $202.16
  Close: $202.92
  Volume: 44,155,079.0
  VWAP: $203.62
  Trade Count: 491,748.0

All available bar attributes:
  close, construct, copy, dict, from_orm, high, json, low, model_computed_fields, model_config, model_construct, model_copy, model_dump, model_dump_json, model_extra, model_fields, model_fields_set, model_json_schema, model_parametrized_name, model_post_init, model_rebuild, model_validate, model_validate_json, model_validate_strings, open, parse_file, parse_obj, parse_raw, schema, schema_json, symbol, timestamp, trade_count, update_forward_refs, validate, volume, vwap


In [None]:
# Method 3: Convert to pandas DataFrame (most convenient)
print("=" * 60)
print("METHOD 3: CONVERT TO PANDAS DATAFRAME")
print("=" * 60)

# Convert to DataFrame
df_pandas = df.df
print(f"\nDataFrame shape: {df_pandas.shape}")
print(f"DataFrame columns: {list(df_pandas.columns)}")
print(f"Index type: {type(df_pandas.index)}")
# Report the level names rather than `.name`: for a multi-level index
# (BarSet.df is expected to be indexed by symbol and timestamp - confirm
# against the alpaca-py docs) `.name` is None and tells the reader nothing.
# For a single-level index this prints a one-element list.
print(f"Index names: {list(df_pandas.index.names)}")

# Display first few rows (last expression, so the notebook renders it richly)
print("\nFirst 5 rows:")
df_pandas.head()

In [None]:
# Display DataFrame info
# DataFrame.info() writes its summary (dtypes, non-null counts, memory usage)
# to stdout itself, so it is called bare rather than wrapped in print().
print("DataFrame Info:")
df_pandas.info()

## 2. Statistical Analysis

In [None]:
# Get the data for AAPL as a frame indexed by timestamp only.
# BarSet.df puts the (symbol, timestamp) MultiIndex on the ROWS, not on the
# columns, so the symbol has to be selected from the row index. Without this,
# aapl_df keeps (symbol, timestamp) tuples as its index and the date-axis
# plots further down cannot use it.
if isinstance(df_pandas.index, pd.MultiIndex):
    # level 0 is the symbol level; xs() drops it from the result
    aapl_df = df_pandas.xs('AAPL', level=0).copy()
elif isinstance(df_pandas.columns, pd.MultiIndex):
    # kept for frames that carry the symbol as the outer column level
    aapl_df = df_pandas['AAPL'].copy()
else:
    aapl_df = df_pandas.copy()

# Statistical summary
print("Statistical Summary:")
aapl_df.describe()

In [None]:
# Calculate additional metrics (added as new columns on aapl_df)
aapl_df['daily_return'] = aapl_df['close'].pct_change()
aapl_df['MA_5'] = aapl_df['close'].rolling(window=5).mean()
aapl_df['MA_10'] = aapl_df['close'].rolling(window=10).mean()
# Rolling 10-period standard deviation of daily returns
aapl_df['volatility'] = aapl_df['daily_return'].rolling(window=10).std()

# Display calculated metrics
print("Calculated Metrics:")
print(f"Average Close Price: ${aapl_df['close'].mean():.2f}")
print(f"Max Close Price: ${aapl_df['close'].max():.2f}")
print(f"Min Close Price: ${aapl_df['close'].min():.2f}")
print(f"Price Range: ${aapl_df['close'].max() - aapl_df['close'].min():.2f}")
# The volume column is float, so format with ",.0f" (as the average below
# does) to avoid printing a share count with a trailing ".0".
print(f"\nTotal Volume: {aapl_df['volume'].sum():,.0f}")
print(f"Average Daily Volume: {aapl_df['volume'].mean():,.0f}")
print(f"\nDaily Return Volatility: {aapl_df['daily_return'].std():.4f} ({aapl_df['daily_return'].std()*100:.2f}%)")
# Simple return from the first close to the last close of the window
print(f"Total Return: {((aapl_df['close'].iloc[-1] / aapl_df['close'].iloc[0]) - 1) * 100:.2f}%")

# Show the DataFrame with new columns
print("\nDataFrame with calculated metrics (last 5 rows):")
aapl_df.tail()

## 3. Interactive Visualizations with Plotly

In [None]:
# Prepare data for visualization: a flat copy whose index is exposed as columns.
# It is taken before the Bollinger columns are added, so it does not carry them.
aapl_df_reset = aapl_df.reset_index()
timestamps = aapl_df_reset['timestamp']

# Bollinger Bands: 20-period rolling mean +/- 2 rolling standard deviations
rolling_mean = aapl_df['close'].rolling(window=20).mean()
rolling_std = aapl_df['close'].rolling(window=20).std()
aapl_df['BB_upper'] = rolling_mean + (rolling_std * 2)
aapl_df['BB_lower'] = rolling_mean - (rolling_std * 2)
aapl_df['MA_20'] = rolling_mean

# Three stacked panels sharing one x axis: price, volume, daily returns
fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=('AAPL Stock Price', 'Volume', 'Daily Returns'),
    row_heights=[0.5, 0.2, 0.3],
)

# Row 1: candlesticks ...
candles = go.Candlestick(
    x=timestamps,
    open=aapl_df_reset['open'],
    high=aapl_df_reset['high'],
    low=aapl_df_reset['low'],
    close=aapl_df_reset['close'],
    name='OHLC',
)
fig.add_trace(candles, row=1, col=1)

# ... overlaid with the moving-average lines
for ma_column, ma_label, ma_color in (
    ('MA_5', 'MA 5', 'orange'),
    ('MA_20', 'MA 20', 'blue'),
):
    ma_line = go.Scatter(
        x=timestamps,
        y=aapl_df[ma_column].values,
        name=ma_label,
        line=dict(color=ma_color, width=1),
    )
    fig.add_trace(ma_line, row=1, col=1)

# Row 2: volume bars, red when the bar closed below its open, green otherwise.
# `colors` stays a plain list of strings because a later cell reuses it.
colors = np.where(
    aapl_df_reset['close'] < aapl_df_reset['open'], 'red', 'green'
).tolist()
volume_bars = go.Bar(
    x=timestamps,
    y=aapl_df_reset['volume'],
    name='Volume',
    marker_color=colors,
    showlegend=False,
)
fig.add_trace(volume_bars, row=2, col=1)

# Row 3: daily returns in percent; negative red, everything else
# (including the leading NaN) green
daily_return_values = aapl_df['daily_return'].values
return_colors = np.where(daily_return_values < 0, 'red', 'green').tolist()
return_bars = go.Bar(
    x=timestamps,
    y=daily_return_values * 100,
    name='Daily Return %',
    marker_color=return_colors,
    showlegend=False,
)
fig.add_trace(return_bars, row=3, col=1)

# Figure-level layout
fig.update_layout(
    title='AAPL Stock Analysis',
    xaxis_rangeslider_visible=False,
    height=800,
    showlegend=True,
    hovermode='x unified',
)

# Axis titles: only the bottom panel labels the shared x axis
fig.update_xaxes(title_text="Date", row=3, col=1)
for panel_row, y_title in enumerate(("Price ($)", "Volume", "Return (%)"), start=1):
    fig.update_yaxes(title_text=y_title, row=panel_row, col=1)

fig.show()

## 4. Static Visualizations with Matplotlib

In [None]:
# Create comprehensive dashboard: a 3x2 grid of static panels
fig, axes = plt.subplots(3, 2, figsize=(15, 10))
fig.suptitle('AAPL Stock Analysis Dashboard', fontsize=16)
dates = aapl_df.index

# 1. Price and Moving Averages
ax1 = axes[0, 0]
ax1.plot(dates, aapl_df['close'], label='Close Price', linewidth=2)
for ma_column, ma_label in (('MA_5', 'MA 5'), ('MA_10', 'MA 10')):
    ax1.plot(dates, aapl_df[ma_column], label=ma_label, alpha=0.7)
ax1.set(title='Price with Moving Averages', xlabel='Date', ylabel='Price ($)')
ax1.legend()

# 2. Volume (bar colours come from `colors`, built in the Plotly cell)
ax2 = axes[0, 1]
ax2.bar(dates, aapl_df['volume'], color=colors, alpha=0.7)
ax2.set(title='Trading Volume', xlabel='Date', ylabel='Volume')

# 3. Daily Returns Distribution
ax3 = axes[1, 0]
returns = aapl_df['daily_return'].dropna()
ax3.hist(returns * 100, bins=20, edgecolor='black', alpha=0.7)
ax3.axvline(x=0, color='red', linestyle='--', alpha=0.5)
ax3.set(title='Daily Returns Distribution', xlabel='Return (%)', ylabel='Frequency')

# Annotate the histogram with mean / standard deviation, in percent
mean_return = returns.mean() * 100
std_return = returns.std() * 100
ax3.text(
    0.05,
    0.95,
    f'Mean: {mean_return:.3f}%\nStd: {std_return:.3f}%',
    transform=ax3.transAxes,
    verticalalignment='top',
    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
)

# 4. Cumulative Returns (compounded; plotted from the second date onwards,
#    which lines up with `returns` when only the first return is missing)
ax4 = axes[1, 1]
cumulative_returns = (1 + returns).cumprod() - 1
ax4.plot(dates[1:], cumulative_returns * 100, linewidth=2)
ax4.set(title='Cumulative Returns', xlabel='Date', ylabel='Cumulative Return (%)')

# 5. VWAP vs Close Price
ax5 = axes[2, 0]
for price_column, price_label in (('close', 'Close'), ('vwap', 'VWAP')):
    ax5.plot(dates, aapl_df[price_column], label=price_label, alpha=0.7)
ax5.set(title='Close Price vs VWAP', xlabel='Date', ylabel='Price ($)')
ax5.legend()

# 6. High-Low Spread
ax6 = axes[2, 1]
spread = aapl_df['high'] - aapl_df['low']
ax6.plot(dates, spread, linewidth=1, color='purple')
ax6.fill_between(dates, 0, spread, alpha=0.3, color='purple')
ax6.set(title='Daily High-Low Spread', xlabel='Date', ylabel='Spread ($)')

# Shared cosmetics: a light grid on every panel, slanted date labels on every
# panel with a date axis (the histogram's x axis is not dates)
for panel in (ax1, ax2, ax3, ax4, ax5, ax6):
    panel.grid(True, alpha=0.3)
for panel in (ax1, ax2, ax4, ax5, ax6):
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 5. Quick Access Functions

In [None]:
def quick_explore(barset, symbol="AAPL"):
    """Print a quick summary of one symbol's bars and return its DataFrame.

    Args:
        barset: Object exposing a ``.df`` attribute holding a pandas DataFrame
            with at least ``close``, ``high``, ``low`` and ``volume`` columns.
            The frame may be indexed by a (symbol, timestamp) MultiIndex on
            the rows, carry the symbol as the outer column level, or already
            be a single-symbol frame indexed by timestamp.
        symbol: Ticker to summarise when the frame holds several symbols.

    Returns:
        The single-symbol DataFrame the summary was computed from, indexed by
        timestamp.

    Raises:
        KeyError: If ``symbol`` is not present in a multi-symbol frame.
        IndexError: If the selected frame has no rows.
    """
    frame = barset.df

    # Select the symbol. The row-index case must be handled first: leaving
    # (symbol, timestamp) tuples in the index makes `.date()` below fail.
    if isinstance(frame.index, pd.MultiIndex):
        symbol_df = frame.xs(symbol, level=0)  # level 0 is the symbol level
    elif isinstance(frame.columns, pd.MultiIndex):
        symbol_df = frame[symbol]
    else:
        symbol_df = frame

    close = symbol_df['close']
    volume = symbol_df['volume']

    print(f"Quick Summary for {symbol}:")
    print("=" * 40)
    print(f"Date Range: {symbol_df.index[0].date()} to {symbol_df.index[-1].date()}")
    print(f"Number of trading days: {len(symbol_df)}")
    print(f"\nPrice Statistics:")
    print(f"  Current Close: ${close.iloc[-1]:.2f}")
    print(f"  Period High: ${symbol_df['high'].max():.2f}")
    print(f"  Period Low: ${symbol_df['low'].min():.2f}")
    print(f"  Average Close: ${close.mean():.2f}")

    # Calculate returns
    total_return = ((close.iloc[-1] / close.iloc[0]) - 1) * 100
    daily_returns = close.pct_change()
    daily_volatility = daily_returns.std()

    # Annualise with sqrt(252). With zero or undefined volatility the ratio is
    # meaningless, so report NaN instead of dividing by zero.
    if daily_volatility > 0:
        sharpe_ratio = (daily_returns.mean() / daily_volatility) * np.sqrt(252)
    else:
        sharpe_ratio = float('nan')

    print(f"\nPerformance:")
    print(f"  Total Return: {total_return:.2f}%")
    print(f"  Daily Volatility: {daily_volatility * 100:.2f}%")
    print(f"  Sharpe Ratio (annualized): {sharpe_ratio:.2f}")

    # The volume column is float; ",.0f" avoids printing counts with ".0"
    print(f"\nVolume Statistics:")
    print(f"  Total Volume: {volume.sum():,.0f}")
    print(f"  Average Daily Volume: {volume.mean():,.0f}")
    print(f"  Max Volume Day: {volume.max():,.0f}")

    return symbol_df

# Use the quick explore function on the bars fetched earlier (`df` holds the
# BarSet, not a DataFrame); the returned single-symbol frame is kept in quick_df
quick_df = quick_explore(df, "AAPL")

In [None]:
# Iterate through bars example: the per-symbol bars can be sliced like a list
print("Iterating through first 5 bars:")
print("=" * 40)
for i, bar in enumerate(df["AAPL"][:5]):
    print(f"Bar {i+1}:")
    print(f"  Date: {bar.timestamp.date()}")
    print(f"  Close: ${bar.close:.2f}")
    # volume is a float; ",.0f" prints the share count without a trailing ".0"
    print(f"  Volume: {bar.volume:,.0f}")
    print()

## 6. Export Data

In [None]:
# Export to CSV
# Writes aapl_df, including the metric columns added by earlier cells, to the
# current working directory; an existing file of the same name is overwritten.
aapl_df.to_csv('aapl_data.csv')
print("Data exported to aapl_data.csv")

# Export to Excel (optional, left disabled)
# NOTE(review): to_excel needs an Excel writer engine such as openpyxl
# installed - confirm before enabling.
# aapl_df.to_excel('aapl_data.xlsx')
# print("Data exported to aapl_data.xlsx")

# Export to JSON
# orient='index' keys each record by its index label; date_format='iso'
# serialises datetimes as ISO 8601 strings.
aapl_df.to_json('aapl_data.json', orient='index', date_format='iso')
print("Data exported to aapl_data.json")