## **Part A**

### **1. Dataset Loading**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Directory that holds both input CSVs. Change this single constant to run
# the notebook somewhere that does not provide /content.
DATA_DIR = "/content"

# Load the sentiment index and the trade history into DataFrames.
sentiment_df = pd.read_csv(f"{DATA_DIR}/fear_greed_index.csv")
trader_df = pd.read_csv(f"{DATA_DIR}/historical_data.csv")

a. Number of Rows / columns:

In [None]:
# Report (rows, columns) for each freshly loaded frame.
sentiment_shape, trader_shape = sentiment_df.shape, trader_df.shape
print("Sentiment Dataset Shape (rows,columns): ", sentiment_shape)
print("Trader Dataset Shape (rows,columns): ", trader_shape)

In [None]:
# Preview the first five rows of the sentiment data.
sentiment_df.head(5)

In [None]:
# Preview the first five rows of the trade history.
trader_df.head(5)

b. missing values / duplicates

In [None]:
# Count missing values per column in the sentiment data.
sentiment_missing_counts = sentiment_df.isna().sum()
print("Sentiment Missing Values:")
print(sentiment_missing_counts)

In [None]:
# Number of rows that exactly repeat an earlier row.
sentiment_duplicate_count = sentiment_df.duplicated().sum()
print("Sentiment Duplicates:", sentiment_duplicate_count)

In [None]:
# Count missing values per column in the trade history.
trader_missing_counts = trader_df.isna().sum()
print("Trader Missing Values:")
print(trader_missing_counts)

In [None]:
# Number of trade rows that exactly repeat an earlier row.
trader_duplicate_count = trader_df.duplicated().sum()
print("Trader Duplicates:", trader_duplicate_count)

In [None]:
# Remove fully duplicated rows from both frames, keeping the first occurrence.
sentiment_df, trader_df = (
    sentiment_df.drop_duplicates(keep='first'),
    trader_df.drop_duplicates(keep='first'),
)

In [None]:
# Re-count duplicates (sentiment first, then trades) to confirm the cleanup.
print("Duplicates after cleaning:")
for cleaned in (sentiment_df, trader_df):
    print(cleaned.duplicated().sum())

2. Convert timestamps and align the datasets by date (daily level is fine).

In [None]:
# Parse the sentiment 'date' strings as day-month-year into datetime64.
# NOTE(review): the format assumes values like 31-12-2024 - confirm against the CSV.
parsed_sentiment_dates = pd.to_datetime(sentiment_df['date'], format="%d-%m-%Y")
sentiment_df['date'] = parsed_sentiment_dates

In [None]:
# Confirm the dtype of the parsed 'date' column.
sentiment_df.dtypes['date']

In [None]:
# Derive the calendar day of each trade from 'Timestamp'.
# The later conversion cell in this notebook treats 'Timestamp' as epoch
# milliseconds; without an explicit unit pd.to_datetime reads numeric values
# as nanoseconds, which collapses every trade into January 1970.
trader_df['date'] = pd.to_datetime(
    pd.to_numeric(trader_df['Timestamp'], errors='coerce'),
    unit='ms',
).dt.normalize()

In [None]:
# Preview the derived trade dates.
trader_df.loc[:, ['date']].head()

In [None]:
# Inspect the raw 'Timestamp' column: its dtype and first few values.
raw_timestamps = trader_df['Timestamp']
print(raw_timestamps.dtype)
print(raw_timestamps.head())

In [None]:
# Coerce 'Timestamp' to numbers (unparseable values become NaN), interpret
# them as epoch milliseconds, and truncate to midnight to get the trade date.
numeric_timestamps = pd.to_numeric(trader_df['Timestamp'], errors='coerce')
trade_moments = pd.to_datetime(numeric_timestamps, unit='ms')
trader_df['date'] = trade_moments.dt.normalize()

In [None]:
# Outer-join sentiment rows with trade rows on 'date', keeping dates that
# appear in only one of the two frames, then show both ends of the result.
merged_df = sentiment_df.merge(trader_df, on='date', how='outer')
print(merged_df.head())
print(merged_df.tail())

In [None]:
# Preview the trade dates after the millisecond-based conversion.
trader_df.loc[:, ['date']].head()

In [None]:
# List the trade-history column names (used to pick the exact labels below).
print(trader_df.columns)

In [None]:
# Proxy for leverage: each trade's USD size relative to the median USD size
# of the same account (the data has no explicit leverage column).
account_median_size = trader_df.groupby('Account')['Size USD'].transform('median')

# A zero median would turn the ratio into +/-inf (or NaN for 0/0), which then
# breaks downstream mean/std summaries. Mask zero medians so those rows become
# NaN and are skipped by pandas aggregations instead.
nonzero_median_size = account_median_size.where(account_median_size != 0)

trader_df['proxy_leverage'] = trader_df['Size USD'] / nonzero_median_size

In [None]:
# (column, aggregation) pair in the shape pandas named aggregation expects.
# NOTE(review): looks like a leftover from drafting the agg() call; no later cell in view reads it.
avg_size = ('Size USD', 'mean')

In [None]:
# One row per (date, Account): summed closed PnL, number of trades, and mean
# USD trade size. 'Size USD' stands in for size; there is no leverage column.
trades_by_account_day = trader_df.groupby(['date', 'Account'])
daily_trader = trades_by_account_day.agg(
    daily_pnl=pd.NamedAgg(column='Closed PnL', aggfunc='sum'),
    trade_count=pd.NamedAgg(column='Account', aggfunc='count'),
    avg_size=pd.NamedAgg(column='Size USD', aggfunc='mean'),
).reset_index()

In [None]:
# Attach each account's daily aggregates to the merged rows.
# The join key must include 'Account': joining on 'date' alone pairs every
# row of a day with every account active that day (a many-to-many blow-up
# that also produces Account_x / Account_y columns).
# daily_trader is unique per (date, Account) by construction, which
# validate='many_to_one' enforces.
final_merged_df = pd.merge(
    merged_df,
    daily_trader,
    on=['date', 'Account'],
    how='left',
    validate='many_to_one',
)
print(final_merged_df.head())

In [None]:
# Show the first rows of the per-(date, Account) aggregates.
print(daily_trader.head())

In [None]:
# Rebind merged_df to the account-day aggregates with that day's sentiment
# columns attached. Left join: account-days without a sentiment row are kept.
merged_df = daily_trader.merge(sentiment_df, on='date', how='left')

In [None]:
# Preview the account-day rows with sentiment attached.
merged_df.head(5)

In [None]:
# How many account-day rows found no sentiment classification in the join.
merged_df['classification'].isna().sum()

3.a. daily PnL per trader (or per account)

In [None]:
# Summed closed PnL per (date, Account).
# NOTE(review): this rebinds daily_trader to a PnL-only frame, replacing the
# wider aggregate built earlier in the notebook.
daily_trader = trader_df.groupby(['date', 'Account']).agg(
    daily_pnl=pd.NamedAgg(column='Closed PnL', aggfunc='sum')
).reset_index()

In [None]:
# Daily PnL table: one row per (date, Account) with the summed 'Closed PnL'.
pnl_by_account_day = trader_df.groupby(['date', 'Account'])['Closed PnL'].sum()
daily_pnl = pnl_by_account_day.reset_index(name='daily_pnl')

b. win rate, average trade size

In [None]:
# Flag trades with strictly positive closed PnL as wins.
trader_df['is_win'] = trader_df['Closed PnL'].gt(0)

In [None]:
# Per-account summary: share of winning trades and mean USD trade size.
trades_by_account = trader_df.groupby('Account')
trader_performance = trades_by_account.agg(
    win_rate=pd.NamedAgg(column='is_win', aggfunc='mean'),
    avg_trade_size=pd.NamedAgg(column='Size USD', aggfunc='mean'),
).reset_index()
print(trader_performance.head())

Leverage Distribution

In [None]:
# Mean proxy leverage per (date, Account).
leverage_by_account_day = trader_df.groupby(['date', 'Account'])['proxy_leverage'].mean()
leverage_df = leverage_by_account_day.reset_index(name='avg_leverage')

Number of Trades Per Day

In [None]:
# Number of trade rows per (date, Account).
trades_per_day = (
    trader_df.groupby(['date', 'Account'])
    .size()
    .rename('trade_count')
    .reset_index()
)
trades_per_day

In [None]:
# Number of trade rows per date across all accounts.
trade_rows_per_date = trader_df.groupby('date').size()
daily_market_trades = trade_rows_per_date.reset_index(name='total_trades')
daily_market_trades

Long / Short Ratio

In [None]:
# The direction column is labelled 'Side', not 'side'; list the columns to
# confirm the exact spelling before using it.
print(trader_df.columns)


In [None]:
# Frequency of each value in the 'Side' column.
trader_df.loc[:, 'Side'].value_counts()

In [None]:
# A trade counts as long when its 'Side' is exactly 'BUY'.
trader_df['is_long'] = trader_df['Side'].eq('BUY')

# Per (date, Account): number of long trades and total number of trades.
sides_by_account_day = trader_df.groupby(['date', 'Account'])
long_short_df = sides_by_account_day.agg(
    long_trades=('is_long', 'sum'),
    total_trades=('Account', 'count'),
).reset_index()

# Everything that is not long is treated as short.
long_short_df['short_trades'] = long_short_df['total_trades'].sub(
    long_short_df['long_trades']
)

# Zero short counts are swapped for 1 to avoid dividing by zero.
# NOTE(review): this makes "N longs, 0 shorts" score the same as
# "N longs, 1 short" - confirm that is acceptable for the analysis.
short_denominator = long_short_df['short_trades'].replace(0, 1)
long_short_df['long_short_ratio'] = long_short_df['long_trades'].div(short_denominator)

In [None]:
# Preview the long/short counts and ratio.
long_short_df.head(5)

In [None]:
from functools import reduce

# Share of winning trades per (date, Account).
daily_win_rate_df = (
    trader_df.groupby(['date', 'Account'])['is_win']
    .mean()
    .reset_index(name='win_rate')
)

# Mean USD trade size per (date, Account).
daily_avg_size_df = (
    trader_df.groupby(['date', 'Account'])['Size USD']
    .mean()
    .reset_index(name='avg_size')
)

# Metric tables to combine; the first one is the left-most frame of the join.
dfs = [
    daily_pnl,
    daily_win_rate_df,
    daily_avg_size_df,
    trades_per_day,
    long_short_df[['date', 'Account', 'long_short_ratio']],
    leverage_df,
]


def _merge_on_account_day(left, right):
    """Left-join two metric tables on their shared (date, Account) key."""
    return pd.merge(left, right, on=['date', 'Account'], how='left')


# Fold the list into one wide table, one row per (date, Account).
final_metrics = reduce(_merge_on_account_day, dfs)

In [None]:
# Preview the combined per-(date, Account) metrics.
final_metrics.head(5)

# Part B — Analysis

1. Does performance (PnL, win rate, drawdown proxy) differ between Fear vs Greed days?

In [None]:
# Display the combined metrics table before sentiment is attached.
final_metrics

In [None]:
# Attach the sentiment classification for each date.
# Any 'classification' column left by a previous run of this cell is dropped
# first; otherwise a re-run yields classification_x / classification_y and
# the value_counts() lookup below raises KeyError.
final_metrics = pd.merge(
    final_metrics.drop(columns='classification', errors='ignore'),
    sentiment_df[['date', 'classification']],
    on='date',
    how='left',
)
final_metrics['classification'].value_counts()

In [None]:
# Keep only rows classified exactly 'Fear' or 'Greed'.
# .copy() makes analysis_df an independent frame so that adding columns to it
# later does not trigger SettingWithCopyWarning on a filtered slice.
analysis_df = final_metrics[
    final_metrics['classification'].isin(['Fear', 'Greed'])
].copy()

In [None]:
# Row counts per classification in the filtered frame.
analysis_df.loc[:, 'classification'].value_counts()

Comparing Average Daily PnL

In [None]:
# Mean, median, standard deviation and row count of daily PnL per classification.
pnl_by_sentiment = analysis_df.groupby('classification')['daily_pnl']
pnl_summary = pnl_by_sentiment.agg(['mean', 'median', 'std', 'count'])

pnl_summary

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of daily PnL, one box per sentiment classification.
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='daily_pnl', ax=ax)
ax.set_title("Daily PnL Distribution: Fear vs Greed")
plt.show()

Comparing Win Rate

In [None]:
# Mean, median and standard deviation of the daily win rate per classification.
win_rate_by_sentiment = analysis_df.groupby('classification')['win_rate']
win_summary = win_rate_by_sentiment.agg(['mean', 'median', 'std'])

win_summary

In [None]:
# Box plot of the daily win rate, one box per sentiment classification.
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='win_rate', ax=ax)
ax.set_title("Win Rate: Fear vs Greed")
plt.show()

Drawdown Proxy

In [None]:
# Drawdown proxy: the share of account-days that closed with negative PnL.
# .assign builds a new frame instead of writing a column into what may be a
# filtered slice of final_metrics, which avoids SettingWithCopyWarning.
analysis_df = analysis_df.assign(loss_day=analysis_df['daily_pnl'] < 0)

# Mean of a boolean column = fraction of loss days per classification.
drawdown_summary = (
    analysis_df.groupby('classification')['loss_day']
    .mean()
)

drawdown_summary

Volatility Proxy

In [None]:
# Volatility proxy: standard deviation of daily PnL per classification.
volatility_summary = analysis_df.groupby('classification')['daily_pnl'].agg('std')

volatility_summary

2. Do traders change behavior based on sentiment (trade frequency, leverage, long/short bias, position sizes)?

In [None]:
# Display the metrics table that the behaviour analysis starts from.
final_metrics

In [None]:
# Rebuild analysis_df as an independent copy of the Fear/Greed rows.
is_fear_or_greed = final_metrics['classification'].isin(['Fear', 'Greed'])
analysis_df = final_metrics.loc[is_fear_or_greed].copy()

Trade Frequency Analysis

In [None]:
# Mean, median and standard deviation of trades per account-day, by classification.
trade_count_by_sentiment = analysis_df.groupby('classification')['trade_count']
freq_summary = trade_count_by_sentiment.agg(['mean', 'median', 'std'])

freq_summary

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of trades per account-day, one box per sentiment classification.
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='trade_count', ax=ax)
ax.set_title("Trade Frequency: Fear vs Greed")
plt.show()

Leverage Behavior Analysis

In [None]:
# Mean, median and standard deviation of the average proxy leverage, by classification.
leverage_by_sentiment = analysis_df.groupby('classification')['avg_leverage']
leverage_summary = leverage_by_sentiment.agg(['mean', 'median', 'std'])

leverage_summary

In [None]:
# Box plot of average proxy leverage, one box per sentiment classification.
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='avg_leverage', ax=ax)
ax.set_title("Average Leverage: Fear vs Greed")
plt.show()

Long / Short Bias Analysis

In [None]:
# Mean and median long/short ratio per classification.
ratio_by_sentiment = analysis_df.groupby('classification')['long_short_ratio']
bias_summary = ratio_by_sentiment.agg(['mean', 'median'])

bias_summary

In [None]:
# Box plot of the long/short ratio, one box per sentiment classification.
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='long_short_ratio', ax=ax)
ax.set_title("Long/Short Ratio: Fear vs Greed")
plt.show()

Position Size Analysis

In [None]:
# Mean, median and standard deviation of the average USD trade size, by classification.
size_by_sentiment = analysis_df.groupby('classification')['avg_size']
size_summary = size_by_sentiment.agg(['mean', 'median', 'std'])

size_summary

In [None]:
# Box plot of the average USD trade size, one box per sentiment classification.
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='avg_size', ax=ax)
ax.set_title("Average Trade Size: Fear vs Greed")
plt.show()

# 3. Identify 2–3 segments (examples):
high leverage vs low leverage traders

frequent vs infrequent traders

consistent winners vs inconsistent traders


In [None]:
# Display the Fear/Greed account-day frame that the segments are built on.
analysis_df

High vs Low Leverage Traders

In [None]:
# Split point between the leverage segments: median average proxy leverage.
leverage_threshold = analysis_df['avg_leverage'].agg('median')
leverage_threshold

In [None]:
# Label each account-day by whether its average proxy leverage is strictly
# above the median threshold. Rows at or below it (and rows with a missing
# value, where the comparison is False) are labelled 'Low Leverage'.
# A pandas-native mapping is used instead of np.where because numpy is only
# imported further down the notebook; np here raises NameError on a fresh
# top-to-bottom run.
is_high_leverage = analysis_df['avg_leverage'] > leverage_threshold
analysis_df['leverage_segment'] = is_high_leverage.map(
    {True: 'High Leverage', False: 'Low Leverage'}
)

Frequent vs Infrequent Traders

In [None]:
# Split point between the frequency segments: median trades per account-day.
freq_threshold = analysis_df['trade_count'].agg('median')

In [None]:
import numpy as np

# 'Frequent' when the account-day trade count is strictly above the median
# threshold; everything else is 'Infrequent'.
is_frequent = analysis_df['trade_count'] > freq_threshold
analysis_df['frequency_segment'] = np.where(is_frequent, 'Frequent', 'Infrequent')

In [None]:
# Performance profile per frequency segment: mean daily PnL, mean win rate,
# standard deviation of daily PnL, and mean proxy leverage.
rows_by_frequency = analysis_df.groupby('frequency_segment')
freq_perf = rows_by_frequency.agg(
    avg_pnl=pd.NamedAgg(column='daily_pnl', aggfunc='mean'),
    avg_win_rate=pd.NamedAgg(column='win_rate', aggfunc='mean'),
    pnl_volatility=pd.NamedAgg(column='daily_pnl', aggfunc='std'),
    avg_leverage=pd.NamedAgg(column='avg_leverage', aggfunc='mean'),
)

freq_perf

Consistent Winners vs Inconsistent Traders

In [None]:
# Per-account volatility: standard deviation of that account's daily PnL
# across its Fear/Greed account-days.
trader_volatility = (
    analysis_df.groupby('Account')['daily_pnl']
    .std()
    .reset_index(name='pnl_volatility')
)

# Attach the account-level figure to every account-day row.
# A 'pnl_volatility' column left by a previous run of this cell is dropped
# first; otherwise a re-run produces pnl_volatility_x / pnl_volatility_y and
# later lookups of 'pnl_volatility' raise KeyError.
analysis_df = (
    analysis_df.drop(columns='pnl_volatility', errors='ignore')
    .merge(trader_volatility, on='Account', how='left')
)

In [None]:
# Median split points used to define the consistency segments.
win_threshold = analysis_df['win_rate'].agg('median')
vol_threshold = analysis_df['pnl_volatility'].agg('median')

In [None]:
# 'Consistent' requires both a win rate strictly above the median and a PnL
# volatility strictly below the median; all other rows are 'Inconsistent'.
wins_above_median = analysis_df['win_rate'].gt(win_threshold)
volatility_below_median = analysis_df['pnl_volatility'].lt(vol_threshold)
analysis_df['consistency_segment'] = np.where(
    wins_above_median & volatility_below_median,
    'Consistent',
    'Inconsistent',
)

In [None]:
# Performance profile per consistency segment, as output column -> (source column, aggregation).
consistency_aggregations = {
    'avg_pnl': ('daily_pnl', 'mean'),
    'avg_win_rate': ('win_rate', 'mean'),
    'pnl_volatility': ('daily_pnl', 'std'),
    'avg_leverage': ('avg_leverage', 'mean'),
}
consistency_perf = analysis_df.groupby('consistency_segment').agg(**consistency_aggregations)

consistency_perf

#  3 insights backed by charts/tables.


In [None]:
# Display the segmented frame that the insight tables and charts are drawn from.
analysis_df

Performance difference between Fear and Greed Days

In [None]:
def _loss_share(pnl):
    """Fraction of values in the group that are strictly negative."""
    return (pnl < 0).mean()


# Daily PnL per classification: mean, median, standard deviation, and the
# share of account-days that ended with a loss.
pnl_table = analysis_df.groupby('classification')['daily_pnl'].agg(
    mean='mean',
    median='median',
    std='std',
    loss_frequency=_loss_share,
)

pnl_table

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of daily PnL per sentiment classification (chart for insight 1).
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='daily_pnl', ax=ax)
ax.set_title("Daily PnL Distribution: Fear vs Greed")
plt.show()

Long/Short Bias Shifts With Sentiment

In [None]:
# Mean and median long/short ratio per classification (table for insight 2).
bias_table = analysis_df.groupby('classification')['long_short_ratio'].agg(['mean', 'median'])

bias_table

In [None]:
# Box plot of the long/short ratio per sentiment classification (chart for insight 2).
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='long_short_ratio', ax=ax)
ax.set_title("Long/Short Ratio: Fear vs Greed")
plt.show()

Overtrading During Volatile Sentiment

In [None]:
# Mean, median and standard deviation of trades per account-day, by
# classification (table for insight 3).
freq_table = analysis_df.groupby('classification')['trade_count'].agg(['mean', 'median', 'std'])

freq_table

In [None]:
# Box plot of trades per account-day per sentiment classification (chart for insight 3).
fig, ax = plt.subplots()
sns.boxplot(data=analysis_df, x='classification', y='trade_count', ax=ax)
ax.set_title("Trade Frequency: Fear vs Greed")
plt.show()

# Part C: Actionable output

**Strategy 1: Reduce Risk During Fear Days**

Evidence: lower average PnL during Fear, higher volatility and drawdowns, and increased exposure and overtrading by some traders.

Rule: during Fear days, reduce position size by 20–30% for high-exposure and frequent traders.


**Strategy 2: Selective Aggression During Greed Days**

Evidence: slightly better win rates during Greed, consistent traders perform better, and market momentum is stronger.

Rule: during Greed days, increase exposure by 10–20% only for consistent traders.