In [None]:
# Per-symbol risk profile over the spot_data table: number of intervals,
# average and maximum intra-interval price range (as a percentage of the open
# price), and average volume / trade count / quote volume. Symbols whose
# average volume is not positive are dropped, and the result is ordered from
# the widest average price range to the narrowest.
#
# NULLIF(open_price, 0) turns a zero open price into NULL so the division
# yields NULL (which avg/max ignore) instead of raising a divide-by-zero error
# when Spark runs in ANSI mode. With ANSI mode off the output is unchanged,
# because x / 0 already evaluates to NULL there.
risk_analysis = spark.sql("""
SELECT
    symbol,
    count(*) as total_intervals,
    avg((high_price - low_price) / NULLIF(open_price, 0) * 100) as avg_price_range_percent,
    max((high_price - low_price) / NULLIF(open_price, 0) * 100) as max_price_range_percent,
    avg(volume) as avg_volume,
    avg(number_of_trade) as avg_trades_per_interval,
    avg(quote_asset_volume) as avg_quote_volume
FROM spot_data
GROUP BY symbol
HAVING avg_volume > 0
ORDER BY avg_price_range_percent DESC
""")

print("Trading Pair Risk Analysis:")
# Preview the ten symbols with the widest average price range.
risk_analysis.show(10)
# Number of symbols that passed the HAVING filter (shown as the cell result
# when this runs as the last expression of a notebook cell).
risk_analysis.count()


# The riskiest tokens often combine extreme price swings with low trading activity - a dangerous combination for traders.

In [None]:
# Year-wise performance for all cryptocurrencies.
#
# For every (symbol, calendar year of open_timestamp) the CTE collects:
#   * year_start_price - open price of the earliest interval in that year
#   * year_end_price   - close price of the latest interval in that year
#   * avg_interval_volume / avg_volatility - per-interval averages
# The outer query turns start/end prices into a percentage return and rounds
# the metrics to two decimals, newest year first and best return first.
#
# MIN_BY / MAX_BY (available from Spark 3.0) pick the price that belongs to the
# earliest / latest open_timestamp. Using plain MIN(open_price) and
# MAX(close_price) would instead give the yearly low and high, which makes
# every "return" the max-vs-min spread rather than the start-to-end change.
#
# NULLIF(..., 0) guards both divisions: a zero divisor yields NULL instead of a
# divide-by-zero error under ANSI mode (non-ANSI results are unchanged).
yearly_analysis = spark.sql("""
WITH yearly_stats AS (
    SELECT
        symbol,
        YEAR(open_timestamp) as trade_year,
        MIN_BY(open_price, open_timestamp) as year_start_price,
        MAX_BY(close_price, open_timestamp) as year_end_price,
        AVG(volume) as avg_interval_volume,
        AVG((high_price - low_price) / NULLIF(open_price, 0)) * 100 as avg_volatility
    FROM spot_data
    GROUP BY symbol, YEAR(open_timestamp)
)
SELECT
    trade_year,
    symbol,
    ROUND(((year_end_price - year_start_price) / NULLIF(year_start_price, 0) * 100), 2) as yearly_return,
    ROUND(avg_volatility, 2) as avg_volatility,
    ROUND(avg_interval_volume, 2) as avg_volume
FROM yearly_stats
ORDER BY trade_year DESC, yearly_return DESC
""")

print("Year-wise Performance Analysis:")
# Preview the first ten rows of the ordered result.
yearly_analysis.show(10)