In [4]:
import pandas as pd
from pathlib import Path
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
from datetime import timedelta

# Step 1: Extract Data from YAML Files
def yaml_to_csv(input_folder, output_file):
    """Combine the YAML files under ``input_folder`` into a single CSV.

    Every immediate sub-directory of ``input_folder`` is scanned for
    ``*.yaml`` files. A file whose top-level YAML value is a list is turned
    into a DataFrame; all such frames are concatenated (with a fresh index)
    and written to ``output_file`` without the index column.

    Sub-directories and files are visited in sorted name order so that the
    row order of the combined CSV is reproducible; ``Path.iterdir`` and
    ``Path.glob`` give no ordering guarantee of their own.

    Args:
        input_folder: Directory whose sub-directories hold the YAML files.
            Entries directly inside it that are not directories are ignored.
        output_file: Destination CSV path. Its parent directory is created
            if it does not exist yet.

    Returns:
        None. A status line is printed: the save location, or
        "No data found." when no file produced a list.
    """
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    combined_data = []

    for month_path in sorted(Path(input_folder).iterdir()):
        if not month_path.is_dir():
            continue

        for file_path in sorted(month_path.glob('*.yaml')):
            # Read as UTF-8 explicitly instead of relying on the platform's
            # default text encoding.
            with open(file_path, 'r', encoding='utf-8') as file:
                data = yaml.safe_load(file)

            if isinstance(data, list):
                combined_data.append(pd.DataFrame(data))
            else:
                # Empty files (None) and mapping/scalar documents cannot be
                # turned into rows; report them instead of dropping silently.
                print(f"Skipping {file_path}: top-level YAML value is not a list")

    if combined_data:
        combined_df = pd.concat(combined_data, ignore_index=True)
        combined_df.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
    else:
        print("No data found.")
if __name__ == "__main__":
    # Source directory (one sub-folder of YAML files per period) and the
    # destination of the combined CSV. `output_file` is reused by later cells.
    input_folder, output_file = (
        "D:/projects/stock/data",
        "D:/projects/stock/outputdemo/combined.csv",
    )

    # Step 1: build the combined CSV from the YAML files
    yaml_to_csv(input_folder=input_folder, output_file=output_file)

Data saved to D:/projects/stock/outputdemo/combined.csv


In [12]:
def analyze_data(input_csv, output_dir="."):
    """Compute per-ticker return and volatility statistics from a price CSV.

    The CSV must contain the columns ``Ticker``, ``date``, ``open`` and
    ``close``. Rows are put in chronological order (stable sort on ``date``)
    before any order-dependent statistic is computed, because the
    ``'first'``/``'last'`` aggregations and ``pct_change`` operate on row
    position, not on the date value.

    Side effects: writes ``top_10_green_stocks.csv`` and
    ``top_10_red_stocks.csv`` into ``output_dir``.

    Args:
        input_csv: Path of the combined CSV to analyse.
        output_dir: Directory receiving the two top-10 CSV files; created if
            missing. Defaults to the current working directory.

    Returns:
        A 6-tuple ``(df, yearly_data, top_10_green, top_10_red, volatility,
        monthly_data)``:

        * ``df`` - the input rows sorted by date, with added columns
          ``year``, ``daily_return`` (fractional close-to-close change per
          ticker), ``cumulative_return`` (compounded daily returns per
          ticker, as a fraction) and ``month`` (monthly period).
        * ``yearly_data`` - one row per (Ticker, year) with ``first_open``,
          ``last_close`` and ``yearly_return`` in percent.
        * ``top_10_green`` / ``top_10_red`` - the 10 rows of ``yearly_data``
          with the largest / smallest ``yearly_return``.
        * ``volatility`` - Series indexed by Ticker holding the standard
          deviation of ``daily_return``, limited to the 10 largest values.
        * ``monthly_data`` - one row per (Ticker, month) with ``open``,
          ``close`` and ``monthly_return`` in percent.
    """
    # Load the combined data
    df = pd.read_csv(input_csv)

    # Ensure 'date' is in datetime format
    df['date'] = pd.to_datetime(df['date'])

    # Chronological order is required by every 'first'/'last'/pct_change step
    # below. mergesort is stable, so rows sharing a timestamp keep their
    # original relative order.
    df = df.sort_values('date', kind='mergesort').reset_index(drop=True)
    df['year'] = df['date'].dt.year

    # Group by Ticker and year for yearly analysis
    yearly_data = df.groupby(['Ticker', 'year']).agg(
        first_open=('open', 'first'),
        last_close=('close', 'last')
    ).reset_index()

    # Yearly return in percent: first open of the year -> last close of the year
    yearly_data['yearly_return'] = (
        (yearly_data['last_close'] - yearly_data['first_open'])
        / yearly_data['first_open']
    ) * 100

    # Identify top 10 green and red stocks
    top_10_green = yearly_data.nlargest(10, 'yearly_return')
    top_10_red = yearly_data.nsmallest(10, 'yearly_return')

    # Volatility analysis: std-dev of close-to-close daily returns per ticker
    df['daily_return'] = df.groupby('Ticker')['close'].pct_change()
    volatility = df.groupby('Ticker')['daily_return'].std().nlargest(10)

    # Cumulative return compounds the daily returns: prod(1 + r) - 1.
    # Summing simple returns would misstate it (+100% then -50% is 0%, not
    # +50%). The first row of each ticker has no previous close and stays NaN.
    growth = 1 + df['daily_return']
    df['cumulative_return'] = growth.groupby(df['Ticker']).cumprod() - 1

    # Calculate monthly returns (percent, first open -> last close of the month)
    df['month'] = df['date'].dt.to_period('M')
    monthly_data = df.groupby(['Ticker', 'month']).agg(
        open=('open', 'first'),
        close=('close', 'last')
    ).reset_index()
    monthly_data['monthly_return'] = (
        (monthly_data['close'] - monthly_data['open']) / monthly_data['open']
    ) * 100

    # Save results to CSV
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    top_10_green.to_csv(output_dir / 'top_10_green_stocks.csv', index=False)
    top_10_red.to_csv(output_dir / 'top_10_red_stocks.csv', index=False)

    return df, yearly_data, top_10_green, top_10_red, volatility, monthly_data

# analyze_data returns the 6-tuple
# (df, yearly_data, top_10_green, top_10_red, volatility, monthly_data).
result = analyze_data(output_file)
print(result)
# Volatility is the fifth element (index 4); a leading slice such as
# result[:2] would bind the tuple (df, yearly_data) to this name instead.
volatility = result[4]


(           Ticker    close                date    high      low    month  \
0            SBIN   602.95 2023-10-03 05:30:00   604.9   589.60  2023-10   
1      BAJFINANCE  7967.60 2023-10-03 05:30:00  7975.5  7755.00  2023-10   
2           TITAN  3196.25 2023-10-03 05:30:00  3212.5  3114.40  2023-10   
3             ITC   439.75 2023-10-03 05:30:00   442.9   439.25  2023-10   
4             TCS  3513.85 2023-10-03 05:30:00  3534.2  3480.10  2023-10   
...           ...      ...                 ...     ...      ...      ...   
14195    HINDALCO   652.10 2024-11-22 05:30:00   657.3   647.40  2024-11   
14196  HEROMOTOCO  4794.10 2024-11-22 05:30:00  4808.4  4742.00  2024-11   
14197    AXISBANK  1142.40 2024-11-22 05:30:00  1147.9  1127.55  2024-11   
14198    HDFCBANK  1745.60 2024-11-22 05:30:00  1754.3  1729.55  2024-11   
14199  BAJAJ-AUTO  9481.65 2024-11-22 05:30:00  9602.9  9444.10  2024-11   

          open    volume  year  daily_return  cumulative_return  
0       596.60  1532

In [13]:
print(result)
# yearly_data is the second element (index 1) of the tuple returned by
# analyze_data; result[:2] would be the 2-tuple (df, yearly_data).
yearly_data = result[1]

(           Ticker    close                date    high      low    month  \
0            SBIN   602.95 2023-10-03 05:30:00   604.9   589.60  2023-10   
1      BAJFINANCE  7967.60 2023-10-03 05:30:00  7975.5  7755.00  2023-10   
2           TITAN  3196.25 2023-10-03 05:30:00  3212.5  3114.40  2023-10   
3             ITC   439.75 2023-10-03 05:30:00   442.9   439.25  2023-10   
4             TCS  3513.85 2023-10-03 05:30:00  3534.2  3480.10  2023-10   
...           ...      ...                 ...     ...      ...      ...   
14195    HINDALCO   652.10 2024-11-22 05:30:00   657.3   647.40  2024-11   
14196  HEROMOTOCO  4794.10 2024-11-22 05:30:00  4808.4  4742.00  2024-11   
14197    AXISBANK  1142.40 2024-11-22 05:30:00  1147.9  1127.55  2024-11   
14198    HDFCBANK  1745.60 2024-11-22 05:30:00  1754.3  1729.55  2024-11   
14199  BAJAJ-AUTO  9481.65 2024-11-22 05:30:00  9602.9  9444.10  2024-11   

          open    volume  year  daily_return  cumulative_return  
0       596.60  1532