In [None]:
import pandas as pd
from pathlib import Path
import yaml

def yaml_to_csv(input_folder, output_file):
    """Merge the ``*.yaml`` files found one level below ``input_folder`` into a CSV.

    Every immediate sub-directory of ``input_folder`` is scanned
    (non-recursively) for ``*.yaml`` files. Each file is parsed with
    ``yaml.safe_load``, converted to a ``DataFrame``, and all frames are
    concatenated and written to ``output_file`` without the index column.

    Sub-directories and files are visited in sorted name order so that the
    row order of the resulting CSV is reproducible across runs and machines.
    NOTE(review): the loop variable is called ``month_path``, so the
    sub-directories are presumably one per month with sortable names --
    confirm against the actual data layout.

    Args:
        input_folder: Directory whose sub-directories contain the YAML files.
        output_file: Destination CSV path. Its parent directory is created
            if it does not exist yet.

    Raises:
        FileNotFoundError: If ``input_folder`` does not exist (raised by
            ``Path.iterdir``).
    """
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    frames = []

    # iterdir()/glob() yield entries in arbitrary order; sort both levels so
    # the concatenated rows always come out in the same sequence.
    for month_path in sorted(Path(input_folder).iterdir()):
        if not month_path.is_dir():
            continue

        for file_path in sorted(month_path.glob('*.yaml')):
            # Explicit encoding: the platform default (e.g. cp1252 on
            # Windows) is not a safe assumption for YAML content.
            with open(file_path, 'r', encoding='utf-8') as file:
                data = yaml.safe_load(file)

            # An empty YAML document parses to None; skip it instead of
            # appending an empty frame to the result.
            if not data:
                continue
            frames.append(pd.DataFrame(data))

    if frames:
        combined_df = pd.concat(frames, ignore_index=True)
        combined_df.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
    else:
        print("No data found.")

if __name__ == "__main__":
    # Root folder holding the YAML sub-directories, and the CSV the merged
    # rows are written to.
    input_folder = "D:/projects/stock/data"
    output_file = "D:/projects/stock/output/combined.csv"

    # Keyword arguments make the source/destination roles explicit.
    yaml_to_csv(input_folder=input_folder, output_file=output_file)

Data saved to D:/projects/stock/output/combined.csv


In [None]:
import pandas as pd
from pathlib import Path

def analyse_data(file_path="D:/projects/stock/output/combined.csv"):
    """Compute yearly returns per ticker and print summary statistics.

    Reads the CSV at ``file_path`` (columns used: ``date``, ``Ticker``,
    ``open``, ``close``, ``volume``), and for every (Ticker, year) pair takes
    the open of the earliest row and the close of the latest row to compute
    ``yearly_return`` as a percentage.

    Printed to stdout: the ten best and ten worst (Ticker, year) rows by
    yearly return, the number of rows with a positive return and with a
    non-positive return, the mean of the per-row ``(open + close) / 2`` and
    the mean volume. Note that the green/loss counts are per (Ticker, year)
    row, so a ticker present in several years is counted once per year.

    Args:
        file_path: Path of the combined CSV. Defaults to the location the
            notebook's conversion step writes to.

    Returns:
        tuple[pandas.DataFrame, pandas.DataFrame]: ``(top_10_green_stocks,
        top_10_loss_stocks)``, each with the columns ``Ticker``, ``year``,
        ``first_open``, ``last_close`` and ``yearly_return``.
    """
    df = pd.read_csv(Path(file_path))

    df['date'] = pd.to_datetime(df['date'])
    # 'first'/'last' below are positional, so rows must be in chronological
    # order; the CSV's row order is not guaranteed to be. A stable sort keeps
    # the file order for rows that share a timestamp.
    df = df.sort_values('date', kind='stable')
    df['year'] = df['date'].dt.year

    # For yearly analysis
    yearly_data = df.groupby(['Ticker', 'year']).agg(
        first_open=('open', 'first'),
        last_close=('close', 'last')
    ).reset_index()

    # Percentage change from the first open to the last close of the year.
    yearly_data['yearly_return'] = ((yearly_data['last_close'] - yearly_data['first_open']) /
                                    yearly_data['first_open']) * 100

    top_10_green_stocks = yearly_data.nlargest(10, 'yearly_return')
    top_10_loss_stocks = yearly_data.nsmallest(10, 'yearly_return')

    # Rows whose return is NaN fall into neither bucket.
    green_stocks = (yearly_data['yearly_return'] > 0).sum()
    loss_stocks = (yearly_data['yearly_return'] <= 0).sum()

    # Mid-point of open and close per row, averaged over the whole file.
    avg_price = ((df["open"] + df["close"]) / 2).mean()

    avg_volume = df["volume"].mean()

    print(f"Top 10 Green Stocks:{top_10_green_stocks}")
    print(f"\n Top 10 Loss Stocks:{top_10_loss_stocks}")
    print(f"\n Number of Green Stocks: {green_stocks}")
    print(f"Number of Loss Stocks: {loss_stocks}")
    print(f"Avg Price: {avg_price:.2f}")
    print(f"Avg Volume: {avg_volume:.2f}")

    return top_10_green_stocks, top_10_loss_stocks

# Run the analysis when this cell executes and keep the two returned
# DataFrames (best and worst yearly returns) as notebook-level names.
top_10_green_stocks, top_10_loss_stocks = analyse_data()


Top 10 Green Stocks:        Ticker  year  first_open  last_close  yearly_return
95       TRENT  2024     3061.80     6652.80     117.283951
61         M&M  2024     1729.40     3012.95      74.219382
19  BHARTIARTL  2024     1031.95     1569.30      52.071321
17         BEL  2024      184.80      280.85      51.975108
94       TRENT  2023     2074.30     3054.95      47.276190
81   SUNPHARMA  2024     1261.00     1795.30      42.371134
71   POWERGRID  2024      237.90      336.95      41.635141
11  BAJAJ-AUTO  2024     6770.00     9481.65      40.053914
79  SHRIRAMFIN  2024     2055.55     2850.05      38.651456
91       TECHM  2024     1273.00     1747.45      37.270228

 Top 10 Loss Stocks:        Ticker  year  first_open  last_close  yearly_return
49  INDUSINDBK  2024     1598.95      998.20     -37.571531
7   ASIANPAINT  2024     3403.00     2472.20     -27.352336
1     ADANIENT  2024     2852.30     2228.00     -21.887599
65   NESTLEIND  2024     2665.00     2247.30     -15.673546