In [0]:
from pyspark.sql.functions import *

In [0]:
# Notebook parameters: the reporting period (year, month) is supplied through
# Databricks text widgets. Both default to an empty string.
dbutils.widgets.text("year", '')
dbutils.widgets.text("month", '')
year_str = dbutils.widgets.get("year")
month_str = dbutils.widgets.get("month")
print(year_str, month_str)


def _parse_period_part(raw, name, lowest, highest):
    """Convert one widget value to an int, failing fast with a readable message.

    Args:
        raw: Text returned by ``dbutils.widgets.get``.
        name: Widget name; used only to build error messages.
        lowest: Smallest accepted value (inclusive).
        highest: Largest accepted value (inclusive).

    Returns:
        The parsed integer.

    Raises:
        ValueError: If the widget is empty, not an integer, or out of range.
    """
    text = (raw or '').strip()
    if not text:
        raise ValueError(f"Widget '{name}' is empty; set it before running the notebook.")
    try:
        value = int(text)
    except ValueError:
        raise ValueError(f"Widget '{name}' must be an integer, got {raw!r}.") from None
    if not lowest <= value <= highest:
        raise ValueError(f"Widget '{name}' must be between {lowest} and {highest}, got {value}.")
    return value


# Convert year and month to integers.
# The values are interpolated into SQL text further down, so an unset widget must
# stop the run here: a None would otherwise be rendered as the literal `None`
# inside the query and surface later as an obscure analysis error.
year_int = _parse_period_part(year_str, "year", 1, 9999)
month_int = _parse_period_part(month_str, "month", 1, 12)


In [0]:
# Load the AMFI data file: semicolon-delimited CSV with a header row; column
# types are inferred by Spark.
df = (
    spark.read
    .options(header=True, sep=';', inferSchema=True)
    .csv('/Volumes/retail_analytics/portfolio/amfi_data/AMFI data.csv')
)
# Expose it to SQL as `amfi_data` (joined on isin by the MF returns query).
df.createOrReplaceTempView('amfi_data')

In [0]:
# Load the portfolio CSV (header row, inferred column types) and expose it to
# SQL as `portfolio_data`.
portfolio_df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('/Volumes/retail_analytics/portfolio/amfi_data/Portfolio_data_feb0226.csv')
)
portfolio_df.createOrReplaceTempView('portfolio_data')

In [0]:
from pyspark.sql.functions import col, round, regexp_replace, year, month, to_date

# Portfolio data
# Holdings file plus two derived amounts:
#   invested_amount = Quantity Available * Average Price
#   total_value     = invested_amount + Unrealized P&L
# Both are rounded to 2 decimals. The result is registered as `portfolio_zerodha`.
# NOTE(review): the file name is fixed to 202601 regardless of the year/month
# widgets — confirm that is intended.
df_jan_2026 = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('/Volumes/retail_analytics/portfolio/amfi_data/Portfollio_holdings_202601.csv')
    .withColumn('invested_amount', round(col('Quantity Available') * col('Average Price'), 2))
    # total_value builds on the invested_amount column added just above.
    .withColumn('total_value', round(col('invested_amount') + col('Unrealized P&L'), 2))
)

df_jan_2026.createOrReplaceTempView('portfolio_zerodha')


# Stock price history
# Monthly stock returns, normalised so they can be joined to the holdings on
# (ticker, year, month). Registered as `stock_price_history`.
stock_price_history = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('/Volumes/retail_analytics/portfolio/amfi_data/all_stocks_monthly_returns.csv')
    # Strip a trailing ".NS" from the ticker.
    .withColumn('ticker', regexp_replace(col('ticker'), '\\.NS$', ''))
    # Parse YearMonth into a date (adjust the format if the file differs).
    .withColumn('YearMonth_date', to_date(col('YearMonth'), 'yyyy-MM-dd'))
    .withColumn('year', year(col('YearMonth_date')))
    .withColumn('month', month(col('YearMonth_date')))
)

stock_price_history.createOrReplaceTempView('stock_price_history')


# Mutual fund price history
# Monthly mutual fund returns with year/month columns derived from YearMonth.
# Registered as `mf_price_history`.
mf_price_history = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('/Volumes/retail_analytics/portfolio/amfi_data/mf_monthly_returns.csv')
    # Parse YearMonth into a date (adjust the format if the file differs).
    .withColumn('YearMonth_date', to_date(col('YearMonth'), 'yyyy-MM-dd'))
    .withColumn('year', year(col('YearMonth_date')))
    .withColumn('month', month(col('YearMonth_date')))
)

mf_price_history.createOrReplaceTempView('mf_price_history')

In [0]:
# NOTE(review): commented-out earlier draft of the loading logic; nothing in this
# cell executes. It differs from the active version in that year/month are taken
# straight from 'YearMonth' without an explicit to_date() parse.
# df_jan_2026=spark.read.csv('/Volumes/retail_analytics/portfolio/amfi_data/Portfollio_holdings_202601.csv', header=True,inferSchema=True)
# df_jan_2026=df_jan_2026.withColumn('invested_amount', round(col('Quantity Available')*col('Average Price'), 2))\
#     .withColumn('total_value', round(col('invested_amount')+col('Unrealized P&L'), 2))

# df_jan_2026.createOrReplaceTempView('portfolio_zerodha')
# stock_price_history=spark.read.csv('/Volumes/retail_analytics/portfolio/amfi_data/all_stocks_monthly_returns.csv', header=True,inferSchema=True)
# stock_price_history = stock_price_history.withColumn(
#     'ticker', regexp_replace('ticker', '\\.NS$', '')
# )\
#     .withColumn('year', year('YearMonth'))\
#     .withColumn('month', month('YearMonth'))

# stock_price_history.createOrReplaceTempView('stock_price_history')
# mf_price_history=spark.read.csv('/Volumes/retail_analytics/portfolio/amfi_data/mf_monthly_returns.csv', header=True,inferSchema=True)
# mf_price_history=mf_price_history.withColumn('year', year('YearMonth'))\
#     .withColumn('month', month('YearMonth'))
# mf_price_history.createOrReplaceTempView('mf_price_history')

In [0]:
# Per-holding stock returns for the requested period.
# Joins the holdings to the monthly stock returns on (ticker, year, month) and derives:
#   profit_loss = total_value * monthly_return_pct / 100
#   final_value = total_value + profit_loss
# Both are rounded to 2 decimals. year_int / month_int are integers, so
# interpolating them into the SQL text is safe.
stock_returns = spark.sql(f"""
    select symbol,
           sector,
           pft.year,
           pft.month,
           invested_amount,
           total_value,
           round(total_value * monthly_return_pct / 100, 2) as profit_loss,
           round(total_value + profit_loss, 2) as final_value
    from stock_price_history st
    join portfolio_zerodha pft
      on st.ticker = pft.symbol
     and st.year = pft.year
     and st.month = pft.month
    where pft.year = {year_int}
      and pft.month = {month_int}
""")

# The result holds a single (year, month), so a plain overwrite would replace the
# whole table and drop every month loaded by earlier runs. replaceWhere limits the
# overwrite to this period's rows.
# NOTE(review): replaceWhere is a Delta Lake writer option — assumes the target
# is a Delta table; confirm.
(
    stock_returns.write
    .mode('overwrite')
    .option('replaceWhere', f'year = {year_int} AND month = {month_int}')
    .partitionBy('year', 'month', 'symbol')
    .saveAsTable('retail_analytics.portfolio.stock_returns')
)

# Expose the period's rows to the SQL cells that follow.
stock_returns.createOrReplaceTempView('stock_returns')


In [0]:
#### MF RETURNS #####
# Per-holding mutual fund returns for the requested period.
# The CTE matches holdings to the AMFI data on isin to pick up scheme_code
# (the symbol 'GOLDBEES' is excluded). The outer query joins to the monthly MF
# returns on (scheme_code, year, month) and derives
#   profit_loss = total_value * monthly_return_pct / 100   (rounded to 2 decimals).
mf_returns = spark.sql(f"""
    with cte as (
        select scheme_code, *
        from portfolio_zerodha pft
        join amfi_data amfi
          on pft.isin = amfi.isin
        where pft.year = {year_int}
          and pft.month = {month_int}
          and symbol != 'GOLDBEES'
    )
    select c.Symbol,
           c.invested_amount,
           c.total_value,
           round(c.total_value * monthly_return_pct / 100, 2) as profit_loss,
           monthly_return_pct,
           c.Year,
           c.Month
    from cte c
    join mf_price_history mf
      on c.scheme_code = mf.scheme_code
     and mf.year = c.year
     and mf.month = c.month
""")

# The result holds a single (year, month), so a plain overwrite would replace the
# whole table and drop every month loaded by earlier runs. replaceWhere limits the
# overwrite to this period's rows.
# NOTE(review): replaceWhere is a Delta Lake writer option — assumes the target
# is a Delta table; confirm.
(
    mf_returns.write
    .mode('overwrite')
    .option('replaceWhere', f'year = {year_int} AND month = {month_int}')
    .partitionBy('year', 'month', 'Symbol')
    .saveAsTable('retail_analytics.portfolio.mf_returns')
)
mf_returns.createOrReplaceTempView('mf_returns')

In [0]:
# Portfolio-level roll-up of the per-stock returns held in the `stock_returns` view.
#   mnt_stock_portfolio_value = sum(total_value)                (value before the month's return)
#   month_end_value           = sum(total_value + profit_loss)
#   month_profit_loss         = month_end_value - mnt_stock_portfolio_value
#   loss_profit_pct_month     = month_profit_loss / mnt_stock_portfolio_value * 100
# The percentage is taken against the starting value so that it reconciles with
# monthly_return_pct (profit_loss is total_value * monthly_return_pct / 100).
# Dividing by the month-end value understates gains and overstates losses.
# nullif() makes the percentage NULL instead of dividing by zero.
stock_portfolio = spark.sql("""
    with cte as (
        select 'STOCKS' as portfolio_type,
               round(sum(invested_amount), 2) as invested_amount,
               round(sum(total_value), 2) as mnt_stock_portfolio_value,
               round(sum(total_value + profit_loss), 2) as month_end_value,
               month,
               year
        from stock_returns
        group by month, year
    )
    select portfolio_type,
           invested_amount,
           mnt_stock_portfolio_value,
           month_end_value,
           round(month_end_value - mnt_stock_portfolio_value, 2) as month_profit_loss,
           round(month_profit_loss / nullif(mnt_stock_portfolio_value, 0) * 100, 2) as loss_profit_pct_month,
           month,
           year
    from cte
""")
stock_portfolio.createOrReplaceTempView('stock_portfolio')

# `stock_returns` only holds the period selected by the widgets, so a plain
# overwrite would replace the whole table and drop earlier months. replaceWhere
# limits the overwrite to this period's rows.
# NOTE(review): replaceWhere is a Delta Lake writer option — assumes the target
# is a Delta table; confirm.
(
    stock_portfolio.write
    .mode('overwrite')
    .option('replaceWhere', f'year = {year_int} AND month = {month_int}')
    .partitionBy('year', 'month', 'portfolio_type')
    .saveAsTable('retail_analytics.portfolio.stock_portfolio')
)


In [0]:

# Portfolio-level roll-up of the per-fund returns held in the `mf_returns` view.
#   mnt_stock_portfolio_value = sum(total_value)                (value before the month's return)
#   month_end_value           = sum(total_value + profit_loss)
#   month_profit_loss         = month_end_value - mnt_stock_portfolio_value
#   loss_profit_pct_month     = month_profit_loss / mnt_stock_portfolio_value * 100
# The percentage is taken against the starting value so that it reconciles with
# monthly_return_pct (profit_loss is total_value * monthly_return_pct / 100).
# Dividing by the month-end value understates gains and overstates losses.
# nullif() makes the percentage NULL instead of dividing by zero.
# Column names and order are unchanged, so the positional union with the stock
# roll-up still lines up.
mf_portfolio = spark.sql("""
    with cte as (
        select 'MF' as portfolio_type,
               round(sum(invested_amount), 2) as invested_amount,
               round(sum(total_value), 2) as mnt_stock_portfolio_value,
               round(sum(total_value + profit_loss), 2) as month_end_value,
               month,
               year
        from mf_returns
        group by month, year
    )
    select portfolio_type,
           invested_amount,
           mnt_stock_portfolio_value,
           month_end_value,
           round(month_end_value - mnt_stock_portfolio_value, 2) as month_profit_loss,
           round(month_profit_loss / nullif(mnt_stock_portfolio_value, 0) * 100, 2) as loss_profit_pct_month,
           month,
           year
    from cte
""")
mf_portfolio.createOrReplaceTempView('mf_portfolio')

# `mf_returns` only holds the period selected by the widgets, so a plain
# overwrite would replace the whole table and drop earlier months. replaceWhere
# limits the overwrite to this period's rows.
# NOTE(review): replaceWhere is a Delta Lake writer option — assumes the target
# is a Delta table; confirm.
(
    mf_portfolio.write
    .mode("overwrite")
    .option('replaceWhere', f'year = {year_int} AND month = {month_int}')
    .partitionBy('year', 'month', 'portfolio_type')
    .saveAsTable('retail_analytics.portfolio.mf_portfolio')
)

In [0]:
# Stack the stock and mutual fund roll-ups into one result. UNION ALL matches
# columns by position; both views select the same columns in the same order.
consolidated_df = spark.sql("""
    select * from stock_portfolio
    union all
    select * from mf_portfolio
""")

# Both inputs only hold the period selected by the widgets, so a plain overwrite
# would replace the whole table and drop earlier months. replaceWhere limits the
# overwrite to this period's rows.
# NOTE(review): replaceWhere is a Delta Lake writer option — assumes the target
# is a Delta table; confirm.
(
    consolidated_df.write
    .mode('overwrite')
    .option('replaceWhere', f'year = {year_int} AND month = {month_int}')
    .partitionBy('year', 'month', 'portfolio_type')
    .saveAsTable('retail_analytics.portfolio.consolidated_portfolio')
)

In [0]:
# NOTE(review): commented-out draft of a combined 'MF+STOCK' roll-up; nothing in
# this cell executes. If it is revived, note that the final select sums
# loss_profit_pct_month across portfolio types. Percentages are not additive, so
# the combined percentage should be recomputed from the summed amounts instead.
# %sql
# with cte as (
# select 'STOCKS' as portfolio_type,round(sum(invested_amount),2) as invested_amount,round(sum(total_value),2) as mnt_stock_portfolio_value,round(sum(total_value+profit_loss),2) as month_end_value,month,year from stock_returns
#   group by month,year),
# stc_returns as (
# select portfolio_type,invested_amount,mnt_stock_portfolio_value,month_end_value,round(month_end_value-mnt_stock_portfolio_value,2) as month_profit_loss,round(month_profit_loss/month_end_value*100,2)as loss_profit_pct_month, month,year from cte),
# cte2 as (
# select 'MF' as portfolio_type,round(sum(invested_amount),2) as invested_amount,round(sum(total_value),2) as mnt_stock_portfolio_value,round(sum(total_value+profit_loss),2) as month_end_value,month,year from mf_returns
#   group by month,year),
# mfc_returns as (
# select portfolio_type,invested_amount,mnt_stock_portfolio_value,month_end_value,round(month_end_value-mnt_stock_portfolio_value,2) as month_profit_loss,round(month_profit_loss/month_end_value*100,2)as loss_profit_pct_month, month,year from cte2),
# portfolio_returns as (
# select * from mfc_returns
# union 
# select * from stc_returns)
# select 'MF+STOCK' as portfolio_type,round(sum(invested_amount),2) as invested_amount,round(sum(mnt_stock_portfolio_value),2) as mnt_stock_portfolio_value,round(sum(month_end_value),2) as month_end_value,round(sum(month_profit_loss),2) as month_profit_loss,round(sum(loss_profit_pct_month),2) as loss_profit_pct_month,month,year from portfolio_returns
# group by month,year
# order by year,month
# --select * from portfolio_returns where portfolio_type='MF+STOCK'

In [0]:
%sql
-- Preview the per-fund returns registered as the mf_returns temp view.
SELECT *
FROM mf_returns

In [0]:
%sql
-- NOTE(review): scratch cell with no active statement; the only query below is commented out.



--select * from mf_returns
