In [12]:
import yfinance as yf
import pandas as pd
import datetime

# 1. Define the tickers and date range
tickers = ['BTC-USD', 'ETH-USD', 'XRP-USD', 'LTC-USD']
start_date = '2021-08-01'
end_date = datetime.date.today().strftime('%Y-%m-%d')

print(f"Downloading data for {', '.join(tickers)} from {start_date} to {end_date}...\n")

# 2. Download the data using yf.download()
# group_by='ticker' organizes the DataFrame so that each ticker is a top-level column.
# auto_adjust=True uses the Adjusted Close price, which is standard for stock data,
# but for crypto, Close and Adjusted Close are typically the same.
data = yf.download(
    tickers,
    start=start_date,
    end=end_date,
    group_by='ticker',
    interval='1d'  # Daily interval
)

# 3. Filter for only the 'Close' prices
# The DataFrame 'data' will have multiple columns per ticker (Open, High, Low, Close, Volume, etc.).
# We use a MultiIndex slicing technique to select the 'Close' column for ALL tickers.
# .dropna() removes any rows where a price might be missing for any of the cryptocurrencies.
closing_prices = data.loc[:, (slice(None), 'Close')]
closing_prices.columns = closing_prices.columns.droplevel(1) # Removes the 'Close' sub-header

# 4. Display the resulting DataFrame
print("--- Extracted Closing Prices ---")
print(closing_prices.head())
print("\n--- Summary Statistics ---")
print(closing_prices.describe())

# Optional: Save the data to a CSV file
closing_prices.to_csv('crypto_closing_prices.csv')

  data = yf.download(
[*********************100%***********************]  4 of 4 completed

Downloading data for BTC-USD, ETH-USD, XRP-USD, LTC-USD from 2021-08-01 to 2025-10-07...

--- Extracted Closing Prices ---
Ticker         LTC-USD      ETH-USD       BTC-USD   XRP-USD
Date                                                       
2021-08-01  140.735535  2561.852051  39974.894531  0.725922
2021-08-02  141.505508  2610.153320  39201.945312  0.739220
2021-08-03  138.290848  2502.349609  38152.980469  0.712729
2021-08-04  142.623047  2724.619873  39747.503906  0.730488
2021-08-05  143.435974  2827.328857  40869.554688  0.733151

--- Summary Statistics ---
Ticker      LTC-USD      ETH-USD        BTC-USD      XRP-USD
count   1528.000000  1528.000000    1528.000000  1528.000000
mean      94.232070  2550.642422   53373.729307     0.995993
std       36.534823   924.293819   29883.157512     0.831487
min       43.300301   993.636780   15787.284180     0.308090
25%       68.854666  1786.370026   27700.330566     0.480794
50%       85.525997  2486.989990   44391.462891     0.597126
75




In [16]:
daily_returns = closing_prices.pct_change()

# The first row will be NaN because there is no prior day's price to calculate the change from.

# Display the resulting DataFrame
print("--- Daily Percentage Returns ---")
# Display the first few rows (the first row is always NaN)
print(daily_returns.head()) 
print("\n--- Summary of Daily Returns ---")
# Show key statistics for the returns
print(daily_returns.describe().T)

# Optional: Save the returns data to a CSV filedaily_returns.to_csv('crypto_daily_returns.csv')

--- Daily Percentage Returns ---
Ticker       LTC-USD   ETH-USD   BTC-USD   XRP-USD
Date                                              
2021-08-01       NaN       NaN       NaN       NaN
2021-08-02  0.005471  0.018854 -0.019336  0.018319
2021-08-03 -0.022718 -0.041302 -0.026758 -0.035836
2021-08-04  0.031327  0.088825  0.041793  0.024917
2021-08-05  0.005700  0.037697  0.028229  0.003646

--- Summary of Daily Returns ---
          count      mean       std       min       25%       50%       75%  \
Ticker                                                                        
LTC-USD  1527.0  0.000724  0.040925 -0.188640 -0.019513  0.001045  0.020163   
ETH-USD  1527.0  0.001086  0.037233 -0.174564 -0.017203  0.000310  0.019182   
BTC-USD  1527.0  0.001138  0.028018 -0.159747 -0.011972 -0.000007  0.013723   
XRP-USD  1527.0  0.001929  0.046405 -0.195181 -0.018096  0.000276  0.018466   

              max  
Ticker             
LTC-USD  0.275898  
ETH-USD  0.217988  
BTC-USD  0.145412  
X

In [17]:

# 1. Format the Index (Date Column) to DD-MM-YYYY
daily_returns.index = daily_returns.index.strftime('%d-%m-%Y')

# 2. Save the data to a CSV file with:
#    - Comma (,) as the delimiter (sep=',')
#    - Comma (,) as the decimal separator (decimal=',')
file_name = 'crypto_daily_returns_european_format.csv'
daily_returns.to_csv(
    file_name, 
    sep=';',        # Use semicolon as the column delimiter
    decimal='.'     # Use comma as the decimal separator
)

# Display the head of the formatted data (will still show '.' in the console)
print("--- Formatted Daily Percentage Returns (European style CSV) ---")
print("Note: The console print uses '.', but the saved CSV uses ','.")
print("Here's how the file starts:")

# Read the first few lines of the file to confirm the content
with open(file_name, 'r') as f:
    lines = [next(f) for _ in range(6)]

for line in lines:
    # Print the line, removing the trailing newline character
    print(line.strip())

--- Formatted Daily Percentage Returns (European style CSV) ---
Note: The console print uses '.', but the saved CSV uses ','.
Here's how the file starts:
Date;LTC-USD;ETH-USD;BTC-USD;XRP-USD
01-08-2021;;;;
02-08-2021;0.005471068530768619;0.01885404331468732;-0.019335866368471732;0.01831882058798162
03-08-2021;-0.022717565417489594;-0.04130167760589376;-0.02675797936526192;-0.03583648321479649
04-08-2021;0.03132672310769391;0.0888246242008488;0.041792893187100555;0.024916939909721103
05-08-2021;0.005699830875203649;0.03769662894668069;0.02822946527400849;0.0036455305369686997


In [30]:
import numpy as np
import pandas as pd 

df = pd.read_excel('D:/OneDrive - Ministry of Digital Governance/Επιφάνεια εργασίας/new research projects/RF and LA/data_6_10.xlsx', sheet_name = 'indices returns', header =0).set_index('Dates')
df1 = pd.read_excel('D:/OneDrive - Ministry of Digital Governance/Επιφάνεια εργασίας/new research projects/RF and LA/data_6_10.xlsx', sheet_name = 'cryptos', header =0).set_index('Dates')


#print(df, df1)

aligned_df = pd.merge(
    df, 
    df1, 
    left_index=True,   # Use the index of the left DataFrame (indices returns)
    right_index=True,  # Use the index of the right DataFrame (crypto)
    how='inner'        # Keep only rows where the index exists in BOTH
)

#print("--- Original Daily Returns Dates (10 Days) ---")
#print(df.index.tolist())

#print("\n--- New Data Dates (17 Days) ---")
#print(df1.index.tolist())

print("\n--- Aligned DataFrame (Inner Merge Result) ---")
print(f"Original daily_returns rows: {len(df)}")
print(f"Aligned DataFrame rows: {len(aligned_df)}")
print(aligned_df.head(),aligned_df.describe().T )
aligned_df.to_csv('aligned_df.csv')


--- Aligned DataFrame (Inner Merge Result) ---
Original daily_returns rows: 1065
Aligned DataFrame rows: 1055
            SPX Index  LBUSTRUU Index  US0001M Index  RTY Index  \
Dates                                                             
2021-09-01   0.000312        0.000025       0.000003   0.005843   
2021-09-02   0.002843        0.001031       0.000003   0.007415   
2021-09-03  -0.000335       -0.001733       0.000003  -0.005195   
2021-09-06   0.000000        0.000000       0.000003   0.000000   
2021-09-07  -0.003395       -0.002304       0.000003  -0.007171   

            VIVAX US Equity  NAESX US Equity   BTC-USD   ETH-USD   XRP-USD  \
Dates                                                                        
2021-09-01        -0.002711         0.004979  0.035626  0.116810  0.039825   
2021-09-02         0.007432         0.007982  0.009841 -0.011432  0.017358   
2021-09-03        -0.003958        -0.002822  0.014143  0.039469  0.027947   
2021-09-06         0.000000  