<a href="https://colab.research.google.com/github/vlad-pirvu/random/blob/main/StockMonitor2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import csv
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
import time
import os
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Historical data going back for the following period (in years):
yearsPeriod = 2

In [None]:
# Extract stock historical data for all symbols
runFlag = True
if runFlag:
  # Read the CSV file
  df = pd.read_csv('USD_symbols.csv')
  symbols=df['symbol'].tolist()
  #symbols = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", 'NVDA']

  stocksFolder = "yFinanceStockData"
  # Check if the folder exists, create it if not
  if not os.path.exists(stocksFolder):
    os.makedirs(stocksFolder)
    print(f"Folder '{stocksFolder}' created!")
  else:
    print(f"Folder '{stocksFolder}' already exists.")

  print(f"Extracting historical data for {len(symbols)} symbols.")

  start_date = (datetime.today() - timedelta(days=yearsPeriod*365)).strftime('%Y-%m-%d')
  end_date = datetime.today().strftime('%Y-%m-%d')
  print(f"Time period: From {start_date} to {end_date}.")

  time.sleep(1)
  counter = 1
  for symbol in symbols:
    filename = "yFinanceStockData/" + symbol + ".csv"
    stockData = yf.download(symbol, start=start_date, end=end_date)
    stockData.columns = stockData.columns.droplevel(1)  # Drop the stock symbol level
    stockData.to_csv(filename,index=True)
    print(f"{str(counter)}: {symbol} extracted.")
    time.sleep(0.2)
    counter += 1

  print("Extraction completed.")

In [24]:
# Pool data into a date-indexed dataframe
runFlag = True
if runFlag:
  # 1. Define folder path and initialize an empty list for data
  folder_path = "yFinanceStockData/"
  combined_data = []

  # 2. Loop through files and extract only the "Close" column
  for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):
        symbol = filename.split(".")[0]  # Extract symbol from filename
        filepath = os.path.join(folder_path, filename)

        # Read only the "Close" column, set "Date" as index
        df = pd.read_csv(filepath, usecols=["Date", "Close"], parse_dates=["Date"], index_col="Date")
        df.rename(columns={"Close": symbol}, inplace=True)  # Rename "Close" to symbol
        combined_data.append(df)

  # 3. Combine all data into a single DataFrame
  all_data = pd.concat(combined_data, axis=1)  # Combine along columns
  print(f"Combined DataFrame shape: {all_data.shape}")

  print(all_data.head())

Combined DataFrame shape: (501, 2025)
                     UUUU   SONO        ZTO      OXLC       GDEN         CAR  \
Date                                                                           
2023-06-05 00:00:00  6.49  15.40  25.544962  3.452326  39.804394  167.047867   
2023-06-06 00:00:00  6.28  15.98  25.716789  3.492703  41.101673  175.389816   
2023-06-07 00:00:00  6.30  15.90  26.003168  3.465785  41.622410  182.430099   
2023-06-08 00:00:00  6.42  15.99  26.251362  3.465785  41.869076  185.261429   
2023-06-09 00:00:00  6.36  15.79  25.869524  3.438866  41.147350  183.608231   

                           IVZ       SBUX       NOVA        HAL  ...  \
Date                                                             ...   
2023-06-05 00:00:00  13.896261  95.153831  17.940001  29.889910  ...   
2023-06-06 00:00:00  14.257439  93.525574  17.709999  30.111673  ...   
2023-06-07 00:00:00  14.392879  93.239899  17.900000  31.384403  ...   
2023-06-08 00:00:00  14.510261  94.411125

In [25]:
#Clean-up the dataframe
runFlag = True
if runFlag:
  print(f"Initial DataFrame shape: {all_data.shape}")

  #Identify active stocks (with data in the past 7 days)
  cutoff_date = datetime.today() - timedelta(days=7)
  #all_data.index = pd.to_datetime(all_data.index).tz_convert(None)
  recent_data = all_data.loc[all_data.index >= cutoff_date]
  active_symbols = recent_data.dropna(axis=1, how="all").columns

  #Filter the original DataFrame to include only active stocks
  df = all_data[active_symbols]
  df.index = pd.to_datetime(df.index)
  print(f"Filtered DataFrame shape: {df.shape}")

  print(df.head())

  # Save to CSV
  df.to_csv("filtered_stock_data.csv")
  print("\nFiltered data saved to 'filtered_stock_data.csv'")

Initial DataFrame shape: (501, 2025)
Filtered DataFrame shape: (501, 1803)
            UUUU   SONO        ZTO      OXLC       GDEN         CAR  \
Date                                                                  
2023-06-05  6.49  15.40  25.544962  3.452326  39.804394  167.047867   
2023-06-06  6.28  15.98  25.716789  3.492703  41.101673  175.389816   
2023-06-07  6.30  15.90  26.003168  3.465785  41.622410  182.430099   
2023-06-08  6.42  15.99  26.251362  3.465785  41.869076  185.261429   
2023-06-09  6.36  15.79  25.869524  3.438866  41.147350  183.608231   

                  IVZ       SBUX       NOVA        HAL  ...         SF  \
Date                                                    ...              
2023-06-05  13.896261  95.153831  17.940001  29.889910  ...  55.234631   
2023-06-06  14.257439  93.525574  17.709999  30.111673  ...  56.368530   
2023-06-07  14.392879  93.239899  17.900000  31.384403  ...  57.560104   
2023-06-08  14.510261  94.411125  17.879999  30.998730  .

In [26]:
#Reading the mastertable data from its file
#Run this if it's the first runtime, you have the file, and you don't want to retrieve all data again
runFlag = True
if runFlag:
  big_data_file = "filtered_stock_data.csv"
  if not os.path.exists(big_data_file):
    print(f"File '{big_data_file}' does not exist. Data must be retrieved (check cells above.)")
  else:
    print(f"File '{big_data_file}' exists. Reading data...")
    df = pd.read_csv(big_data_file, index_col='Date', parse_dates=['Date'])
    df.index = pd.to_datetime(df.index)
    print(f"Dataframe shape: {df.shape}")


File 'filtered_stock_data.csv' exists. Reading data...
Dataframe shape: (501, 1803)


In [27]:
# Update with today's data
runFlag = True
if runFlag:
 symbols = df.columns.tolist()


In [28]:
# Filter the DataFrame to keep only data from the desired period
dateCutoff = datetime.today() - timedelta(days=yearsPeriod * 365)
df = df[df.index >= dateCutoff]

print(f"DataFrame shape after filtering: {df.shape}")

DataFrame shape after filtering: (500, 1803)


In [29]:
df.head()

Unnamed: 0_level_0,UUUU,SONO,ZTO,OXLC,GDEN,CAR,IVZ,SBUX,NOVA,HAL,...,SF,SPG,ASTS,RPM,NEU,TPH,ACHR,SMTC,RXRX,CLX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-06,6.28,15.98,25.716789,3.492703,41.101673,175.389816,14.257439,93.525574,17.709999,30.111673,...,56.36853,97.799675,5.83,79.88636,387.879944,32.43,3.12,22.09,8.63,148.375137
2023-06-07,6.3,15.9,26.003168,3.465785,41.62241,182.430099,14.392879,93.239899,17.9,31.384403,...,57.560104,100.962021,5.79,80.987434,390.248779,32.290001,3.14,22.43,9.07,146.672226
2023-06-08,6.42,15.99,26.251362,3.465785,41.869076,185.261429,14.510261,94.411125,17.879999,30.99873,...,56.30127,99.923714,5.8,80.446556,387.359955,32.209999,3.06,23.52,9.14,147.289749
2023-06-09,6.36,15.79,25.869524,3.438866,41.14735,183.608231,14.573467,93.277992,17.49,31.181929,...,56.493458,100.825394,5.75,80.098839,386.252594,32.380001,3.25,21.4,9.18,146.578659
2023-06-12,6.52,16.02,25.907707,3.432136,41.156483,190.886063,14.410938,93.754097,18.24,30.593767,...,55.974556,100.151405,5.87,80.765289,386.108124,33.16,3.77,22.82,9.85,145.717865


In [30]:
# Calculate the 10th and 90th percentiles for each stock (thresholds)
percentiles = {}
for stock in df.columns:
    percentiles[stock] = {
        "buy_low": np.percentile(df[stock], 10),
        "sell_high": np.percentile(df[stock], 90),
    }

In [31]:
def is_stock_going_up(df, stock_symbol, period=20): #20 trading days in one month
  """Determines if a stock is going up.
  Args:
    df: Pandas DataFrame containing stock prices.
    stock_symbol: The stock symbol to analyze.
    period: The number of days to look back for price change.
  Returns:
    True if the stock is going up, False otherwise.
  """
  current_price = df[stock_symbol].iloc[-1]  # Get the latest price
  previous_price = df[stock_symbol].iloc[-(period + 1)]  # Get price 'period' days ago
  price_change_percentage = (current_price - previous_price) / previous_price * 100

  return price_change_percentage > 0  # True if stock is rising

In [32]:
def above_percentage_threshold(df, stock, percentage_threshold = 10, days = 20):
  """Determines if a stock is above a percentage threshold of the max value.
  Args:
    df: Pandas DataFrame containing stock prices.
    stock: The stock symbol to analyze.
    percentage_threshold: The percentage threshold to check against.
    days: The number of days to look back for the max value.
  Returns:
    True if the stock is above the threshold, False otherwise.
  """
  historical_max = df[stock].max()
  today_price = df.loc[df.index[-1], stock]
  percentage = (today_price / historical_max) * 100
  #print(f"max: {historical_max}, today: {today_price}, percentage: {percentage}")
  if percentage > percentage_threshold:
    return True
  else:
    return False

In [33]:
# prompt: plot an entire stock column from the dataframe, marking where we are today (latest row) and also marking the 2 threshold from percentiles

def plotStock(stock, df):
  plt.figure(figsize=(10, 5))
  plt.plot(df.index, df[stock])

  # Mark today's price
  plt.scatter(df.index[-1], df[stock].iloc[-1], color='red', label='Today')

  # Mark the thresholds
  plt.axhline(y=percentiles[stock]['buy_low'], color='green', linestyle='--', label='Buy Threshold')
  plt.axhline(y=percentiles[stock]['sell_high'], color='red', linestyle='--', label='Sell Threshold')

  plt.xlabel('Date')
  plt.ylabel(stock + ' Price')
  plt.title(stock)
  plt.legend()
  plt.grid(True)
  plt.show()

In [34]:
# Adjustments
sell_hook = 0.85  # 15% drop
buy_hook = 1.15   # 15% rise
trend_days = 10 # duration from where we take the trend (past X days)

In [35]:
# Hooks:
# Sell now: Stocks that are higher than the threshold, but recently came down with 5% of the last 7day max
# Buy now: Stocks that are lower than the threshold, but recently came up with 10% of the last 7day min

# Hook monitoring data
recommendations = {"Buy Now": [], "Sell Now": []}

thisWeek = df.tail(7)
lastMonth = df.tail(30)
max_values = df.max()
min_values = df.min()
today = df.tail(1)

for stock in today.columns:
  trend = df.tail(trend_days)[stock].mean() # Filter based on this week's moving average trend
  price = today.loc[today.index[0],stock]
  thresholds = percentiles[stock]

  #print(stock, price, thresholds)

  #if stock == "CHRD": #TMDX": #"CHRD":
   # print(stock, price, thresholds, is_stock_going_up(df, stock), above_percentage_threshold(df, stock))


  # Sell hook logic
  if price > thresholds["sell_high"] and is_stock_going_up(df,stock, 20) == False: # and trend > price:
    if price <= max_values[stock] * sell_hook:
    #if price <= thresholds["sell_high"] * sell_hook:
      recommendations["Sell Now"].append((stock, price))

  # Buy hook logic
  if price < thresholds["buy_low"] and is_stock_going_up(df, stock) and above_percentage_threshold(df, stock): # and trend < price:
    if price >= min_values[stock] * buy_hook:
    #if price >= thresholds["buy_low"] * 0.90:
      recommendations["Buy Now"].append((stock, price))
      #print(stock, weekly_std, is_stock_going_up(df, stock, 20))
#      print(stock, flatline_check)



# Sort recommendations
#recommendations["Buy Now"].sort(key=lambda x: x[1])  # Closest to buy low threshold first
#recommendations["Buy Now"].sort(key=lambda x: x[1] - percentiles[x[0]]["buy_low"])
#recommendations["Sell Now"].sort(key=lambda x: -x[1])  # Furthest above sell high threshold first

#Results

In [36]:
# Display results summary & plots
print(f"Results calculated based on data spanning the last {yearsPeriod} years")
print(f"Buy Now: {len(recommendations['Buy Now'])} results")
print(*[symbol for symbol, price in recommendations['Buy Now']], sep=", ")

print(f"Sell Now: {len(recommendations['Sell Now'])} results")
print(*[symbol for symbol, price in recommendations['Sell Now']], sep=", ")

Results calculated based on data spanning the last 2 years
Buy Now: 95 results
SONO, NKE, UA, IMVT, ALTO, EPAM, STHO, VAC, SSTK, SVC, DIN, MTN, VTLE, AMKR, CRL, HIVE, HXL, LCII, MGA, SPT, QDEL, HVT, OXY, XRAY, CVE, KSS, BIIB, IIPR, NBR, ZYXI, CIVI, MRVI, NVO, OGN, FNKO, NWL, MEOH, FLNC, BC, APA, DV, ASRT, AMCX, SWK, CHRD, BLNK, CGNX, FLYW, KOS, ABCL, GERN, KNX, DBI, MBUU, RIG, NNOX, ASPN, LEA, TDC, GPRO, CPRI, ASO, GSM, VET, PBF, ALGN, VSH, SABR, WEX, ILMN, TXG, CRSP, IAS, SM, BORR, CE, XWEL, FIGS, HKD, QS, MX, DVN, PII, TROX, RNG, EXPI, ROG, FIVN, HOG, SYNA, WGO, GFS, MTDR, TER, PPG
Sell Now: 11 results
FINV, HMY, ROOT, TIGR, GTX, YI, CPS, TRVG, AXSM, TGTX, ERJ


##Buy Now:

In [22]:
runFlag = False
if runFlag:
  print("Buy now:")
  for stock, price in recommendations["Buy Now"]:
    plotStock(stock, df)
    print(end='\n')

##Sell Now:



In [None]:
runFlag = False
if runFlag:
  print("Sell now:")
  for stock, price in recommendations["Sell Now"]:
    plotStock(stock, df)
    print(end='\n')

In [None]:
#plotStock("TMDX", df)
#plotStock("CHRD", df)
#plotStock("MLGO", df)

In [None]:
# Write recommendations to output files

def write_results_to_csv(filename, data):
    # Check if the file exists
    if os.path.exists(filename):
        # Read existing data into a dictionary
        with open(filename, "r") as file:
            reader = csv.reader(file)
            existing_data = {row[0]: row[1:] for row in reader}
    else:
        existing_data = {}

    # Update today's data
    existing_data[today] = [f"'{stock}':{round(price,2)}" for stock, price in data]

    # Write the updated data back to the CSV
    with open(filename, "w", newline="") as file:
        writer = csv.writer(file)
        for date, stocks in existing_data.items():
            writer.writerow([date] + stocks)

# Get the current date
today = datetime.now().strftime("%Y-%m-%d")

# Process each category
for category, stocks in recommendations.items():
    filename = f"{category.replace(' ', '_')}.csv"  # e.g., Buy_Now.csv
    write_results_to_csv(filename, stocks)

print("Data written to CSV files successfully!")

Data written to CSV files successfully!


In [None]:
# Notify if a buy-sell cycle got completed (and output the profit per stock)

def calculate_average_profit(buy_filename, sell_data):
    # Dictionary to store buy prices for each stock
    buy_prices = {}

    # Read the Buy Now file
    if os.path.exists(buy_filename):
        with open(buy_filename, "r") as file:
            reader = csv.reader(file)
            for row in reader:
                # Each row contains a date followed by stock-price pairs
                for stock_price in row[1:]:
                    stock, price = stock_price.split(":")
                    stock = stock.strip("'")  # Remove quotes
                    price = float(price)
                    if stock not in buy_prices:
                        buy_prices[stock] = []
                    buy_prices[stock].append(price)
    else:
        print(f"Buy file {buy_filename} does not exist!")
        return

    # Calculate average profit for stocks in Sell Now
    average_profits = {}
    for stock, sell_price in sell_data:
        if stock in buy_prices:
            avg_buy_price = sum(buy_prices[stock]) / len(buy_prices[stock])
            profit = sell_price - avg_buy_price
            average_profits[stock] = profit

    # Output results
    for stock, profit in average_profits.items():
        print(f"{stock}: Profit per stock = {profit:.2f}")

# Example usage
buy_now_filename = "Buy_Now.csv"
sell_now_data = recommendations["Sell Now"]

calculate_average_profit(buy_now_filename, sell_now_data)