This notebook allows for the creation of sample ticker sets with category labels such as asset class, market cap, sector, etc.

In [3]:
import os
import pandas as pd
import random

Method 1: TradingView Screens Export

Step 1: Define parameters for building the sample ticker set

In [4]:
# Date tag embedded in the TradingView screen export file names
screen_date = "02_24_25"

# Market-cap buckets to include in the sample.
# Full list for reference: MICROCAP, SMALLCAP, MIDCAP, LARGECAP, MEGACAP
market_caps = [
    "MIDCAP",
    "LARGECAP",
    "MEGACAP",
]

# Upper bound on the number of tickers kept for each market-cap bucket
n_samples_by_mktcap = 33

In [5]:
# Load one TradingView screen export per requested market cap and stack them
# into a single frame. Exports live in `folder_path`, resolved against the
# current working directory.
folder_path = "TV_Screens"
screens_dir = os.path.join(os.getcwd(), folder_path)

# One dataframe per market cap, each tagged with the bucket it was read for
dfs = [
    pd.read_csv(
        os.path.join(screens_dir, f"TV_SCREEN_US_STOCK_{cap}_{screen_date}.csv")
    ).assign(**{'Market cap tag': cap})
    for cap in market_caps
]

# Single frame with a fresh 0..n-1 index
combined_df = pd.concat(dfs, ignore_index=True)

In [6]:
# Stratified ticker sample: within every market cap, draw a few tickers from
# each sector, then cap the number of tickers kept for that market cap.

# Seed for every random draw in this cell, so that Restart & Run All yields
# the same ticker set. Set to None to draw a fresh, non-reproducible sample.
sample_seed = 42
# Maximum number of tickers drawn from each sector within a market cap
max_tickers_per_sector = 2

rng = random.Random(sample_seed)

sample_ticker_list = []  # one list of ticker records per market cap
for market_cap in market_caps:
    cap_df = combined_df[combined_df['Market cap tag'] == market_cap]

    candidates = []
    # sort=False keeps sectors in order of first appearance; groupby skips
    # rows whose sector is missing, so those rows never contribute a ticker.
    for sector, sector_df in cap_df.groupby('Sector', sort=False):
        # Sample up to `max_tickers_per_sector` tickers, or all if fewer exist.
        # Each draw gets its own seed derived from `rng` so sectors do not
        # share one random stream position.
        picked = sector_df['Symbol'].sample(
            n=min(max_tickers_per_sector, len(sector_df)),
            random_state=rng.randrange(2**32),
        )
        candidates.extend(
            {"ticker": ticker, "sector": sector, "market_cap": market_cap}
            for ticker in picked
        )

    # Keep at most `n_samples_by_mktcap` records for this market cap
    sample_ticker_list.append(
        rng.sample(candidates, min(n_samples_by_mktcap, len(candidates)))
    )

# Flat list of {"ticker", "sector", "market_cap"} records across all market caps
flattened_ticker_list = [ticker for sublist in sample_ticker_list for ticker in sublist]

In [7]:
# Report how many ticker records ended up in the flattened sample
n_tickers = len(flattened_ticker_list)
print(f"Number of tickers in sample = {n_tickers}")

Number of tickers in sample = 87


In [8]:
# Repeat every sampled ticker record `samples_per_ticker` times
# (presumably so that several price samples are requested per ticker — confirm).
# The list is rebuilt from `sample_ticker_list` instead of being multiplied in
# place, so re-running this cell always yields the same length rather than
# growing the list by another factor on every execution.
samples_per_ticker = 3
flattened_ticker_list = [
    ticker for sublist in sample_ticker_list for ticker in sublist
] * samples_per_ticker

Step 2: Define the parameters to fetch data for the sample ticker set

In [9]:
# Define the bar size for the sample data
bar_size = '1 hour'
# Define the duration and years_in_sample
duration = '1 Y' 
years_in_sample = 1 # Integer value - lowest value is 1

In [10]:
# NOTE(review): presumably required so the data gatherer's asyncio code can run
# inside the notebook's already-running event loop — confirm against the
# nest_asyncio documentation.
import nest_asyncio
nest_asyncio.apply()

import os
import sys

# Make the directory two levels above the current working directory importable.
# This must run before the jarjarquant import below, which presumably resolves
# from that directory — confirm against the repository layout.
sys.path.append(os.path.join(os.getcwd(), "../.."))

from jarjarquant import Jarjarquant
# Project entry point; its `data_gatherer` attribute is used to fetch the price samples.
# NOTE(review): instantiation appears to contact the broker API (see the cell's output) — confirm.
jjq = Jarjarquant()

Error 10275, reqId -1: Positions info is not available for account(s): U12104351 until the application is finished and approved.


In [None]:
# Request price samples for every record in the (repeated) ticker list;
# one ticker symbol is passed per record, duplicates included.
sample_tickers = [entry['ticker'] for entry in flattened_ticker_list]
results = jjq.data_gatherer.get_random_price_samples_tws(
    years_in_sample=years_in_sample,
    tickers=sample_tickers,
    num_tickers_to_sample=len(flattened_ticker_list),
    bar_size=bar_size,
    duration=duration,
    verbose=True,
)

In [12]:
# Merge the per-request result mappings into one dict keyed by ticker;
# when a ticker occurs in several results, the last one wins.
flat_results = {}
for result in results:
    flat_results.update(result.items())

# Ticker -> {market_cap, sector} lookup built from the sampled records
ticker_data = {
    entry['ticker']: {'market_cap': entry['market_cap'], 'sector': entry['sector']}
    for entry in flattened_ticker_list
}

# Metadata restricted to the tickers for which data actually came back
sampled_ticker_data = {symbol: ticker_data[symbol] for symbol in flat_results}

In [None]:
# Number of entries in `results`; each entry is a mapping (it is iterated with
# `.items()` in the neighbouring cells), so this counts result entries rather
# than distinct tickers.
len(results)

71

Step 3: Name the sample and save sample data and metadata

In [29]:
from datetime import datetime

# Date stamp (YYYY-MM-DD) used in the metadata file name, and a time-of-day
# stamp (HH-MM-SS) that is not referenced by the other visible cells.
today_date = datetime.today().date().isoformat()
timestamp = format(datetime.now(), '%H-%M-%S')

# Customize the folder name for this sample set
sample_folder_name = "1hour_midcap+_yearly_samples_172"

# Create the output folder under data/; an existing folder is left as is
os.makedirs(f"data/{sample_folder_name}", exist_ok=True)

In [30]:
# Write every returned price frame to its own CSV in the sample folder.
# File name: <market_cap>_<sector>_<bar_size>_<ticker>.csv for the first frame
# of a ticker; later frames of the same ticker get an extra _<suffix>.
# `suffix` is one running counter shared by all tickers, not a per-ticker count.
keys_so_far = set()
suffix = 0
out_dir = f"data/{sample_folder_name}"

for result in results:
    for symbol, frame in result.items():
        is_repeat = symbol in keys_so_far
        if not is_repeat:
            keys_so_far.add(symbol)

        meta = ticker_data[symbol]
        stem = f"{meta['market_cap']}_{meta['sector']}_{bar_size}_{symbol}"
        tail = f"_{suffix}" if is_repeat else ""
        frame.to_csv(f"{out_dir}/{stem}{tail}.csv")

        if is_repeat:
            suffix += 1

In [31]:
# Persist the ticker metadata (market cap and sector per sampled ticker)
# next to the CSVs, tagged with the bar size and today's date.
import pickle

metadata_path = f"data/{sample_folder_name}/ticker_data_{bar_size}_{today_date}.pkl"
with open(metadata_path, "wb") as handle:
    pickle.dump(sampled_ticker_data, handle)