In [32]:
import requests
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
import datetime
import warnings



# Load environment variables from the local .env file into the process environment
load_dotenv()

# Read the Financial Modeling Prep API key from the environment (None when API_KEY is not set)
API_KEY = os.getenv('API_KEY')
# Base URL of the API.
# NOTE(review): the fetch functions visible in this notebook build their URLs inline
# and do not reference this constant.
BASE_URL = 'https://financialmodelingprep.com/api/v3/'

# Function to fetch quarterly financial ratios for a given ticker
def fetch_financial_ratios(ticker):
    """Fetch quarterly financial ratios for ``ticker`` from Financial Modeling Prep.

    The request asks for the window ``current_year - 6`` .. ``current_year`` and the
    result is filtered again locally so only rows dated in that window remain.

    Args:
        ticker: Symbol interpolated into the request URL.

    Returns:
        A DataFrame with a datetime ``date`` column, or an empty DataFrame when the
        request fails, the payload is not a non-empty list of records, or the
        records carry no ``date`` field.

    Raises:
        requests.exceptions.RequestException: on connection problems or timeout.
    """
    current_year = datetime.datetime.now().year
    start_year = current_year - 6

    url = f"https://financialmodelingprep.com/api/v3/ratios/{ticker}?period=quarter&from={start_year}&to={current_year}&apikey={API_KEY}"
    # Without a timeout a single stalled connection would block the whole run.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print("Failed to fetch data:", response.status_code)
        return pd.DataFrame()

    data = response.json()
    # A 200 response can still carry no rows (unknown ticker) or a non-list body;
    # building a frame from that and indexing 'date' would raise.
    if not isinstance(data, list) or not data:
        print(f"No ratio records returned for {ticker}")
        return pd.DataFrame()

    df = pd.DataFrame(data)
    if 'date' not in df.columns:
        print(f"Ratio records for {ticker} have no 'date' field")
        return pd.DataFrame()

    # Convert the 'date' column to datetime so it can be filtered by year
    df['date'] = pd.to_datetime(df['date'])
    # Keep only the last 6 years, regardless of what the API returned
    df = df[df['date'].dt.year >= start_year]
    return df




### Historical S&P 500 constituents

In [5]:
def fetch_historical_changes(api_key):
    """Download the historical S&P 500 constituent changes.

    Args:
        api_key: API key appended to the request URL.

    Returns:
        A DataFrame built from the JSON response, or an empty DataFrame when the
        HTTP status is not 200 (the status code is printed in that case).
    """
    endpoint = f"https://financialmodelingprep.com/api/v3/historical/sp500_constituent?apikey={api_key}"
    reply = requests.get(endpoint)

    # Guard clause: report the failure and hand back an empty frame
    if reply.status_code != 200:
        print(f"Failed to fetch historical changes: {reply.status_code}")
        return pd.DataFrame()

    return pd.DataFrame(reply.json())

# Fetch the change history once; reconstruct_constituents consumes this frame later in the notebook
historical_changes = fetch_historical_changes(API_KEY)

### Current S&P 500 constituents

In [6]:
def fetch_current_constituents(api_key):
    """Download the current S&P 500 constituent list.

    Args:
        api_key: API key appended to the request URL.

    Returns:
        A DataFrame built from the JSON response, or an empty DataFrame when the
        HTTP status is not 200 (the status code is printed in that case).
    """
    endpoint = f"https://financialmodelingprep.com/api/v3/sp500_constituent?apikey={api_key}"
    reply = requests.get(endpoint)
    succeeded = reply.status_code == 200

    if not succeeded:
        print(f"Failed to fetch current constituents: {reply.status_code}")

    return pd.DataFrame(reply.json()) if succeeded else pd.DataFrame()

# Fetch today's index membership; it is the starting point for the backwards reconstruction
current_constituents = fetch_current_constituents(API_KEY)


### reconstructing constituents

In [7]:
# Placeholder function to calculate quarter dates for the last 6 years
def calculate_quarter_dates():
    """Return the first day of each calendar quarter from six years ago up to today.

    Quarters start on the 1st of January, April, July and October. Only dates
    strictly earlier than today are included, in ascending order.
    """
    cutoff = datetime.date.today()
    first_year = cutoff.year - 6

    # Every quarter start in the window, including ones still in the future
    candidates = (
        datetime.date(yr, month, 1)
        for yr in range(first_year, cutoff.year + 1)
        for month in (1, 4, 7, 10)
    )
    return [start for start in candidates if start < cutoff]

# Reconstruct constituents list
def reconstruct_constituents(historical_changes, current_constituents, quarters=None):
    """Rebuild index membership at each quarter date by undoing later changes.

    Starting from the current membership, every change dated after a quarter date
    is reversed: the added symbol is dropped and the removed ticker is restored.
    Changes are undone newest-first and each one is applied exactly once, so the
    result does not depend on the row order of ``historical_changes``.

    Args:
        historical_changes: DataFrame with 'date' ("%Y-%m-%d" strings), 'symbol',
            'addedSecurity' and 'removedTicker' columns. May be empty.
        current_constituents: DataFrame with a 'symbol' column.
        quarters: Optional iterable of ``datetime.date`` objects to reconstruct for.
            Defaults to the dates from ``calculate_quarter_dates()``.

    Returns:
        dict mapping each quarter date (latest first) to a set of symbols.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if a change date does not match "%Y-%m-%d".
    """
    if quarters is None:
        quarters = calculate_quarter_dates()

    def _is_present(value):
        # Missing values can arrive as None or NaN; NaN is truthy, so test the type too.
        return isinstance(value, str) and bool(value)

    members = set(current_constituents['symbol'])

    # Parse every change date once instead of once per quarter.
    pending = []
    if not historical_changes.empty:
        pending = [
            (datetime.datetime.strptime(raw_date, "%Y-%m-%d").date(), symbol, added, removed)
            for raw_date, symbol, added, removed in zip(
                historical_changes['date'],
                historical_changes['symbol'],
                historical_changes['addedSecurity'],
                historical_changes['removedTicker'],
            )
        ]
        # Newest first; the sort is stable, so same-day rows keep their given order.
        pending.sort(key=lambda item: item[0], reverse=True)

    constituents_by_quarter = {}
    cursor = 0
    for quarter_date in sorted(quarters, reverse=True):
        # Undo only the changes between the previous (later) quarter and this one.
        while cursor < len(pending) and pending[cursor][0] > quarter_date:
            _, symbol, added, removed = pending[cursor]
            if _is_present(added):
                members.discard(symbol)
            if _is_present(removed):
                members.add(removed)
            cursor += 1
        constituents_by_quarter[quarter_date] = members.copy()
    return constituents_by_quarter

# Build the per-quarter membership sets from the two frames fetched above
constituents_by_quarter = reconstruct_constituents(historical_changes, current_constituents)


In [8]:
# Collect every ticker that appears in at least one reconstructed quarter
master_stock_set = set()
for members in constituents_by_quarter.values():
    master_stock_set |= members
master_stock_list = list(master_stock_set)

In [9]:
# Number of distinct tickers collected across all reconstructed quarters
len(master_stock_list)


612

### create table and upload to snowflake

In [10]:
import pandas as pd
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas

# Load .env again and read the Snowflake connection settings from the environment.
# Each value is None when the corresponding variable is not set.
load_dotenv()
SNF_USER = os.getenv('snf_user') 
SNF_PASSWORD= os.getenv('snf_password')
SNF_ACCOUNT= os.getenv('snf_account_name')
SNF_WAREHOUSE= os.getenv('snf_warehouse')
SNF_DATABASE=os.getenv('snf_db')

# Function to upload financial ratios data to Snowflake
def upload_financial_ratios_to_snowflake(df, ticker):
    """Create the FINANCIAL_RATIOS table if needed and append ``df`` to it.

    Args:
        df: DataFrame of ratio rows whose column names match the table columns
            (case-insensitively; they are upper-cased before upload).
        ticker: Symbol used only in the success message.

    Returns:
        None. Failures while creating the table or uploading are printed rather
        than raised, so one bad upload does not stop the caller.
    """
    # Snowflake connection details
    ctx = snowflake.connector.connect(
        user=SNF_USER,
        password=SNF_PASSWORD,
        account=SNF_ACCOUNT,
        warehouse=SNF_WAREHOUSE,
        database=SNF_DATABASE,
        schema='PUBLIC'
    )

    try:
        cs = ctx.cursor()

        # Create schema and table if they don't exist
        try:
            cs.execute("CREATE SCHEMA IF NOT EXISTS PUBLIC")
            create_table_query = """
            CREATE TABLE IF NOT EXISTS FINANCIAL_RATIOS (
                symbol VARCHAR,
                date DATE,
                calendarYear INT,
                period VARCHAR,
                currentRatio FLOAT,
                quickRatio FLOAT,
                cashRatio FLOAT,
                daysOfSalesOutstanding FLOAT,
                daysOfInventoryOutstanding FLOAT,
                operatingCycle FLOAT,
                daysOfPayablesOutstanding FLOAT,
                cashConversionCycle FLOAT,
                grossProfitMargin FLOAT,
                operatingProfitMargin FLOAT,
                pretaxProfitMargin FLOAT,
                netProfitMargin FLOAT,
                effectiveTaxRate FLOAT,
                returnOnAssets FLOAT,
                returnOnEquity FLOAT,
                returnOnCapitalEmployed FLOAT,
                netIncomePerEBT FLOAT,
                ebtPerEbit FLOAT,
                ebitPerRevenue FLOAT,
                debtRatio FLOAT,
                debtEquityRatio FLOAT,
                longTermDebtToCapitalization FLOAT,
                totalDebtToCapitalization FLOAT,
                interestCoverage FLOAT,
                cashFlowToDebtRatio FLOAT,
                companyEquityMultiplier FLOAT,
                receivablesTurnover FLOAT,
                payablesTurnover FLOAT,
                inventoryTurnover FLOAT,
                fixedAssetTurnover FLOAT,
                assetTurnover FLOAT,
                operatingCashFlowPerShare FLOAT,
                freeCashFlowPerShare FLOAT,
                cashPerShare FLOAT,
                payoutRatio FLOAT,
                operatingCashFlowSalesRatio FLOAT,
                freeCashFlowOperatingCashFlowRatio FLOAT,
                cashFlowCoverageRatios FLOAT,
                shortTermCoverageRatios FLOAT,
                capitalExpenditureCoverageRatio FLOAT,
                dividendPaidAndCapexCoverageRatio FLOAT,
                dividendPayoutRatio FLOAT,
                priceBookValueRatio FLOAT,
                priceToBookRatio FLOAT,
                priceToSalesRatio FLOAT,
                priceEarningsRatio FLOAT,
                priceToFreeCashFlowsRatio FLOAT,
                priceToOperatingCashFlowsRatio FLOAT,
                priceCashFlowRatio FLOAT,
                priceEarningsToGrowthRatio FLOAT,
                priceSalesRatio FLOAT,
                dividendYield FLOAT,
                enterpriseValueMultiple FLOAT,
                priceFairValue FLOAT,
                SNAPSHOT_DATE DATE
            )
            """
            cs.execute(create_table_query)
        except Exception as e:
            print(f"Error creating schema/table: {e}")
        finally:
            cs.close()

        # Upload DataFrame to Snowflake
        try:
            # The table columns are declared unquoted, so Snowflake stores them
            # upper-cased, while write_pandas quotes DataFrame column names by default.
            # Upper-case a copy so the names match; the caller's frame is not touched.
            write_pandas(ctx, df.rename(columns=str.upper), 'FINANCIAL_RATIOS')
            print(f"Data for {ticker} uploaded successfully.")
        except Exception as e:
            print(f"Error uploading data: {e}")
    finally:
        # Always release the connection, even on an unexpected error
        ctx.close()



### Process all stocks in the list and upload to Snowflake

In [34]:
import time
# Ignore every UserWarning from this point on.
# NOTE(review): the filter is process-wide, so it also hides UserWarnings unrelated to the upload.
warnings.filterwarnings('ignore', category=UserWarning)
def process_and_upload_snf(stock_list, batch_size=100, pause_seconds=60):
    """Fetch ratios for every ticker and append them to FINANCIAL_RATIOS in Snowflake.

    Tickers are processed in batches with a pause between batches to respect the
    API rate limit. A failure for one ticker is printed and the loop continues.

    Args:
        stock_list: Sequence of ticker symbols.
        batch_size: Number of tickers to process between pauses (default 100).
        pause_seconds: Seconds to sleep between batches (default 60).

    Returns:
        None.
    """
    # NOTE(review): the SNF_* settings used below were read when their cell ran,
    # so this call does not change them; kept so the function's side effects are unchanged.
    load_dotenv()
    ctx = snowflake.connector.connect(
        user=SNF_USER,
        password=SNF_PASSWORD,
        # Use the configured account, as upload_financial_ratios_to_snowflake does,
        # instead of a hard-coded account identifier.
        account=SNF_ACCOUNT,
        warehouse=SNF_WAREHOUSE,
        database=SNF_DATABASE,
        schema='PUBLIC'
    )

    # One snapshot date per run, so all rows agree even if the run crosses midnight
    snapshot_date = datetime.datetime.now().date()

    try:
        # Process stocks in batches to respect the API limit
        for i in range(0, len(stock_list), batch_size):
            batch = stock_list[i:i + batch_size]
            for ticker in batch:
                try:
                    df = fetch_financial_ratios(ticker)

                    # Check for emptiness first: an empty frame has no 'DATE' column,
                    # and indexing it would be reported as an error instead.
                    if df.empty:
                        print(f"No data fetched for {ticker}")
                        continue

                    # Upper-case the column names to match the table's columns;
                    # rename returns a copy, so the fetched frame is not mutated.
                    upload_df = df.rename(columns=str.upper)
                    upload_df['DATE'] = pd.to_datetime(upload_df['DATE'], errors='coerce').dt.date
                    upload_df['SNAPSHOT_DATE'] = snapshot_date

                    write_pandas(ctx, upload_df, 'FINANCIAL_RATIOS')  # Table name in Snowflake
                    print(f"Uploaded data for {ticker}")
                except Exception as e:
                    print(f"Error processing {ticker}: {e}")

            # Wait before the next batch, but not after the last one
            if i + batch_size < len(stock_list):
                print("Waiting to respect API rate limit...")
                time.sleep(pause_seconds)
    finally:
        # Close the connection even if the run is interrupted (e.g. during the sleep)
        ctx.close()