Step1: Read yfinance data from gcs data lake

!pip install google-cloud-storage

!pip install google-cloud-bigquery

In [45]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, input_file_name, regexp_extract
from pyspark.sql import functions as F
from pyspark.sql.window import Window
import pandas as pd
from google.cloud import storage
from google.cloud import bigquery
import os

In [2]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "C:\\Users\\nbyin\\.gc\\credentials\\google_credentials.json"

client = storage.Client()
buckets = list(client.list_buckets())
print("Available Buckets:", [bucket.name for bucket in buckets])

Available Buckets: ['yfinance-data-lake']


In [52]:
spark = SparkSession.builder \
    .appName("StockAnalysis") \
    .config("spark.jars", "C:\\Users\\nbyin\\5_spark\\spark-3.3.2-bin-hadoop3\\jars\\spark-3.3-bigquery-0.42.1.jar") \
    .getOrCreate()

In [4]:
wiki_path = "gs://yfinance-data-lake/nasdaq_100_data/*.csv"
nasdaq_100_df = spark.read.option("header", "true").csv(wiki_path)
nasdaq_100_df.show()

+--------------------+------+--------------------+--------------------+
|             Company|Ticker|         GICS Sector|   GICS Sub-Industry|
+--------------------+------+--------------------+--------------------+
|          Adobe Inc.|  ADBE|Information Techn...|Application Software|
|Advanced Micro De...|   AMD|Information Techn...|      Semiconductors|
|              Airbnb|  ABNB|Consumer Discreti...|Hotels, Resorts &...|
|Alphabet Inc. (Cl...| GOOGL|Communication Ser...|Interactive Media...|
|Alphabet Inc. (Cl...|  GOOG|Communication Ser...|Interactive Media...|
|              Amazon|  AMZN|Consumer Discreti...|    Broadline Retail|
|American Electric...|   AEP|           Utilities|  Electric Utilities|
|               Amgen|  AMGN|         Health Care|       Biotechnology|
|      Analog Devices|   ADI|Information Techn...|      Semiconductors|
|               Ansys|  ANSS|Information Techn...|Application Software|
|          Apple Inc.|  AAPL|Information Techn...|Technology Har

In [5]:
ticker_list = [row["Ticker"] for row in nasdaq_100_df.select("Ticker").distinct().collect()]
print(ticker_list)

['CDW', 'CRWD', 'EA', 'ARM', 'APP', 'AAPL', 'TTWO', 'CSCO', 'ADI', 'MCHP', 'ROP', 'GILD', 'AMGN', 'QCOM', 'CEG', 'MNST', 'PANW', 'META', 'TSLA', 'INTC', 'MDB', 'INTU', 'ROST', 'CCEP', 'CTAS', 'ANSS', 'PAYX', 'CSX', 'ADP', 'VRTX', 'PDD', 'DXCM', 'GEHC', 'TTD', 'AZN', 'GOOG', 'PCAR', 'NFLX', 'COST', 'FANG', 'ISRG', 'AXON', 'EXC', 'ABNB', 'KLAC', 'AEP', 'WBD', 'DASH', 'LIN', 'TMUS', 'DDOG', 'NXPI', 'GOOGL', 'MSTR', 'GFS', 'KDP', 'TEAM', 'MAR', 'PLTR', 'ADSK', 'LRCX', 'CSGP', 'MRVL', 'TXN', 'WDAY', 'AVGO', 'AMAT', 'AMD', 'IDXX', 'ON', 'SBUX', 'VRSK', 'MU', 'CTSH', 'CDNS', 'CMCSA', 'CPRT', 'ORLY', 'PYPL', 'ODFL', 'BKR', 'REGN', 'KHC', 'ZS', 'LULU', 'AMZN', 'BIIB', 'MSFT', 'MDLZ', 'HON', 'ADBE', 'BKNG', 'FAST', 'FTNT', 'SNPS', 'ASML', 'PEP', 'NVDA', 'XEL', 'CHTR', 'MELI']


In [6]:
stocks_path = "gs://yfinance-data-lake/stocks/*/*.parquet"
yfinance_data = spark.read.parquet(stocks_path)
yfinance_data.show(5)

+--------------------+------------------+-----------------+------------------+------------------+---------+---------+------------+
|                Date|              Open|             High|               Low|             Close|   Volume|Dividends|Stock Splits|
+--------------------+------------------+-----------------+------------------+------------------+---------+---------+------------+
|2024-01-02 00:00:...| 49.22551389912714|49.27649344260116| 47.57713465197093| 48.14991760253906|411254000|      0.0|         0.0|
|2024-01-03 00:00:...| 47.46717554449256|48.16591065021649|47.302236567517355|47.551143646240234|320896000|      0.0|         0.0|
|2024-01-04 00:00:...| 47.74906818641796|48.48179475152094| 47.49016653492504|47.979984283447266|306535000|      0.0|         0.0|
|2024-01-05 00:00:...| 48.44380758245641|49.52839932284036| 48.28786410604074| 49.07856750488281|415039000|      0.0|         0.0|
|2024-01-08 00:00:...|49.493414803058315| 52.2553780928012| 49.46042624518818| 52.2

In [7]:
yfinance_with_ticker = yfinance_data.withColumn(
    "ticker", regexp_extract(input_file_name(), r".*/stocks/([^/]+)/.*", 1)
)
yfinance_with_ticker.show(5)

+--------------------+------------------+-----------------+------------------+------------------+---------+---------+------------+------+
|                Date|              Open|             High|               Low|             Close|   Volume|Dividends|Stock Splits|ticker|
+--------------------+------------------+-----------------+------------------+------------------+---------+---------+------------+------+
|2024-01-02 00:00:...| 49.22551389912714|49.27649344260116| 47.57713465197093| 48.14991760253906|411254000|      0.0|         0.0|  NVDA|
|2024-01-03 00:00:...| 47.46717554449256|48.16591065021649|47.302236567517355|47.551143646240234|320896000|      0.0|         0.0|  NVDA|
|2024-01-04 00:00:...| 47.74906818641796|48.48179475152094| 47.49016653492504|47.979984283447266|306535000|      0.0|         0.0|  NVDA|
|2024-01-05 00:00:...| 48.44380758245641|49.52839932284036| 48.28786410604074| 49.07856750488281|415039000|      0.0|         0.0|  NVDA|
|2024-01-08 00:00:...|49.493414803

In [8]:
yfinance_with_ticker = yfinance_with_ticker.withColumn("Date", F.to_date("Date"))
yfinance_with_ticker.show(5)

+----------+------------------+-----------------+------------------+------------------+---------+---------+------------+------+
|      Date|              Open|             High|               Low|             Close|   Volume|Dividends|Stock Splits|ticker|
+----------+------------------+-----------------+------------------+------------------+---------+---------+------------+------+
|2024-01-02| 49.22551389912714|49.27649344260116| 47.57713465197093| 48.14991760253906|411254000|      0.0|         0.0|  NVDA|
|2024-01-03| 47.46717554449256|48.16591065021649|47.302236567517355|47.551143646240234|320896000|      0.0|         0.0|  NVDA|
|2024-01-04| 47.74906818641796|48.48179475152094| 47.49016653492504|47.979984283447266|306535000|      0.0|         0.0|  NVDA|
|2024-01-05| 48.44380758245641|49.52839932284036| 48.28786410604074| 49.07856750488281|415039000|      0.0|         0.0|  NVDA|
|2024-01-08|49.493414803058315| 52.2553780928012| 49.46042624518818| 52.23338317871094|642510000|      0

In [9]:
# eg.take a glimpse at APPL
df_spark_appl = yfinance_with_ticker.filter(col("ticker") == "AAPL")
df_spark_appl.show()

+----------+-----------------+-----------------+-----------------+-----------------+---------+---------+------------+------+
|      Date|             Open|             High|              Low|            Close|   Volume|Dividends|Stock Splits|ticker|
+----------+-----------------+-----------------+-----------------+-----------------+---------+---------+------------+------+
|2020-01-02|71.72100391407992|72.77658292273667|71.46679725362982|72.71605682373047|135480400|      0.0|         0.0|  AAPL|
|2020-01-03|71.94132818321717|72.77174458511139| 71.7839617851537|72.00911712646484|146322800|      0.0|         0.0|  AAPL|
|2020-01-06|71.12786596061405|72.62164622763687|70.87607527260708| 72.5829086303711|118387200|      0.0|         0.0|  AAPL|
|2020-01-07|72.59260129853506|72.84923143823697|72.02123831231323| 72.2415542602539|108872000|      0.0|         0.0|  AAPL|
|2020-01-08| 71.9437663243162|73.70628659812324| 71.9437663243162|73.40365600585938|132079200|      0.0|         0.0|  AAPL|


Step2: Join yfinance data with nasdaq_100_data from wikipedia

In [10]:
yfinance_with_ticker = yfinance_with_ticker.withColumnRenamed("ticker", "ticker_yf")

In [11]:
combined_df = yfinance_with_ticker.join(nasdaq_100_df, yfinance_with_ticker["ticker_yf"] == nasdaq_100_df["Ticker"], how="inner")
combined_df.show(5)

+----------+------------------+-----------------+------------------+------------------+---------+---------+------------+---------+-------+------+--------------------+-----------------+
|      Date|              Open|             High|               Low|             Close|   Volume|Dividends|Stock Splits|ticker_yf|Company|Ticker|         GICS Sector|GICS Sub-Industry|
+----------+------------------+-----------------+------------------+------------------+---------+---------+------------+---------+-------+------+--------------------+-----------------+
|2024-01-02| 49.22551389912714|49.27649344260116| 47.57713465197093| 48.14991760253906|411254000|      0.0|         0.0|     NVDA| Nvidia|  NVDA|Information Techn...|   Semiconductors|
|2024-01-03| 47.46717554449256|48.16591065021649|47.302236567517355|47.551143646240234|320896000|      0.0|         0.0|     NVDA| Nvidia|  NVDA|Information Techn...|   Semiconductors|
|2024-01-04| 47.74906818641796|48.48179475152094| 47.49016653492504|47.9799

In [12]:
data = combined_df.select(
    col("Company").alias("company_name"),
    col("Ticker"), 
    col("Date"), 
    col("Open"), 
    col("Close"), 
    col("GICS Sector").alias("sector"),
    col("GICS Sub-Industry").alias("sub_industry")
)
data.show(5)

+------------+------+----------+------------------+------------------+--------------------+--------------+
|company_name|Ticker|      Date|              Open|             Close|              sector|  sub_industry|
+------------+------+----------+------------------+------------------+--------------------+--------------+
|      Nvidia|  NVDA|2024-01-02| 49.22551389912714| 48.14991760253906|Information Techn...|Semiconductors|
|      Nvidia|  NVDA|2024-01-03| 47.46717554449256|47.551143646240234|Information Techn...|Semiconductors|
|      Nvidia|  NVDA|2024-01-04| 47.74906818641796|47.979984283447266|Information Techn...|Semiconductors|
|      Nvidia|  NVDA|2024-01-05| 48.44380758245641| 49.07856750488281|Information Techn...|Semiconductors|
|      Nvidia|  NVDA|2024-01-08|49.493414803058315| 52.23338317871094|Information Techn...|Semiconductors|
+------------+------+----------+------------------+------------------+--------------------+--------------+
only showing top 5 rows



Step3: Group by sector & sub-industry

In [15]:
sector_count = data.groupBy("sector").count()
sector_count.show()

distinct_sectors = data.select("sector").distinct()
distinct_sectors.count()

+--------------------+-----+
|              sector|count|
+--------------------+-----+
|Communication Ser...|27178|
|Information Techn...|91948|
|Consumer Discreti...|26239|
|         Industrials|27610|
|         Health Care|23102|
|              Energy| 5020|
|           Utilities| 8271|
|    Consumer Staples|17444|
|           Materials| 2510|
|          Financials| 2384|
|         Real Estate| 2510|
+--------------------+-----+



11

In [16]:
sub_industry_count = data.groupBy("sub_industry").count()
sub_industry_count.show()

distinct_industry = data.select("sub_industry").distinct()
distinct_industry.count()

+--------------------+-----+
|        sub_industry|count|
+--------------------+-----+
|Interactive Media...| 7530|
|Technology Hardwa...| 2510|
|    Systems Software|10731|
|      Semiconductors|28732|
|Semiconductor Equ...|10040|
|         Restaurants| 2510|
|   Cable & Satellite| 5020|
| Rail Transportation| 2510|
|Application Software|28828|
|Communications Eq...| 2510|
|       Biotechnology|12550|
|  Electric Utilities| 5761|
|Oil & Gas Explora...| 2510|
|Industrial Conglo...| 2510|
|Construction Mach...| 2510|
|Soft Drinks & Non...|10040|
|Hotels, Resorts &...| 6037|
|Wireless Telecomm...| 2510|
|     Pharmaceuticals| 2510|
|Consumer Staples ...| 2510|
+--------------------+-----+
only showing top 20 rows



47

In [17]:
sector_daily_price = data.groupBy("Date", "sector").agg(
    F.avg("Open").alias("avg_open"),
    F.avg("Close").alias("avg_close")
)

sector_daily_price.show(5)

+----------+--------------------+------------------+------------------+
|      Date|              sector|          avg_open|         avg_close|
+----------+--------------------+------------------+------------------+
|2024-04-19|Information Techn...|220.32059288280567|215.98012184515233|
|2020-10-22|Information Techn...| 140.5777272669553|139.50613440965353|
|2020-12-22|Information Techn...|164.06603523318208| 165.7427168143423|
|2021-07-30|Information Techn...|193.60630756023656|195.60227213150415|
|2021-10-06|Information Techn...|  193.529562942009| 197.0843870700934|
+----------+--------------------+------------------+------------------+
only showing top 5 rows



In [18]:
sub_industry_daily_price = data.groupBy("Date", "sub_industry").agg(
    F.avg("Open").alias("avg_open"),
    F.avg("Close").alias("avg_close")
)

sub_industry_daily_price.show(5)

+----------+--------------------+------------------+------------------+
|      Date|        sub_industry|          avg_open|         avg_close|
+----------+--------------------+------------------+------------------+
|2020-03-16|      Semiconductors|44.265391969379465| 41.81771997971968|
|2020-05-08|      Semiconductors|54.771261831942304| 55.84048639644276|
|2020-05-22|      Semiconductors| 55.02228055985577|55.147816224531695|
|2020-06-17|      Semiconductors| 61.50840786548904| 61.45337989113548|
|2020-03-09|Technology Hardwa...| 64.00658563730508|  64.5938720703125|
+----------+--------------------+------------------+------------------+
only showing top 5 rows



Step4: Daily return and moving average

In [22]:
data = data.withColumn("daily_return", (col("Close") - col("Open")) / col("Open"))
data = data.withColumn("rolling_50_day_avg", F.avg("Close").over(Window.partitionBy("ticker").orderBy("Date").rowsBetween(-50, 0)))
data = data.withColumn("rolling_100_day_avg", F.avg("Close").over(Window.partitionBy("ticker").orderBy("Date").rowsBetween(-100, 0)))
data = data.withColumn("rolling_200_day_avg", F.avg("Close").over(Window.partitionBy("ticker").orderBy("Date").rowsBetween(-200, 0)))
data.show(5)

+------------+------+----------+------------------+------------------+--------------------+--------------------+--------------------+------------------+-------------------+-------------------+
|company_name|Ticker|      Date|              Open|             Close|              sector|        sub_industry|        daily_return|rolling_50_day_avg|rolling_100_day_avg|rolling_200_day_avg|
+------------+------+----------+------------------+------------------+--------------------+--------------------+--------------------+------------------+-------------------+-------------------+
|  Apple Inc.|  AAPL|2015-01-02| 24.77868056866345|  24.3204345703125|Information Techn...|Technology Hardwa...|-0.01849355929510...|  24.3204345703125|   24.3204345703125|   24.3204345703125|
|  Apple Inc.|  AAPL|2015-01-05| 24.08908208842444|23.635284423828125|Information Techn...|Technology Hardwa...|-0.01883831284772696|23.977859497070312| 23.977859497070312| 23.977859497070312|
|  Apple Inc.|  AAPL|2015-01-06|23.

In [27]:
sector_daily_price = sector_daily_price.withColumn("daily_return", (col("avg_close") - col("avg_open")) / col("avg_open"))
sector_daily_price.show(5)

sub_industry_daily_price = sub_industry_daily_price.withColumn("daily_return", (col("avg_close") - col("avg_open")) / col("avg_open"))
sub_industry_daily_price.show(5)

+----------+--------------------+------------------+------------------+--------------------+
|      Date|              sector|          avg_open|         avg_close|        daily_return|
+----------+--------------------+------------------+------------------+--------------------+
|2024-04-19|Information Techn...|220.32059288280567|215.98012184515233|-0.01970070514453...|
|2020-10-22|Information Techn...| 140.5777272669553|139.50613440965353|-0.00762277835995...|
|2020-12-22|Information Techn...|164.06603523318208| 165.7427168143423| 0.01021955323523966|
|2021-07-30|Information Techn...|193.60630756023656|195.60227213150415|0.010309398471672155|
|2021-10-06|Information Techn...|  193.529562942009| 197.0843870700934| 0.01836837780256663|
+----------+--------------------+------------------+------------------+--------------------+
only showing top 5 rows

+----------+--------------------+------------------+------------------+--------------------+
|      Date|        sub_industry|          av

Step4: calculate relative return starting Jan 2015

In [25]:
base_date = "2015-01-02"

In [26]:
base_data = data.filter(col("Date") == base_date)
base_data = base_data.select("ticker", "Close").withColumnRenamed("Close", "base_Close")

data = data.join(base_data, on="ticker", how="left")
data = data.withColumn("relative_return", (col("Close") - col("base_Close")) / col("base_Close") * 100)
data = data.drop("base_Close")
data.show(5)

+------+------------------+
|ticker|        base_Close|
+------+------------------+
|  AAPL|  24.3204345703125|
|  AVGO| 7.605642318725586|
|  LRCX| 3.865813970565796|
| GOOGL|26.351516723632812|
|  GOOG|26.045289993286133|
+------+------------------+
only showing top 5 rows

+------+------------+----------+------------------+------------------+--------------------+--------------------+--------------------+------------------+-------------------+-------------------+-------------------+
|Ticker|company_name|      Date|              Open|             Close|              sector|        sub_industry|        daily_return|rolling_50_day_avg|rolling_100_day_avg|rolling_200_day_avg|    relative_return|
+------+------------+----------+------------------+------------------+--------------------+--------------------+--------------------+------------------+-------------------+-------------------+-------------------+
|  AAPL|  Apple Inc.|2015-01-02| 24.77868056866345|  24.3204345703125|Information Te

Step6: Get Big Seven dataset

The Big 7 Tech Players:

Apple Inc. - AAPL

Microsoft Corporation - MSFT

Amazon.com, Inc. - AMZN

Alphabet Inc. (Google) - GOOGL

Meta Platforms, Inc. (Facebook) - META

NVIDIA Corporation - NVDA

Tesla, Inc. - TSLA

In [33]:
big_seven_tickers = ['AAPL', 'MSFT', 'AMZN', 'AMZN', 'GOOGL', 'META', 'NVDA', 'TSLA']
print(big_seven_tickers)

['AAPL', 'MSFT', 'AMZN', 'AMZN', 'GOOGL', 'META', 'NVDA', 'TSLA']


In [34]:
big_seven_df = data.filter(col("ticker").isin(big_seven_tickers))
big_seven_df.show(5)

+------+------------+----------+------------------+------------------+--------------------+--------------------+--------------------+------------------+-------------------+-------------------+-------------------+
|Ticker|company_name|      Date|              Open|             Close|              sector|        sub_industry|        daily_return|rolling_50_day_avg|rolling_100_day_avg|rolling_200_day_avg|    relative_return|
+------+------------+----------+------------------+------------------+--------------------+--------------------+--------------------+------------------+-------------------+-------------------+-------------------+
|  AAPL|  Apple Inc.|2015-01-02| 24.77868056866345|  24.3204345703125|Information Techn...|Technology Hardwa...|-0.01849355929510...|  24.3204345703125|   24.3204345703125|   24.3204345703125|                0.0|
|  AAPL|  Apple Inc.|2015-01-05| 24.08908208842444|23.635284423828125|Information Techn...|Technology Hardwa...|-0.01883831284772696|23.977859497070

Step7: Annualized Rate of Return

In [61]:
data_yearly = data.groupBy("ticker", F.year("Date").alias("year")).agg(F.avg("daily_return").alias("annual_avg_return"))
data_yearly.show(5)

+------+----+--------------------+
|ticker|year|   annual_avg_return|
+------+----+--------------------+
|  AAPL|2020|0.001587037250173...|
|  AAPL|2021|8.922906397032655E-4|
|  AAPL|2016|9.417617653038774E-4|
|  AAPL|2019|0.001826174510894...|
|  AAPL|2015|-9.54329539536688...|
+------+----+--------------------+
only showing top 5 rows



In [62]:
big_seven_yearly = big_seven_df.groupBy("ticker", F.year("Date").alias("year")).agg(F.avg("daily_return").alias("annual_avg_return"))
big_seven_yearly.show(5)

+------+----+--------------------+
|ticker|year|   annual_avg_return|
+------+----+--------------------+
|  AAPL|2020|0.001587037250173...|
|  AAPL|2021|8.922906397032655E-4|
|  AAPL|2016|9.417617653038774E-4|
|  AAPL|2019|0.001826174510894...|
|  AAPL|2015|-9.54329539536688...|
+------+----+--------------------+
only showing top 5 rows



In [63]:
sector_yearly_return = sector_daily_price.groupBy("sector", F.year("Date").alias("year")).agg(F.avg("daily_return").alias("annual_avg_return"))
sector_yearly_return.show(5)

+--------------------+----+--------------------+
|              sector|year|   annual_avg_return|
+--------------------+----+--------------------+
|    Consumer Staples|2022|-8.96103263574838...|
|Information Techn...|2021|1.335529032370394E-4|
|Communication Ser...|2015|2.969932312012591E-4|
|              Energy|2019|4.799272890148064E-5|
|         Real Estate|2023|7.452076309877752E-4|
+--------------------+----+--------------------+
only showing top 5 rows



In [64]:
sub_industry_yearly_return = sub_industry_daily_price.groupBy("sub_industry", F.year("Date").alias("year")).agg(F.avg("daily_return").alias("annual_avg_return"))
sub_industry_yearly_return.show(5)

+--------------------+----+--------------------+
|        sub_industry|year|   annual_avg_return|
+--------------------+----+--------------------+
|     Multi-Utilities|2020|7.192633866542166E-4|
|Diversified Suppo...|2023| 8.32870109148898E-4|
|Electronic Equipm...|2015|0.001084343884663...|
|Wireless Telecomm...|2023| 8.28773495315729E-4|
|Transaction & Pay...|2018|1.427829547660595...|
+--------------------+----+--------------------+
only showing top 5 rows



Step8：Load to BigQuery

In [54]:
client = bigquery.Client()

spark.conf.set('temporaryGcsBucket', 'your-gcs-bucket-name')

In [57]:
table_id_1 = "yahoo-finance-455223.yfinance_data.nasdaq_100_stock_data"
pandas_df = data.toPandas()
client.load_table_from_dataframe(pandas_df, table_id_1)
#data.write.format('bigquery').option('table', table_id_1).mode("overwrite").save()



LoadJob<project=yahoo-finance-455223, location=us-central1, id=660139d7-c977-4dbb-9dfa-fcced9c28a93>

In [59]:
table_id_2 = "yahoo-finance-455223.yfinance_data.big_seven_stock_data"
pandas_df = big_seven_df.toPandas()
client.load_table_from_dataframe(pandas_df, table_id_2)



LoadJob<project=yahoo-finance-455223, location=us-central1, id=1698bb67-5901-41c2-8de5-6962bed4a873>

In [65]:
table_id_3 = "yahoo-finance-455223.yfinance_data.nasdaq_100_yearly_data"
pandas_df = data_yearly.toPandas()
client.load_table_from_dataframe(pandas_df, table_id_3)



LoadJob<project=yahoo-finance-455223, location=us-central1, id=bf0af77d-d6af-4853-9655-1399ee969f8e>

In [66]:
table_id_4 = "yahoo-finance-455223.yfinance_data.big_seven_yearly_data"
pandas_df = big_seven_yearly.toPandas()
client.load_table_from_dataframe(pandas_df, table_id_4)



LoadJob<project=yahoo-finance-455223, location=us-central1, id=dc5eacb8-6272-45e1-a457-61c37f98558a>

In [67]:
table_id_5 = "yahoo-finance-455223.yfinance_data.sector_yearly_return"
pandas_df = sector_yearly_return.toPandas()
client.load_table_from_dataframe(pandas_df, table_id_5)



LoadJob<project=yahoo-finance-455223, location=us-central1, id=9ebb2631-a9b1-456c-9d1f-a8468675397b>

In [68]:
table_id_6 = "yahoo-finance-455223.yfinance_data.sub_industry_yearly_return"
pandas_df = sub_industry_yearly_return.toPandas()
client.load_table_from_dataframe(pandas_df, table_id_6)



LoadJob<project=yahoo-finance-455223, location=us-central1, id=25852033-a97c-418d-a900-81dab5f78942>