pip install psycopg2


In [8]:
import yfinance as yf
import pandas as pd
from sqlalchemy import create_engine
import psycopg2

# Step 1: Download the data
data = yf.download("AAPL", start="2023-01-01", end="2024-01-01")

# Step 2: Flatten the MultiIndex by resetting the columns
data.columns = [f'{col[0]}_{col[1]}' for col in data.columns]  # Flatten MultiIndex columns

# Reset the index to make 'Date' a regular column
data.reset_index(inplace=True)

# Convert 'Date' column to string format
data['Date'] = data['Date'].dt.strftime('%Y-%m-%d')  # Format: 'YYYY-MM-DD'

# Step 3: AWS RDS PostgreSQL Connection Details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')

# Step 4: Create table in PostgreSQL (if it doesn’t exist)
create_table_query = '''
CREATE TABLE IF NOT EXISTS apple_stock (
    Date TEXT PRIMARY KEY,
    Close_AAPL REAL,
    High_AAPL REAL,
    Low_AAPL REAL,
    Open_AAPL REAL,
    Volume_AAPL BIGINT
);
'''

# Execute table creation
with engine.connect() as conn:
    conn.execute(text(create_table_query))


# Step 5: Insert data into PostgreSQL
data.to_sql('apple_stock', engine, if_exists='replace', index=False)

print("✅ Data successfully saved to AWS RDS PostgreSQL!")


[*********************100%***********************]  1 of 1 completed


✅ Data successfully saved to AWS RDS PostgreSQL!


In [23]:
import pandas as pd
from sqlalchemy import create_engine

# Step 3: AWS RDS PostgreSQL Connection Details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')

# Load AAPL stock data
df_aapl = pd.read_sql("SELECT * FROM apple_stock", engine)

# Convert 'date' column to datetime
df_aapl['Date'] = pd.to_datetime(df_aapl['Date'])

# Handle missing values (forward fill for stock prices)
df_aapl.fillna(method='ffill', inplace=True)

# Remove duplicates
df_aapl.drop_duplicates(inplace=True)

# Save cleaned AAPL data back to PostgreSQL
df_aapl.to_sql("apple_stock", engine, if_exists="replace", index=False)

print("AAPL stock data cleaned and saved to PostgreSQL!")


  df_aapl.fillna(method='ffill', inplace=True)


AAPL stock data cleaned and saved to PostgreSQL!


In [12]:
print(df_aapl.columns)


Index(['Date', 'Close_AAPL', 'High_AAPL', 'Low_AAPL', 'Open_AAPL',
       'Volume_AAPL'],
      dtype='object')


In [7]:
import os
import requests
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from sqlalchemy import create_engine, text
import psycopg2

# Initialize Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

# Step 1: Fetch news data from NewsAPI
API_KEY = os.getenv("NEWS_API_KEY")  # Ensure API key is set

url = f"https://newsapi.org/v2/everything?q=stock&apiKey={API_KEY}"
response = requests.get(url).json()

# Step 2: Extract relevant fields and convert to DataFrame
articles = response.get("articles", [])

# Check if data is available
if not articles:
    raise ValueError("❌ No articles found in API response. Check your API key and query.")

df_news = pd.DataFrame(articles)

# Check if required columns exist
required_columns = ["publishedAt", "title", "description", "url"]
df_news = df_news[[col for col in required_columns if col in df_news.columns]]

# Convert 'publishedAt' to datetime format if it exists
if "publishedAt" in df_news.columns:
    df_news["publishedAt"] = pd.to_datetime(df_news["publishedAt"])

# Perform Sentiment Analysis on each news title
df_news["sentiment_score"] = df_news["title"].apply(lambda x: sia.polarity_scores(x)["compound"] if isinstance(x, str) else 0)
df_news["sentiment_label"] = df_news["sentiment_score"].apply(lambda x: "positive" if x > 0 else ("negative" if x < 0 else "neutral"))

# AWS RDS PostgreSQL connection details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Step 4: Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')

# Step 5: Create table in PostgreSQL if it doesn’t exist
create_table_query = '''
CREATE TABLE IF NOT EXISTS financial_news_sentiment (
    publishedAt TIMESTAMP,
    title TEXT,
    description TEXT,
    url TEXT PRIMARY KEY,
    sentiment_score REAL,
    sentiment_label TEXT
);
'''

# Execute table creation
with engine.connect() as conn:
    conn.execute(text(create_table_query))

# Step 6: Insert data into AWS RDS PostgreSQL
df_news.to_sql('financial_news_sentiment', engine, if_exists='replace', index=False)

print("✅ News sentiment data successfully saved to AWS RDS PostgreSQL!")


✅ News sentiment data successfully saved to AWS RDS PostgreSQL!


In [9]:
import psycopg2
import pandas as pd

# AWS RDS PostgreSQL connection details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Connect to PostgreSQL
conn = psycopg2.connect(
    host=rds_host,
    user=rds_user,
    password=rds_password,
    dbname=rds_db,
    port=rds_port
)

# Query to fetch data from the table
query = "SELECT * FROM apple_stock LIMIT 10;"  # Adjust LIMIT as needed

# Execute the query and fetch data
df = pd.read_sql(query, conn)

# Show the fetched data
print(df)

# Close connection
conn.close()


  df = pd.read_sql(query, conn)


         Date  Close_AAPL   High_AAPL    Low_AAPL   Open_AAPL  Volume_AAPL
0  2023-01-03  123.632538  129.395526  122.742880  128.782657    112117500
1  2023-01-04  124.907707  127.181276  123.642420  125.431615     89113600
2  2023-01-05  123.583099  126.301493  123.326093  125.668849     80962700
3  2023-01-06  128.130219  128.792516  123.454587  124.561717     87754700
4  2023-01-09  128.654129  131.876670  128.397123  128.970458     70790800
5  2023-01-10  129.227463  129.751370  126.647460  128.762863     63896200
6  2023-01-11  131.955765  131.975524  128.960591  129.741504     69458900
7  2023-01-12  131.876663  132.716885  129.929304  132.341262     71379600
8  2023-01-13  133.211166  133.369331  130.146804  130.512547     57809700
9  2023-01-17  134.377609  135.712084  132.588414  133.280366     63646600


In [24]:

# AWS RDS PostgreSQL connection details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Step 4: Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')



# Load financial news sentiment data
df_news = pd.read_sql("SELECT * FROM financial_news_sentiment", engine)

# Convert 'publishedAt' to datetime
df_news['publishedAt'] = pd.to_datetime(df_news['publishedAt'])

# Handle missing values
df_news.fillna({"sentiment_score": 0, "sentiment_label": "neutral"}, inplace=True)  # Default sentiment for missing values

# Remove duplicates
df_news.drop_duplicates(inplace=True)

# Save cleaned news sentiment data back to PostgreSQL
df_news.to_sql("financial_news_sentiment", engine, if_exists="replace", index=False)

print("Financial news sentiment data cleaned and saved to PostgreSQL!")


Financial news sentiment data cleaned and saved to PostgreSQL!


In [16]:
import os

# Set the environment variable
os.environ['NEWS_API_KEY'] = '4967d527b00948cd8b94061cf59bc78d'

In [17]:
import os
import requests 

API_KEY = os.getenv("NEWS_API_KEY")  # Fetch from environment variable
url = f"https://newsapi.org/v2/everything?q=stock&apiKey={API_KEY}"

response = requests.get(url).json()
print(response)


{'status': 'ok', 'totalResults': 66524, 'articles': [{'source': {'id': 'wired', 'name': 'Wired'}, 'author': 'Zeyi Yang', 'title': 'Chinese Companies Rush to Put DeepSeek in Everything', 'description': 'From video game developers to a nuclear power plant, companies across China are adopting DeepSeek’s AI models to boost stock prices and flaunt their national pride.', 'url': 'https://www.wired.com/story/deepseek-china-nationalism/', 'urlToImage': 'https://media.wired.com/photos/67c5f5496e5120fc90d42211/191:100/w_1280,c_limit/China-DeepSeek-AI-Business-2197000260.jpg', 'publishedAt': '2025-03-12T10:00:00Z', 'content': 'A mobile shooting game developed by Tencent is using DeepSeek to power an in-game assistant that can, among other things, give players fortune-telling readers about whether they are going to have a g… [+3020 chars]'}, {'source': {'id': None, 'name': 'Gizmodo.com'}, 'author': 'Matthew Gault', 'title': 'Tesla Hate Is Making Insurance More Expensive for Owners', 'description':

In [19]:
import os
import requests
import pandas as pd
from sqlalchemy import create_engine

# Fetch API Key from environment variables
API_KEY = os.getenv("NEWS_API_KEY")

# Fetch financial news from NewsAPI
url = f"https://newsapi.org/v2/everything?q=stock&apiKey={API_KEY}"
response = requests.get(url).json()

# Extract relevant fields from the response
articles = response.get("articles", [])

# Convert to DataFrame
df_news = pd.DataFrame(articles)[["publishedAt", "title", "description", "url"]]

# 1️⃣ Convert `publishedAt` to datetime format
df_news["publishedAt"] = pd.to_datetime(df_news["publishedAt"], errors='coerce')

# 2️⃣ Handle missing values
df_news["title"].fillna("No title", inplace=True)
df_news["description"].fillna("No description", inplace=True)

# 3️⃣ Remove duplicate articles (based on title & description)
df_news.drop_duplicates(subset=["title", "description"], inplace=True)

# 4️⃣ Perform text cleaning (optional)
df_news["title"] = df_news["title"].str.replace(r"[^\w\s]", "", regex=True).str.lower()
df_news["description"] = df_news["description"].str.replace(r"[^\w\s]", "", regex=True).str.lower()

# ✅ Print cleaned data preview
print(df_news.head())

# 5️⃣ Save cleaned data to AWS RDS PostgreSQL
# Step 4: AWS RDS PostgreSQL Connection Details
# AWS RDS PostgreSQL connection details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')


# Save to PostgreSQL
df_news.to_sql("financial_news", engine, if_exists="replace", index=False)

print("Financial news cleaned and saved to AWS RDS PostgreSQL successfully!")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_news["title"].fillna("No title", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_news["description"].fillna("No description", inplace=True)


                publishedAt  \
0 2025-03-12 10:00:00+00:00   
1 2025-03-18 16:20:34+00:00   
2 2025-03-11 20:41:25+00:00   
3 2025-02-22 12:15:29+00:00   
4 2025-03-06 21:20:26+00:00   

                                               title  \
0  chinese companies rush to put deepseek in ever...   
1  tesla hate is making insurance more expensive ...   
2  nvidias rtx 5060 and 5060 ti gpus may arrive r...   
3  the galaxy s25 ultra is now 70 off samsung is ...   
4  amazon offers a selfheating coffee mug at an a...   

                                         description  \
0  from video game developers to a nuclear power ...   
1  as stock slumps and americans turn against the...   
2  take your bets whether youll be able to find n...   
3  samsungs latest deal on its website makes the ...   
4  your cup of joe can remain eternally hot with ...   

                                                 url  
0  https://www.wired.com/story/deepseek-china-nat...  
1  https://gizmodo.com/tesla-

In [2]:
!pip install fredapi


Collecting fredapi
  Downloading fredapi-0.5.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fredapi-0.5.2-py3-none-any.whl (11 kB)
Installing collected packages: fredapi
Successfully installed fredapi-0.5.2


In [20]:
import os
os.environ["FRED_API_KEY"] = "661467c458ce41671d637d95e1057f05"


In [21]:
import os
print(f"API Key: {os.getenv('FRED_API_KEY')}")  # Should print your API key



API Key: 661467c458ce41671d637d95e1057f05


In [22]:
from fredapi import Fred
import os
import pandas as pd
from sqlalchemy import create_engine

# Get FRED API Key from environment variable
API_KEY = os.getenv("FRED_API_KEY")

if not API_KEY:
    raise ValueError("FRED_API_KEY not found in environment variables")

fred = Fred(api_key=API_KEY)

# Fetch economic indicators
gdp = fred.get_series("GDP")  # Gross Domestic Product (GDP)
unemployment = fred.get_series("UNRATE")  # Unemployment Rate

# Create DataFrame
df_economic = pd.DataFrame({"GDP": gdp, "Unemployment_Rate": unemployment})

# Data Cleaning
# Drop any null values
df_economic.dropna(inplace=True)

# Remove duplicate rows if any
df_economic.drop_duplicates(inplace=True)

# Reset index
df_economic.reset_index(inplace=True)
df_economic.rename(columns={'index': 'Date'}, inplace=True)

# Convert Date column to datetime format
df_economic['Date'] = pd.to_datetime(df_economic['Date'])

# AWS RDS PostgreSQL connection details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Create PostgreSQL connection string
conn_str = f"postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}"

# Create a database connection using SQLAlchemy
engine = create_engine(conn_str)

# Save DataFrame to AWS PostgreSQL (replace existing table)
df_economic.to_sql("economic_indicators", engine, if_exists="replace", index=False)

print("Economic indicators data cleaned and saved to AWS RDS PostgreSQL successfully!")



Economic indicators data cleaned and saved to AWS RDS PostgreSQL successfully!


In [9]:
import pandas as pd

# Query the data from AWS PostgreSQL
with engine.connect() as connection:
    df_check = pd.read_sql("SELECT * FROM economic_indicators", connection)

print(df_check)  # Display first few rows


           GDP  Unemployment_Rate
0          NaN                NaN
1          NaN                NaN
2          NaN                NaN
3          NaN                NaN
4      243.164                NaN
..         ...                ...
929  29719.647                4.1
930        NaN                4.2
931        NaN                4.1
932        NaN                4.0
933        NaN                4.1

[934 rows x 2 columns]
