pip install psycopg2


In [4]:
!pip install python-dotenv





In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

rds_host = os.getenv("RDS_HOST")
rds_user = os.getenv("RDS_USER")
rds_password = os.getenv("RDS_PASSWORD")
rds_db = os.getenv("RDS_DB")
rds_port = os.getenv("RDS_PORT")

In [2]:
print(f"RDS Host: {rds_host}")


RDS Host: database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com


In [5]:
import yfinance as yf
import pandas as pd
from sqlalchemy import create_engine, text  # <-- Added text
import psycopg2
from dotenv import load_dotenv
import os

# Step 1: Load environment variables
load_dotenv()

rds_host = os.getenv('RDS_HOST')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')
rds_db = os.getenv('RDS_DB')
rds_port = os.getenv('RDS_PORT')

# Step 2: Download Apple stock data
data = yf.download("AAPL", start="2023-01-01", end="2024-01-01")

# Step 3: Flatten MultiIndex columns if necessary
if isinstance(data.columns, pd.MultiIndex):
    data.columns = [f'{col[0]}_{col[1]}' for col in data.columns]

# Reset the index to make 'Date' a regular column
data.reset_index(inplace=True)

# Convert 'Date' column to string format
data['Date'] = data['Date'].dt.strftime('%Y-%m-%d')

# Step 4: Connect to AWS RDS PostgreSQL
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')

# Step 5: Create table if not exists
create_table_query = '''
CREATE TABLE IF NOT EXISTS apple_stock (
    Date TEXT PRIMARY KEY,
    Open_AAPL REAL,
    High_AAPL REAL,
    Low_AAPL REAL,
    Close_AAPL REAL,
    "Adj Close_AAPL" REAL,
    Volume_AAPL BIGINT
);
'''

# Execute the table creation query
with engine.connect() as conn:
    conn.execute(text(create_table_query))

# Step 6: Insert data into PostgreSQL
data.to_sql('apple_stock', engine, if_exists='replace', index=False)

print("✅ Data successfully saved to AWS RDS PostgreSQL!")


[*********************100%***********************]  1 of 1 completed


✅ Data successfully saved to AWS RDS PostgreSQL!


In [6]:
import pandas as pd
from sqlalchemy import create_engine

# Step 3: AWS RDS PostgreSQL Connection Details
from dotenv import load_dotenv
import os

# Step 1: Load environment variables
load_dotenv()

rds_host = os.getenv('RDS_HOST')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')
rds_db = os.getenv('RDS_DB')
rds_port = os.getenv('RDS_PORT')

# Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')

# Load AAPL stock data
df_aapl = pd.read_sql("SELECT * FROM apple_stock", engine)

# Convert 'date' column to datetime
df_aapl['Date'] = pd.to_datetime(df_aapl['Date'])

# Handle missing values (forward fill for stock prices)
df_aapl.fillna(method='ffill', inplace=True)

# Remove duplicates
df_aapl.drop_duplicates(inplace=True)

# Save cleaned AAPL data back to PostgreSQL
df_aapl.to_sql("apple_stock", engine, if_exists="replace", index=False)

print("AAPL stock data cleaned and saved to PostgreSQL!")


  df_aapl.fillna(method='ffill', inplace=True)


AAPL stock data cleaned and saved to PostgreSQL!


In [7]:
print(df_aapl.columns)


Index(['Date', 'Close_AAPL', 'High_AAPL', 'Low_AAPL', 'Open_AAPL',
       'Volume_AAPL'],
      dtype='object')


In [1]:
#sentiment Analysis
import os
import requests
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from sqlalchemy import create_engine, text
import psycopg2

from dotenv import load_dotenv
load_dotenv()
# Initialize Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

# Step 1: Fetch news data from NewsAPI
API_KEY = os.getenv("NEWS_API_KEY")  # Ensure API key is set

url = f"https://newsapi.org/v2/everything?q=stock&apiKey={API_KEY}"
response = requests.get(url).json()

# Step 2: Extract relevant fields and convert to DataFrame
articles = response.get("articles", [])

# Check if data is available
if not articles:
    raise ValueError("❌ No articles found in API response. Check your API key and query.")

df_news = pd.DataFrame(articles)

# Check if required columns exist
required_columns = ["publishedAt", "title", "description", "url"]
df_news = df_news[[col for col in required_columns if col in df_news.columns]]

# Convert 'publishedAt' to datetime format if it exists
if "publishedAt" in df_news.columns:
    df_news["publishedAt"] = pd.to_datetime(df_news["publishedAt"])

# Perform Sentiment Analysis on each news title
df_news["sentiment_score"] = df_news["title"].apply(lambda x: sia.polarity_scores(x)["compound"] if isinstance(x, str) else 0)
df_news["sentiment_label"] = df_news["sentiment_score"].apply(lambda x: "positive" if x > 0 else ("negative" if x < 0 else "neutral"))

# AWS RDS PostgreSQL connection details


# Step 1: Load environment variables


rds_host = os.getenv('RDS_HOST')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')
rds_db = os.getenv('RDS_DB')
rds_port = os.getenv('RDS_PORT')

# Step 4: Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')

# Step 5: Create table in PostgreSQL if it doesn’t exist
create_table_query = '''
CREATE TABLE IF NOT EXISTS financial_news_sentiment (
    publishedAt TIMESTAMP,
    title TEXT,
    description TEXT,
    url TEXT PRIMARY KEY,
    sentiment_score REAL,
    sentiment_label TEXT
);
'''

# Execute table creation
with engine.connect() as conn:
    conn.execute(text(create_table_query))

# Step 6: Insert data into AWS RDS PostgreSQL
df_news.to_sql('financial_news_sentiment', engine, if_exists='replace', index=False)

print("✅ News sentiment data successfully saved to AWS RDS PostgreSQL!")


✅ News sentiment data successfully saved to AWS RDS PostgreSQL!


In [3]:
from dotenv import load_dotenv
load_dotenv()
# AWS RDS PostgreSQL connection details
# Step 1: Load environment variables
rds_host = os.getenv('RDS_HOST')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')
rds_db = os.getenv('RDS_DB')
rds_port = os.getenv('RDS_PORT')
# Step 4: Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')



# Load financial news sentiment data
df_news = pd.read_sql("SELECT * FROM financial_news_sentiment", engine)

# Convert 'publishedAt' to datetime
df_news['publishedAt'] = pd.to_datetime(df_news['publishedAt'])

# Handle missing values
df_news.fillna({"sentiment_score": 0, "sentiment_label": "neutral"}, inplace=True)  # Default sentiment for missing values

# Remove duplicates
df_news.drop_duplicates(inplace=True)

# Save cleaned news sentiment data back to PostgreSQL
df_news.to_sql("financial_news_sentiment", engine, if_exists="replace", index=False)

print("Financial news sentiment data cleaned and saved to PostgreSQL!")


Financial news sentiment data cleaned and saved to PostgreSQL!


In [2]:
import os
import requests 

API_KEY = os.getenv("NEWS_API_KEY")  # Fetch from environment variable
url = f"https://newsapi.org/v2/everything?q=stock&apiKey={API_KEY}"

response = requests.get(url).json()
print(response)




In [4]:
import os
import requests
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
load_dotenv()

# Fetch API Key from environment variables
API_KEY = os.getenv("NEWS_API_KEY")

# Fetch financial news from NewsAPI
url = f"https://newsapi.org/v2/everything?q=stock&apiKey={API_KEY}"
response = requests.get(url).json()

# Extract relevant fields from the response
articles = response.get("articles", [])

# Convert to DataFrame
df_news = pd.DataFrame(articles)[["publishedAt", "title", "description", "url"]]

# 1️⃣ Convert `publishedAt` to datetime format
df_news["publishedAt"] = pd.to_datetime(df_news["publishedAt"], errors='coerce')

# 2️⃣ Handle missing values
df_news["title"].fillna("No title", inplace=True)
df_news["description"].fillna("No description", inplace=True)

# 3️⃣ Remove duplicate articles (based on title & description)
df_news.drop_duplicates(subset=["title", "description"], inplace=True)

# 4️⃣ Perform text cleaning (optional)
df_news["title"] = df_news["title"].str.replace(r"[^\w\s]", "", regex=True).str.lower()
df_news["description"] = df_news["description"].str.replace(r"[^\w\s]", "", regex=True).str.lower()

# ✅ Print cleaned data preview
print(df_news.head())

# 5️⃣ Save cleaned data to AWS RDS PostgreSQL
# Step 4: AWS RDS PostgreSQL Connection Details
# AWS RDS PostgreSQL connection details
# Step 1: Load environment variables

rds_host = os.getenv('RDS_HOST')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')
rds_db = os.getenv('RDS_DB')
rds_port = os.getenv('RDS_PORT')

# Create PostgreSQL connection using SQLAlchemy
engine = create_engine(f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}')


# Save to PostgreSQL
df_news.to_sql("financial_news", engine, if_exists="replace", index=False)

print("Financial news cleaned and saved to AWS RDS PostgreSQL successfully!")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_news["title"].fillna("No title", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_news["description"].fillna("No description", inplace=True)


                publishedAt  \
0 2025-04-22 11:09:00+00:00   
1 2025-04-03 14:38:12+00:00   
2 2025-04-03 14:13:00+00:00   
3 2025-04-07 17:32:00+00:00   
4 2025-04-01 14:10:07+00:00   

                                               title  \
0          7 best ecofriendly cleaning products 2025   
1  apple has its biggest stock drop in five years...   
2  apple has its biggest stock drop in five years...   
3  stocks plunge after trump declares web rumor o...   
4  star wars harry potter amazon clears stock of ...   

                                         description  \
0  next time you stock up on supplies consider th...   
1  shares of apple amazon and other tech stocks a...   
2  shares of apple amazon and other tech stocks a...   
3  the stock market had a brief moment of hope on...   
4                            first come first served   

                                                 url  
0  https://www.wired.com/gallery/best-eco-friendl...  
1  https://www.theverge.com/n

In [2]:
!pip install fredapi


Collecting fredapi
  Downloading fredapi-0.5.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fredapi-0.5.2-py3-none-any.whl (11 kB)
Installing collected packages: fredapi
Successfully installed fredapi-0.5.2


In [20]:
import os
os.environ["FRED_API_KEY"] = "661467c458ce41671d637d95e1057f05"


In [5]:
from fredapi import Fred
import os
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
load_dotenv()

# Get FRED API Key from environment variable
API_KEY = os.getenv("FRED_API_KEY")

if not API_KEY:
    raise ValueError("FRED_API_KEY not found in environment variables")

fred = Fred(api_key=API_KEY)

# Fetch economic indicators
gdp = fred.get_series("GDP")  # Gross Domestic Product (GDP)
unemployment = fred.get_series("UNRATE")  # Unemployment Rate

# Create DataFrame
df_economic = pd.DataFrame({"GDP": gdp, "Unemployment_Rate": unemployment})

# Data Cleaning
# Drop any null values
df_economic.dropna(inplace=True)

# Remove duplicate rows if any
df_economic.drop_duplicates(inplace=True)

# Reset index
df_economic.reset_index(inplace=True)
df_economic.rename(columns={'index': 'Date'}, inplace=True)

# Convert Date column to datetime format
df_economic['Date'] = pd.to_datetime(df_economic['Date'])

# AWS RDS PostgreSQL connection details
rds_host = "database-1.c2b8mmg2krpj.us-east-1.rds.amazonaws.com"
rds_user = "postgres"
rds_password = "d82Nb5kMAK8T37S2JCSV"
rds_db = "postgres"
rds_port = "5432"

# Create PostgreSQL connection string
conn_str = f"postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{rds_db}"

# Create a database connection using SQLAlchemy
engine = create_engine(conn_str)

# Save DataFrame to AWS PostgreSQL (replace existing table)
df_economic.to_sql("economic_indicators", engine, if_exists="replace", index=False)

print("Economic indicators data cleaned and saved to AWS RDS PostgreSQL successfully!")



ValueError: FRED_API_KEY not found in environment variables

In [9]:
import pandas as pd

# Query the data from AWS PostgreSQL
with engine.connect() as connection:
    df_check = pd.read_sql("SELECT * FROM economic_indicators", connection)

print(df_check)  # Display first few rows


           GDP  Unemployment_Rate
0          NaN                NaN
1          NaN                NaN
2          NaN                NaN
3          NaN                NaN
4      243.164                NaN
..         ...                ...
929  29719.647                4.1
930        NaN                4.2
931        NaN                4.1
932        NaN                4.0
933        NaN                4.1

[934 rows x 2 columns]
