# ABNB Stock Article extractions

In [26]:
import requests

# api_key = '23f5208b7e2940aeb8b72c67195565b2'
api_key = 'bd9bb272078b41ebb0ee0a905d4a9532'
date_from = '2023-12-11'
date_to = '2024-01-08'
# Expanded list of sources
sources = 'bloomberg,cnbc,reuters,financial-times,techcrunch,the-wall-street-journal,the-verge,business-insider,the-economist,wired,engadget,bbc-news,fortune,techradar'

# Broadening the search query
query = ('Airbnb OR "ABNB" OR "Short-term rental" OR "Brian Chesky" OR "Vacation rental" '
         'OR "Airbnb experiences" OR "Airbnb IPO" OR "Home sharing" OR "Travel industry" '
         'OR "Airbnb regulations" OR "Hospitality industry" OR "Airbnb market" OR "Platform economy" '
         'OR "Sharing economy" OR "Airbnb competitors" OR "Airbnb legal" OR "Airbnb challenges" '
         'OR "Tourism disruption" OR "Airbnb strategy" OR "Airbnb expansion" OR "Peer-to-peer lodging" '
         'OR "technology" OR "istings"')

url = f'https://newsapi.org/v2/everything?q={query}&from={date_from}&to={date_to}&sources={sources}&apiKey={api_key}'
response = requests.get(url)
data = response.json()

# Check for 'articles' in response and extract headlines with publication dates
if 'articles' in data:
    news_data = [(article['title'], article['publishedAt'][:10]) for article in data['articles']]
    for headline, date in news_data:
        print(f"Date: {date}, Headline: {headline}")
else:
    print("Error or no articles found. Response:")
    print(data)


Date: 2023-12-20, Headline: New York’s Airbnb Ban Is Causing a Christmas Crunch
Date: 2023-12-29, Headline: WIRED’s 2023 Year-in-Review Quiz
Date: 2023-12-21, Headline: Are Airbnb cleaning fees too high? Tell us what you think.
Date: 2023-12-30, Headline: How Airbnb uses AI to weed out people trying to use its rental properties for New Year's parties
Date: 2023-12-13, Headline: Airbnb to pay Italy €576m to settle tax dispute
Date: 2024-01-06, Headline: Airbnb hosts are happily paying this startup $49 a month to take out the trash
Date: 2024-01-04, Headline: A self-described real estate visionary was charged with running an Airbnb scam that made millions by tricking guests into booking non-existent properties
Date: 2023-12-14, Headline: My Surprisingly Unbiased Week With Elon Musk's 'Politically Biased' Chatbot
Date: 2024-01-05, Headline: More than 40 train cars got turned into apartments decades ago. Now 3 are for sale for just $125,000 — but there's a catch.
Date: 2024-01-03, Headline

# Sentiment Analysis on News Articles

In [27]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('yiyanghkust/finbert-tone')
model = AutoModelForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone')

# Extract just the headlines from news_data
headlines = [headline for headline, _ in news_data]

# Tokenize and encode headlines
max_length = 512  # You can adjust this based on your requirements
inputs = tokenizer(headlines, padding=True, truncation=True, max_length=max_length, return_tensors="pt")

# Predict sentiment
with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Pair each sentiment score with its corresponding date
sentiment_data = []
for (headline, date), prediction in zip(news_data, predictions):
    sentiment_data.append({
        'date': date,
        'headline': headline,
        'sentiment_positive': prediction[0].item(),
        'sentiment_neutral': prediction[1].item(),
        'sentiment_negative': prediction[2].item()
    })




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



In [28]:
sentiment_data

[{'date': '2023-12-20',
  'headline': 'New York’s Airbnb Ban Is Causing a Christmas Crunch',
  'sentiment_positive': 0.00048489830805920064,
  'sentiment_neutral': 1.9828953554679174e-06,
  'sentiment_negative': 0.9995131492614746},
 {'date': '2023-12-29',
  'headline': 'WIRED’s 2023 Year-in-Review Quiz',
  'sentiment_positive': 0.9999856948852539,
  'sentiment_neutral': 3.3034064017556375e-07,
  'sentiment_negative': 1.3977928574604448e-05},
 {'date': '2023-12-21',
  'headline': 'Are Airbnb cleaning fees too high? Tell us what you think.',
  'sentiment_positive': 0.9404019117355347,
  'sentiment_neutral': 8.722837083041668e-05,
  'sentiment_negative': 0.05951083451509476},
 {'date': '2023-12-30',
  'headline': "How Airbnb uses AI to weed out people trying to use its rental properties for New Year's parties",
  'sentiment_positive': 0.9999823570251465,
  'sentiment_neutral': 8.011996328605164e-07,
  'sentiment_negative': 1.6758140191086568e-05},
 {'date': '2023-12-13',
  'headline': 'A

# Historical Stock Data and Sentiment Analysis Combined

In [35]:
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler

# Convert sentiment_data to a DataFrame
sentiment_df = pd.DataFrame(sentiment_data)

# Convert date to datetime and set as index
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
sentiment_df.set_index('date', inplace=True)

# Aggregate sentiment scores by date
average_sentiment = sentiment_df.groupby('date').mean()

# Download stock data
ticker_symbol = 'ABNB'
stock_data = yf.download(ticker_symbol, start='2023-12-09', end='2024-01-08')

# Selecting the required columns
stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]

# Convert stock_data index to datetime
stock_data.index = pd.to_datetime(stock_data.index)

# Combine stock data with sentiment data
combined_data = stock_data.join(average_sentiment)

# Interpolate missing values
combined_data = combined_data.interpolate(method='time')

# Scale price-related columns
price_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close']
scaler_prices = StandardScaler()
combined_data[price_cols] = scaler_prices.fit_transform(combined_data[price_cols])

# Scale Volume column independently
scaler_volume = StandardScaler()
combined_data['Volume'] = scaler_volume.fit_transform(combined_data[['Volume']])

print(combined_data)


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



[*********************100%%**********************]  1 of 1 completed
                Open      High       Low     Close  Adj Close    Volume  \
Date                                                                      
2023-12-11  0.190274  0.491114  0.429729  0.543988   0.543988 -0.142536   
2023-12-12 -0.508652 -0.211824 -0.259476  0.043477   0.043477  1.125276   
2023-12-13  0.511456  0.581025  0.322237  0.883315   0.883315  1.041621   
2023-12-14  0.925560  1.598647  1.300190  1.466538   1.466538  2.663549   
2023-12-15  1.382082  1.355480  1.485665  1.326567   1.326567  1.656337   
2023-12-18  1.361884  1.300310  1.540463  1.517438   1.517438 -0.358424   
2023-12-19  1.430564  1.330960  1.658491  1.392311   1.392311  0.154371   
2023-12-20  1.228562  0.899798  0.387574  0.153760   0.153760  0.506022   
2023-12-21  0.394295  0.284729  0.315914  0.370080   0.370080 -0.838441   
2023-12-22  0.295316  0.084472  0.193670  0.096497   0.096497 -1.052703   
2023-12-26  0.117554 -0.224084

In [36]:
import pandas as pd
df = pd.DataFrame(combined_data)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,sentiment_positive,sentiment_neutral,sentiment_negative
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-12-11,0.190274,0.491114,0.429729,0.543988,0.543988,-0.142536,0.272845,1.4e-05,0.727142
2023-12-12,-0.508652,-0.211824,-0.259476,0.043477,0.043477,1.125276,0.855147,0.000953,0.1439
2023-12-13,0.511456,0.581025,0.322237,0.883315,0.883315,1.041621,0.999841,3e-06,0.000156
2023-12-14,0.92556,1.598647,1.30019,1.466538,1.466538,2.663549,0.833217,0.166679,0.000105
2023-12-15,1.382082,1.35548,1.485665,1.326567,1.326567,1.656337,0.99993,4e-06,6.6e-05
2023-12-18,1.361884,1.30031,1.540463,1.517438,1.517438,-0.358424,0.681974,0.002501,0.315525
2023-12-19,1.430564,1.33096,1.658491,1.392311,1.392311,0.154371,0.999963,5e-06,3.2e-05
2023-12-20,1.228562,0.899798,0.387574,0.15376,0.15376,0.506022,0.603056,0.134539,0.262405
2023-12-21,0.394295,0.284729,0.315914,0.37008,0.37008,-0.838441,0.850662,0.000864,0.148474
2023-12-22,0.295316,0.084472,0.19367,0.096497,0.096497,-1.052703,0.600005,0.200001,0.199994


# Transformer Architecture

In [52]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
from tensorflow.keras.callbacks import EarlyStopping

selected_features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'sentiment_positive', 'sentiment_neutral', 'sentiment_negative']
data = combined_data[selected_features]

train_size = int(len(data) * 0.7)
val_size = int(len(data) * 0.15)
train_data = data.iloc[:train_size]
val_data = data.iloc[train_size:train_size + val_size]
test_data = data.iloc[train_size + val_size:]

# Prepare features and target for model training
X_train = train_data.drop('Close', axis=1)
y_train = train_data['Close']
X_val = val_data.drop('Close', axis=1)
y_val = val_data['Close']
X_test = test_data.drop('Close', axis=1)
y_test = test_data['Close']

# Define and compile the neural network model
model = Sequential()
model.add(Dense(32, activation='LeakyReLU', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(16, activation='LeakyReLU'))
model.add(Dense(1))
model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(
    X_train, y_train,
    epochs=50,  # Set back to 50 or an arbitrarily large number
    batch_size=8,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping]  # Add the early stopping callback here
)

train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_loss) + 1)

# Convert the epochs range object to a list
epochs_list = list(epochs)

# Plotting the training and validation loss
loss_fig = go.Figure()
loss_fig.add_trace(go.Scatter(x=epochs_list, y=train_loss, mode='lines', name='Training Loss'))
loss_fig.add_trace(go.Scatter(x=epochs_list, y=val_loss, mode='lines', name='Validation Loss'))
loss_fig.update_layout(title='Training and Validation Loss per Epoch',
                       xaxis_title='Epochs',
                       yaxis_title='Loss',
                       legend_title='Type')
loss_fig.show()

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
print(f'MSE: {mse}, MAE: {mae}, R^2: {r2}')

# Reshape y_test and predictions for inverse transform
y_test_reshaped = y_test.values.reshape(-1, 1)
predictions_reshaped = predictions.reshape(-1, 1)

num_scaled_cols = 5

# Create separate dummy arrays for inverse scaling
dummy_array_y_test = np.zeros((len(y_test_reshaped), num_scaled_cols))
dummy_array_predictions = np.zeros((len(predictions_reshaped), num_scaled_cols))

# Fill in the 'Close' column values in the dummy arrays
# Assuming 'Close' is the last of the scaled columns
dummy_array_y_test[:, -1] = y_test_reshaped.flatten()
dummy_array_predictions[:, -1] = predictions_reshaped.flatten()

# Inverse transform the 'Close' prices using the dummy arrays
y_test_original = scaler_prices.inverse_transform(dummy_array_y_test)[:, -1]
predictions_original = scaler_prices.inverse_transform(dummy_array_predictions)[:, -1]

# Extracting testing dates
test_dates = test_data.index

# Plotting with Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=test_dates, y=y_test_original, mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=test_dates, y=predictions_original, mode='lines', name='Predicted'))
fig.update_layout(title='Actual vs Predicted Stock Prices', xaxis_title='Date', yaxis_title='Stock Price', legend_title='Legend')
fig.show()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 15: early stopping


MSE: 0.006830944772539633, MAE: 0.05957286584011598, R^2: 0.8452490870849484


In [32]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 32)                288       
                                                                 
 dropout_2 (Dropout)         (None, 32)                0         
                                                                 
 dense_7 (Dense)             (None, 16)                528       
                                                                 
 dense_8 (Dense)             (None, 1)                 17        
                                                                 
Total params: 833 (3.25 KB)
Trainable params: 833 (3.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
