## 1 Datenerhebung mittels API & Web Scraping

In [21]:
#import modules
import pandas as pd
import numpy as np
import seaborn as sns
#sns.get_dataset_names()
from pandas_datareader import data
from sklearn.model_selection import train_test_split
import time
import datetime

### 1.1 Yahoo Finance API: Aktienkurs

In [22]:
ticker = 'AAPL'
period1 = int(time.mktime(datetime.datetime(2010, 1, 1, 23, 59).timetuple()))
period2 = int(time.mktime(datetime.datetime(2022, 2, 1, 23, 59).timetuple()))
interval = '1d'
query_string = f'https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={period1}&period2={period2}&interval={interval}&events=history&includeAdjustedClose=true'
data = pd.read_csv(query_string)
print(data)
data.to_csv('APPL Prices.csv')

            Date        Open        High         Low       Close   Adj Close  \
0     2010-01-04    7.622500    7.660714    7.585000    7.643214    6.505280   
1     2010-01-05    7.664286    7.699643    7.616071    7.656429    6.516527   
2     2010-01-06    7.656429    7.686786    7.526786    7.534643    6.412874   
3     2010-01-07    7.562500    7.571429    7.466071    7.520714    6.401017   
4     2010-01-08    7.510714    7.571429    7.466429    7.570714    6.443573   
...          ...         ...         ...         ...         ...         ...   
3037  2022-01-26  163.500000  164.389999  157.820007  159.690002  158.526520   
3038  2022-01-27  162.449997  163.839996  158.279999  159.220001  158.059921   
3039  2022-01-28  165.710007  170.350006  162.800003  170.330002  169.088959   
3040  2022-01-31  170.160004  175.000000  169.509995  174.779999  173.506546   
3041  2022-02-01  174.009995  174.839996  172.309998  174.610001  173.337784   

         Volume  
0     493729600  
1  

### 1.2 Web Scraping

In [23]:
from pygooglenews import GoogleNews
import json
import time

gn = GoogleNews()
s = gn.search('apple')

for entry in s["entries"]:
    print(entry["title"])


Apple to issue disappointing forecast this week, Bank of America predicts - CNBC
Apple Watch Series 9 Could Gain Long-Awaited Feature, New Leak Claims - Forbes
WatchOS 10 Details: Apple Watch to Gain Widgets as Part of New Software at WWDC - Bloomberg
Apple's watchOS 10 overhaul to spotlight widgets instead of apps - Macworld
New York City's new tool to stop car thefts: Apple AirTags - CBS News
NYPD asks people to put Apple AirTags in their cars to help find stolen vehicles - CNBC
An Apple AirTag Can Be Indispensable On Your Next Road Trip - Forbes
Apple's First-Ever Store Moving to New Location: 'A New Chapter is Coming Soon' - MacRumors
How to use AirDrop on all your Apple devices - Fox News
Why Google CEO Sundar Pichai and Apple CEO Tim Cook were fighting over 2 IITians - Business Today
Apple Experts Say You Should Delete This One App On Your iPhone To Increase Its Battery Life - Yahoo Life
How to drink apple cider vinegar for weight loss - USA TODAY
Apple drops lawsuit against form

## 2 Datenaufbereitung

### Entfernen NAs und Duplikate, Erstellen neuer Variablen, Anreicherung der Daten

In [24]:
df = data

# Descriptive Statistics
df.describe()
# Check for format and change it¶
df.info()

# Data cleaning --> Hier noch mehr Befehle suchen
df = data.drop_duplicates()
df['Date'] = pd.to_datetime(df['Date'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3042 entries, 0 to 3041
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       3042 non-null   object 
 1   Open       3042 non-null   float64
 2   High       3042 non-null   float64
 3   Low        3042 non-null   float64
 4   Close      3042 non-null   float64
 5   Adj Close  3042 non-null   float64
 6   Volume     3042 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 166.5+ KB


## 3 DB - PostgreSQL DB initiate -> In Docker

In [25]:
# Libraries
import os
import fnmatch
import tempfile
import psycopg2
import pandas as pd
from sqlalchemy import create_engine

os.environ['MPLCONFIGDIR'] = "/home/jovyan"
import matplotlib.pyplot as plt

# Settings
import warnings
warnings.filterwarnings("ignore")

# Connect DB
conn = psycopg2.connect("host=db dbname=postgres user=admin password=secret")

# Insert data to appl_prices
engine = create_engine('postgresql://admin:secret@db:5432/postgres')
data.to_sql('appl_prices', engine, if_exists='replace')
cur = conn.cursor()

# Spalte positive hinzufügen
cur.execute("ALTER TABLE appl_prices ADD COLUMN Positive INTEGER DEFAULT 0;")

# Änderungen speichern
conn.commit()

# Datenbankverbindung schließen
cur.close()
conn.close()

cur.execute("""UPDATE appl_prices SET positive = CASE WHEN "Close" >= "Open" THEN 1 ELSE 0 END;""")

OperationalError: could not translate host name "db" to address: Unknown host


## 4. EDA

In [None]:
#Daten aus DB lesen und bearbeiten
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pandas_datareader import data
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# Exploratory data analysis
print(df.info())
print(df.describe())

# Plotting
sns.set_style('whitegrid')
plt.figure(figsize=(12,6))
plt.title('Apple Stock Price')
plt.xlabel('Year')

plt.ylabel('Price ($)')
sns.lineplot(data=df, x='Date', y='Close')
plt.show()

plt.figure(figsize=(12,6))
plt.title('Daily Change in Apple Stock Price')
plt.xlabel('Year')
plt.ylabel('Change in price ($)')
sns.lineplot(data=df, x='Date', y='Close').set(ylabel='Price ($)', xlabel='Year')
sns.lineplot(data=df, x='Date', y=df['Close'].diff()).set(ylabel='Change in price ($)', xlabel='Year')
plt.legend(labels=['Price', 'Daily Change'])
plt.show()

plt.figure(figsize=(12,6))
plt.title('Apple Stock Price Distribution')
sns.histplot(data=df, x='Close', bins=30)
plt.show()

# Split the data into training and test sets
X = df['Open'].values.reshape(-1, 1)
y = df['Close'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on test data
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f'R-squared: {r2:.2f}')
print

### 5. Verwendung eines ML Frameworks/Library & 6. Erstellen von Modellvorhersagen

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from pandas_datareader import data
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import time
import datetime
import sqlite3

# Fetch data from Yahoo Finance
ticker = 'AAPL'
period1 = int(time.mktime(datetime.datetime(2010, 1, 1, 23, 59).timetuple()))
period2 = int(time.mktime(datetime.datetime(2022, 2, 1, 23, 59).timetuple()))
interval = '1d'
query_string = f'https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={period1}&period2={period2}&interval={interval}&events=history&includeAdjustedClose=true'
data = pd.read_csv(query_string)

# establish connection to SQLite database
db_file = r"aaplsqlite.db"
with sqlite3.connect(db_file) as conn:
    # write data to a table in the database
    data.to_sql('aapl_prices', conn, if_exists='replace', index=False)

print("Data written to SQLite database successfully!")

# Set up connection to SQLite database
with sqlite3.connect(db_file) as conn:
    # Load data from database table
    df = pd.read_sql_query("SELECT * from aapl_prices", conn)

# Data cleaning
df = df.drop_duplicates()
df['Date'] = pd.to_datetime(df['Date'])

# Plotting
sns.set_style('whitegrid')
plt.figure(figsize=(12,6))
plt.title('Apple Stock Price')
plt.xlabel('Year')
plt.ylabel('Price ($)')
sns.lineplot(data=df, x='Date', y='Close')
plt.show()

# Map stock exchange

# Split the data into training and test sets
X = df['Open'].values.reshape(-1, 1)
y = df['Close'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to tensors
X_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.from_numpy(y_train).float()

# Define the model architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)
        
    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

# Train the model
num_epochs = 5000
for epoch in range(num_epochs):
    # Forward pass
    y_pred = net(X_train_tensor)
    loss = criterion(y_pred, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

# Make predictions
X_tensor = torch.from_numpy(X).float()

with torch.no_grad():
    y_pred_tensor = net(X_tensor)
    
y_pred = y_pred_tensor.numpy().flatten()

# Make predictions on test set
X_test_tensor = torch.from_numpy(X_test).float()
with torch.no_grad():
    y_test_pred_tensor = net(X_test_tensor)

y_test_pred = y_test_pred_tensor.numpy().flatten()

# Compute R2-score and MSE on test set
r2 = r2_score(y_test, y_test_pred)
mse = mean_squared_error(y_test, y_test_pred)

print(f"R2-score on test set: {r2:.4f}")
print(f"MSE on test set: {mse:.4f}")

# Plot predictions against true values
plt.figure(figsize=(12,6))
plt.title('Apple Stock Price Predictions')
plt.xlabel('Open Price ($)')
plt.ylabel('Close Price ($)')
sns.scatterplot(x=X_test.flatten(), y=y_test)
sns.lineplot(x=X_test.flatten(), y=y_test_pred, color='red')
plt.show()

### 7. Evaluation der Modelle mit Hilfe geeigneter Modellgütemasse


### 8. Korrekte Interpretation der Modellergebnisse und Modellgütemasse

### Z.3 Integration und Visualisierung von geographischen Daten

In [None]:
import yfinance as yf
import folium
import requests
import webbrowser
import os
from bs4 import BeautifulSoup

# Get the Exchange from Yahoo Finance
ticker = yf.Ticker('AAPL').info
market_place = ticker['exchange']
print('Ticker:', ticker)
print('Ticker: AAPL')
print('Market Place:', market_place)

# Yahoo Finance API URL to get exchange symbols for AAPL stock
yahoo_api_url = 'https://finance.yahoo.com/quote/AAPL'

# Nominatim API URL to get geocoding data for exchange locations
nominatim_api_url = 'https://nominatim.openstreetmap.org/search'

# Get exchange symbols for AAPL stock
response = requests.get(yahoo_api_url)
soup = BeautifulSoup(response.content, 'html.parser')
exchange_symbols = market_place
print(exchange_symbols)

# OpenStreetMap URL to get location data for NMS stock exchange
#osm_url = f'https://www.openstreetmap.org/search?query=Cupertino'
osm_url = f'https://nominatim.openstreetmap.org/search.php?q={exchange_symbols}+stock+exchange&format=json'


# Get location data for NMS stock exchange
response = requests.get(osm_url)
location_data = response.json()[0]

# Extract latitude and longitude from location data
lat = float(location_data['lat'])
lon = float(location_data['lon'])

# Create a folium map centered on the NMS stock exchange
m = folium.Map(location=[lat, lon], zoom_start=16)

# Add a marker for the NMS stock exchange
folium.Marker(location=[lat, lon], tooltip='NMS stock exchange').add_to(m)

# Display the map
m
m.save('Exchange.html')
url = 'file://' + os.path.abspath('Exchange.html')
webbrowser.open(url)

Ticker: {'address1': 'One Apple Park Way', 'city': 'Cupertino', 'state': 'CA', 'zip': '95014', 'country': 'United States', 'phone': '408 996 1010', 'website': 'https://www.apple.com', 'industry': 'Consumer Electronics', 'sector': 'Technology', 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts. In addition, the company offers various services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized 

True