In [3]:
%pip install tensorflow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import tensorflow as tf
import matplotlib.pyplot as plt

# --- Data preparation ---------------------------------------------------
# Read the CSV and pull out the single predictor and the regression target.
file_path = "./data/final2.csv"
df = pd.read_csv(file_path)

# Double brackets select a one-column frame, so X is 2-D; y is a 1-D array.
X = df[['House price']].to_numpy()
y = df['GDP per capita (current US$)'].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the feature: statistics come from the training rows only and
# are then re-used unchanged on the test rows.
scaler_X = StandardScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Standardize the target: the scaler wants a 2-D column, so reshape going in
# and drop back to 1-D coming out. scaler_y is kept so predictions can be
# mapped back to the original units later.
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()

# Expand the scaled feature into polynomial terms up to degree 3
# (fitted on the training rows, applied to both splits).
poly = PolynomialFeatures(degree=3).fit(X_train_scaled)
X_train_cubic = poly.transform(X_train_scaled)
X_test_cubic = poly.transform(X_test_scaled)

# --- Model definition, training and inference ----------------------------
# Seed Python, NumPy and TensorFlow before any weights are created. The data
# split above is already pinned with random_state=42; without this call the
# weight initialization and batch shuffling would still differ on every run,
# so the fitted curve would not be reproducible.
tf.keras.utils.set_random_seed(42)

# Small fully connected regressor: two hidden ReLU layers of 32 units and a
# single linear output. The input width follows the number of polynomial
# feature columns produced by the preprocessing step.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_cubic.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Mean squared error on the standardized target, optimized with Adam.
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on the standardized target; 20% of the training rows are set aside by
# Keras for validation, and the per-epoch losses are kept in `history`.
history = model.fit(
    X_train_cubic,
    y_train_scaled,
    epochs=100,
    batch_size=32,
    verbose=1,
    validation_split=0.2,
)

# Predictions come out in standardized units; invert the target scaler so
# they are comparable with the raw y values.
y_train_pred_scaled = model.predict(X_train_cubic)
y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled)

y_test_pred_scaled = model.predict(X_test_cubic)
y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled)

# --- Plots: actual points vs. predicted curve -----------------------------
def _plot_actual_vs_predicted(x, y_actual, y_predicted, title, split_name):
    """Scatter the actual values and overlay the model's predicted curve.

    Parameters
    ----------
    x : ndarray
        Feature values in original units (flattened internally).
    y_actual : ndarray
        Observed target values, one per entry of ``x``.
    y_predicted : ndarray
        Model predictions in original units, one per entry of ``x``.
    title : str
        Figure title.
    split_name : str
        Text placed in the legend labels, e.g. ``'Training'`` or ``'Test'``.
    """
    x_flat = x.ravel()
    y_pred_flat = y_predicted.ravel()

    # The rows were shuffled by train_test_split. A line plot connects points
    # in array order, so draw the prediction in ascending x order; otherwise
    # the line jumps back and forth across the figure instead of tracing the
    # fitted curve.
    order = x_flat.argsort()

    fig, ax = plt.subplots()
    ax.scatter(x_flat, y_actual, label=f'Actual ({split_name})')
    ax.plot(x_flat[order], y_pred_flat[order], color='red',
            label=f'Predicted ({split_name})')
    ax.set_xlabel('House price')
    ax.set_ylabel('GDP per capita')
    ax.set_title(title)
    ax.legend()
    plt.show()


# Plot fitting results
_plot_actual_vs_predicted(
    X_train, y_train, y_train_pred,
    title='Fitting Results on Training Data',
    split_name='Training',
)

# Plot prediction results
_plot_actual_vs_predicted(
    X_test, y_test, y_test_pred,
    title='Prediction Results on Test Data',
    split_name='Test',
)


Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/ed/30/310fee0477ce46f722c561dd7e21eebca0d1d29bdb3cf4a2335b845fbba4/tensorflow-2.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Using cached tensorflow-2.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-1.4.0-py3-none-any.whl (126 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting flatbuffers>=23.1.21 (from tensorflow)
  Obtaining dependency information for flatbuffers>=23.1.21 from https://files.pythonhosted.org/packages/6f/12/d5c79ee252793ffe845d58a913197bfa02ae9a0b5c9bc3dc4b58d477b9e7/flatbuffers-23.5.26-py2.py3-none-any.whl.metadata
  Using cached flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow)
  Using cached 

ModuleNotFoundError: No module named 'tensorflow'