<a href="https://colab.research.google.com/github/votientrung/House_predict_AI_project/blob/main/House_predict_AI_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Insert data set

In [None]:
from google.colab import files
# Open Colab's file-upload prompt and keep the returned mapping in `uploaded`.
# The unzip step further down reads the archive from the path 'dataset.zip'.
uploaded = files.upload()

Saving dataset.zip to dataset.zip


# Training data

## Unzip dataset

In [None]:
import zipfile
import os

# Archive to unpack and the folder that receives its contents
zip_file_path = 'dataset.zip'
extract_dir = 'dataset'

# Make sure the destination folder is present (no error if it already exists)
os.makedirs(extract_dir, exist_ok=True)

# Unpack every member of the archive into the destination folder
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_dir)

print(f"'{zip_file_path}' unzipped to '{extract_dir}' successfully.")

'dataset.zip' unzipped to 'dataset' successfully.


## Load and preprocess data

In [None]:
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
import os

# Preprocessing applied to every image:
#   1. resize to 128x128 (the input size the classifier's first linear layer is sized for)
#   2. convert the PIL image to a float tensor
#   3. normalize each channel with the ImageNet mean / std
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ImageFolder derives the class labels from the sub-folder names under 'Houses-Images'
dataset_root = os.path.join(extract_dir, 'Houses-Images')
dataset = datasets.ImageFolder(root=dataset_root, transform=transform)

# 80% of the images go to training, the remainder to validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same images land in the training / validation sets on
# every run; without a fixed generator the reported accuracies are not
# comparable between runs of the notebook.
split_seed = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Batched loaders; only the training data is shuffled
batch_size = 32 # You can adjust this based on GPU memory
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

print(f"Total number of images: {len(dataset)}")
print(f"Number of training images: {len(train_dataset)}")
print(f"Number of validation images: {len(val_dataset)}")
print(f"Classes: {dataset.classes}")

Total number of images: 180
Number of training images: 144
Number of validation images: 36
Classes: ['eligible', 'not_eligible']


## Define and build model

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# Define the CNN model
class HouseClassifier(nn.Module):
    """Small CNN that classifies a house image into one of two classes.

    Architecture: three 3x3 convolutions (3 -> 16 -> 32 -> 64 channels), each
    followed by ReLU and 2x2 max pooling, then two fully connected layers
    (64*16*16 -> 512 -> 2).

    The first linear layer is sized for 128x128 inputs: three poolings halve
    each spatial dimension three times, 128 -> 64 -> 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; padding=1 keeps the spatial size,
        # so only the pooling layer shrinks the feature maps.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) # 3 input channels (RGB)
        self.pool = nn.MaxPool2d(2, 2) # shared 2x2 max pooling, halves height and width
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # Classifier head: 64 channels * 16 * 16 spatial positions -> 512 -> 2 logits
        self.fc1 = nn.Linear(64 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, 2) # Output layer: 2 classes (eligible, not_eligible)

    def forward(self, x):
        """Return raw class logits of shape (batch, 2) for a batch of images.

        Args:
            x: tensor of shape (batch, 3, 128, 128).

        Raises:
            RuntimeError: if the spatial size is not 128x128, because the
                flattened features no longer match ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x))) # Conv1 -> ReLU -> Pool
        x = self.pool(F.relu(self.conv2(x))) # Conv2 -> ReLU -> Pool
        x = self.pool(F.relu(self.conv3(x))) # Conv3 -> ReLU -> Pool
        # Flatten per sample, keeping the batch dimension fixed. Using
        # view(-1, 64*16*16) here would silently fold extra spatial positions
        # into the batch dimension for inputs of a different size.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x)) # FC1 -> ReLU
        x = self.fc2(x) # FC2 (output logits, no softmax)
        return x

# Prefer the first CUDA device when one is visible, otherwise run on the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the network and place its parameters on the selected device
# (Module.to returns the module itself, so `model` is the same object)
model = HouseClassifier().to(device)

print(f"Model instantiated and moved to {device}")

Model instantiated and moved to cpu


## Train model

In [None]:
import torch.optim as optim
import torch.nn as nn

# Cross-entropy over the two class logits, optimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of passes over the training set
num_epochs = 10 # You can adjust this

print("Starting model training...")

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train() # Set the model to training mode
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad() # Clear gradients left over from the previous step

        outputs = model(inputs) # Forward pass
        loss = criterion(outputs, labels) # Mean loss over this batch
        loss.backward() # Backward pass
        optimizer.step() # Parameter update

        # The criterion returns a per-batch mean, so weight it by the batch
        # size: the last batch is usually smaller, and averaging batch means
        # directly would over-weight its samples.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count

        _, predicted = torch.max(outputs, 1)
        total_train += batch_count
        correct_train += (predicted == labels).sum().item()

    train_accuracy = 100 * correct_train / total_train
    print(f'Epoch {epoch + 1}, Loss: {running_loss / total_train:.4f}, Train Accuracy: {train_accuracy:.2f}%')

    # ---- validation pass ----
    model.eval() # Set the model to evaluation mode
    correct_val = 0
    total_val = 0
    val_loss = 0.0
    with torch.no_grad(): # No gradients are needed for validation
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Same sample-weighted accumulation as in the training pass
            batch_count = labels.size(0)
            val_loss += loss.item() * batch_count

            _, predicted = torch.max(outputs, 1)
            total_val += batch_count
            correct_val += (predicted == labels).sum().item()

    val_accuracy = 100 * correct_val / total_val
    print(f'Validation Loss: {val_loss / total_val:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

print('Finished Training')


Starting model training...
Epoch 1, Loss: 0.8195, Train Accuracy: 55.56%
Validation Loss: 0.6005, Validation Accuracy: 66.67%
Epoch 2, Loss: 0.5169, Train Accuracy: 81.25%
Validation Loss: 0.8467, Validation Accuracy: 77.78%
Epoch 3, Loss: 0.4166, Train Accuracy: 82.64%
Validation Loss: 0.7187, Validation Accuracy: 69.44%
Epoch 4, Loss: 0.3050, Train Accuracy: 86.81%
Validation Loss: 0.7856, Validation Accuracy: 72.22%
Epoch 5, Loss: 0.2414, Train Accuracy: 89.58%
Validation Loss: 0.6917, Validation Accuracy: 75.00%
Epoch 6, Loss: 0.1844, Train Accuracy: 93.75%
Validation Loss: 0.9952, Validation Accuracy: 75.00%
Epoch 7, Loss: 0.1294, Train Accuracy: 95.14%
Validation Loss: 0.7893, Validation Accuracy: 83.33%
Epoch 8, Loss: 0.0711, Train Accuracy: 97.22%
Validation Loss: 1.0545, Validation Accuracy: 77.78%
Epoch 9, Loss: 0.0437, Train Accuracy: 100.00%
Validation Loss: 1.0054, Validation Accuracy: 80.56%
Epoch 10, Loss: 0.0208, Train Accuracy: 100.00%
Validation Loss: 1.4605, Validati

## Evaluate model

In [None]:
# Put the network in evaluation mode before scoring
model.eval()
total = 0
correct = 0

# Inference only: gradient tracking is switched off for the whole pass
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        logits = model(images)
        # Index of the largest logit per row is the predicted class
        predicted = logits.max(dim=1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f'Accuracy of the network on the {total} validation images: {accuracy:.2f}%')

Accuracy of the network on the 36 validation images: 77.78%


## Save model

In [None]:
import torch

# File that receives the trained weights
model_save_path = 'house_classifier_model.pth'

# Persist only the parameter state dict rather than the whole module object
trained_weights = model.state_dict()
torch.save(trained_weights, model_save_path)

print(f"Model saved successfully to {model_save_path}")

Model saved successfully to house_classifier_model.pth


# Testing


## Input

In [None]:
from google.colab import files
import os

print("Please upload the house image for prediction:")
uploaded = files.upload()

# Only the first uploaded file is used for prediction
if uploaded:
    uploaded_filename = next(iter(uploaded))
    new_filename = 'house_predict.jpg' # Or .png, based on expected input

    # Write the uploaded bytes straight to the target path instead of renaming
    # the file on disk. The keys of `uploaded` are the original file names,
    # while Colab may store the file under a de-duplicated name when that name
    # is already taken (see its "Saving X to Y" line), so renaming by key can
    # pick up a stale file. Opening with 'wb' also replaces any image left
    # over from a previous run.
    # (The message below keeps the original wording for output compatibility.)
    with open(new_filename, 'wb') as image_file:
        image_file.write(uploaded[uploaded_filename])
    print(f"File '{uploaded_filename}' uploaded and renamed to '{new_filename}'.")

    # Now, you can load and preprocess this 'new_filename' for prediction
else:
    print("No file was uploaded.")

Please upload the house image for prediction:


Saving not_elegible.jpg to not_elegible.jpg
File 'not_elegible.jpg' uploaded and renamed to 'house_predict.jpg'.


## Output

In [None]:
from PIL import Image
import torchvision.transforms as transforms
import torch

# Image written by the upload step
image_path = 'house_predict.jpg'

# Same preprocessing as the training data: resize to 128x128, convert to a
# tensor, normalize with the ImageNet statistics. Any difference from the
# training transform would feed the model inputs it was not trained on.
preprocess = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Force three channels so grayscale / RGBA files match the model's RGB input
image = Image.open(image_path).convert('RGB')

# Preprocess and add a leading batch dimension: (3, 128, 128) -> (1, 3, 128, 128)
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0)

# Restore the saved weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU runtime still loads when
# the notebook is running on CPU (and vice versa).
model_save_path = 'house_classifier_model.pth'
model.load_state_dict(torch.load(model_save_path, map_location=device))
model.eval() # Set the model to evaluation mode

# Move the input to the same device as the model
input_batch = input_batch.to(device)

with torch.no_grad():
    output = model(input_batch)

# Softmax over the two logits of the single image gives class probabilities;
# the index of the largest logit is the predicted class
probabilities = torch.nn.functional.softmax(output[0], dim=0)
_, predicted_idx = torch.max(output, 1)

# Class names come from the ImageFolder dataset, in label-index order
class_names = dataset.classes
predicted_class = class_names[predicted_idx.item()]

print(f"The house in '{image_path}' is predicted to be: {predicted_class}")
print(f"Probabilities: {probabilities}")

The house in 'house_predict.jpg' is predicted to be: not_eligible
Probabilities: tensor([0.4625, 0.5375])
