In [32]:
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib
import os
import numpy as np
import requests
from bs4 import BeautifulSoup

# Load the saved model
model = tf.keras.models.load_model('my_model.keras')

# Function to extract data from Instagram profile URL
def extract_instagram_profile_data(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Example extraction logic (this will vary based on the actual HTML structure)
    profile_pic = 1 if soup.find('img', {'class': 'profile-pic'}) else 0
    username = soup.find('h1', {'class': 'username'})
    username = username.text if username else ''
    
    fullname = soup.find('h1', {'class': 'fullname'})
    fullname = fullname.text if fullname else ''
    
    description = soup.find('div', {'class': 'description'})
    description = description.text if description else ''
    external_url = 1 if soup.find('a', {'class': 'external-url'}) else 0
    profile_status = soup.find('div', {'class': 'profile-status'})
    private = 1 if profile_status and 'private' in profile_status.text else 0
    
    posts = soup.find('span', {'class': 'posts'})
    posts = int(posts.text) if posts else 0
    
    followers = soup.find('span', {'class': 'followers'})
    followers = int(followers.text) if followers else 0
    
    follows = soup.find('span', {'class': 'follows'})
    follows = int(follows.text) if follows else 0
    
    # Calculate additional features
    nums_length_username = sum(c.isdigit() for c in username) / len(username) if len(username) > 0 else 0
    fullname_words = len(fullname.split())
    nums_length_fullname = sum(c.isdigit() for c in fullname) / len(fullname) if len(fullname) > 0 else 0
    name_equals_username = 1 if fullname == username else 0
    description_length = len(description)
    
    profile_data = {
        'profile pic': [profile_pic],
        'nums/length username': [nums_length_username],
        'fullname words': [fullname_words],
        'nums/length fullname': [nums_length_fullname],
        'name==username': [name_equals_username],
        'description length': [description_length],
        'external URL': [external_url],
        'private': [private],
        '#posts': [posts],
        '#followers': [followers],
        '#follows': [follows]
    }
    
    return profile_data

# Example usage
url = 'https://www.instagram.com/urstrulymahesh/'
profile_data = extract_instagram_profile_data(url)

# Convert to DataFrame
profile_df = pd.DataFrame(profile_data)

# Check if the scaler file exists
scaler_path = 'scaler.joblib'
if os.path.exists(scaler_path):
    # Load the scaler used during training
    scaler = joblib.load(scaler_path)
else:
    # If the scaler file does not exist, create a new scaler and fit it with the profile data
    scaler = StandardScaler()
    scaler.fit(profile_df)
    # Save the scaler for future use
    joblib.dump(scaler, scaler_path)

# Scale the new data
X_profile_scaled = scaler.transform(profile_df)

# Make prediction
profile_prediction = model.predict(X_profile_scaled)
predicted_class = np.argmax(profile_prediction)

print(f'The predicted class for the Instagram profile is: {predicted_class}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
The predicted class for the Instagram profile is: 0


In [33]:
print(profile_df)

   profile pic  nums/length username  fullname words  nums/length fullname  \
0            0                     0               0                     0   

   name==username  description length  external URL  private  #posts  \
0               1                   0             0        0       0   

   #followers  #follows  
0           0         0  


In [34]:
# Assuming you have a validation set
# Replace the ellipsis with actual validation data
# Example:
# X_val = np.array([[...], [...], ...])
# y_val = np.array([...])

# For demonstration purposes, let's create some dummy validation data
X_val = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
y_val = np.array([0])

# Preprocess the validation data
X_val_scaled = scaler.transform(X_val)

# Evaluate the model
val_predictions = model.predict(X_val_scaled)
val_predicted_classes = np.argmax(val_predictions, axis=1)

# Print evaluation metrics
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print('Validation Accuracy:', accuracy_score(y_val, val_predicted_classes))
print('Classification Report:\n', classification_report(y_val, val_predicted_classes))
print('Confusion Matrix:\n', confusion_matrix(y_val, val_predicted_classes))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Validation Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

Confusion Matrix:
 [[1]]




In [43]:


from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

def extract_instagram_profile_data_selenium(url):
    # Set up the Selenium WebDriver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.get(url)
    
    # Wait for the page to load
    time.sleep(5)
    
    # Example extraction logic (this will vary based on the actual HTML structure)
    profile_pic = 1 if driver.find_element(By.CSS_SELECTOR, 'img.profile-pic') else 0
    print(f'Profile Pic: {profile_pic}')
    
    username = driver.find_element(By.CSS_SELECTOR, 'h1.username').text
    print(f'Username: {username}')
    
    fullname = driver.find_element(By.CSS_SELECTOR, 'h1.fullname').text
    print(f'Fullname: {fullname}')
    
    description = driver.find_element(By.CSS_SELECTOR, 'div.description').text
    print(f'Description: {description}')
    
    external_url = 1 if driver.find_element(By.CSS_SELECTOR, 'a.external-url') else 0
    print(f'External URL: {external_url}')
    
    profile_status = driver.find_element(By.CSS_SELECTOR, 'div.profile-status').text
    private = 1 if 'private' in profile_status else 0
    print(f'Private: {private}')
    
    posts = int(driver.find_element(By.CSS_SELECTOR, 'span.posts').text.replace(',', ''))
    print(f'Posts: {posts}')
    
    followers = int(driver.find_element(By.CSS_SELECTOR, 'span.followers').text.replace(',', ''))
    print(f'Followers: {followers}')
    
    follows = int(driver.find_element(By.CSS_SELECTOR, 'span.follows').text.replace(',', ''))
    print(f'Follows: {follows}')
    
    # Calculate additional features
    nums_length_username = sum(c.isdigit() for c in username) / len(username) if len(username) > 0 else 0
    fullname_words = len(fullname.split())
    nums_length_fullname = sum(c.isdigit() for c in fullname) / len(fullname) if len(fullname) > 0 else 0
    name_equals_username = 1 if fullname == username else 0
    description_length = len(description)
    
    profile_data = {
        'profile pic': [profile_pic],
        'nums/length username': [nums_length_username],
        'fullname words': [fullname_words],
        'nums/length fullname': [nums_length_fullname],
        'name==username': [name_equals_username],
        'description length': [description_length],
        'external URL': [external_url],
        'private': [private],
        '#posts': [posts],
        '#followers': [followers],
        '#follows': [follows]
    }
    
    driver.quit()
    return profile_data

# Example usage
url = 'https://www.instagram.com/urstrulymahesh/'
profile_data = extract_instagram_profile_data_selenium(url)

# Convert to DataFrame
if profile_data:
    profile_df = pd.DataFrame(profile_data)
    print(profile_df)
else:
    print("Failed to extract profile data.")

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"img.profile-pic"}
  (Session info: chrome=132.0.6834.160); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00C874A3+25091]
	(No symbol) [0x00C0DC04]
	(No symbol) [0x00AEB373]
	(No symbol) [0x00B2F4DC]
	(No symbol) [0x00B2F65B]
	(No symbol) [0x00B6D8E2]
	(No symbol) [0x00B51F54]
	(No symbol) [0x00B6B49E]
	(No symbol) [0x00B51CA6]
	(No symbol) [0x00B231D5]
	(No symbol) [0x00B2435D]
	GetHandleVerifier [0x00F807C3+3142947]
	GetHandleVerifier [0x00F91A2B+3213195]
	GetHandleVerifier [0x00F8C412+3191154]
	GetHandleVerifier [0x00D28720+685184]
	(No symbol) [0x00C16E1D]
	(No symbol) [0x00C13E18]
	(No symbol) [0x00C13FB6]
	(No symbol) [0x00C066F0]
	BaseThreadInitThunk [0x76785D49+25]
	RtlInitializeExceptionChain [0x7730CDEB+107]
	RtlGetAppContainerNamedObjectPath [0x7730CD71+561]
