In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

def _node_text(node):
    """Return the text of a parsed HTML node, or '' when the lookup found nothing."""
    return node.get_text() if node is not None else ''


# Function to fetch LinkedIn profile data
def get_profile_data(url, timeout=30):
    """Fetch a LinkedIn profile page and return its free-text sections.

    Args:
        url: Address of the profile page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A single string made of the summary, experience and education
        section texts, separated by single spaces. A section whose element
        is absent from the page is left out.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: If the request fails or times out.
    """
    # requests waits indefinitely unless a timeout is given
    response = requests.get(url, timeout=timeout)
    # Stop on an error status instead of parsing an error/login page
    response.raise_for_status()
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # soup.find() yields None when nothing matches; _node_text maps that to ''
    summary = _node_text(soup.find('div', {'class': 'pv-top-card-section__summary'}))
    experience = _node_text(
        soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'})
    )
    education = _node_text(
        soup.find('section', {'class': 'pv-profile-section__section education-section ember-view'})
    )
    # Join with a space so the last word of one section does not fuse with
    # the first word of the next when the text is tokenized later
    return ' '.join(part for part in (summary, experience, education) if part)

# Function to train the classifier
def train_classifier(profile_data, labels):
    """Fit a TF-IDF + multinomial Naive Bayes pipeline and return it.

    Args:
        profile_data: Training documents, passed to ``Pipeline.fit`` as X.
        labels: Target labels, passed to ``Pipeline.fit`` as y.

    Returns:
        The fitted ``Pipeline`` with steps named 'vectorizer' and
        'classifier'.
    """
    # Text is turned into TF-IDF features, which then feed the Naive Bayes model
    steps = [
        ('vectorizer', TfidfVectorizer()),
        ('classifier', MultinomialNB()),
    ]
    model = Pipeline(steps)
    model.fit(profile_data, labels)
    return model

# Function to predict the personality of a LinkedIn profile
def predict_personality(pipeline, url):
    """Predict the label for the profile found at ``url``.

    Args:
        pipeline: Object exposing ``predict``; it receives a one-element
            list holding the value returned by ``get_profile_data(url)``.
        url: Profile address forwarded to ``get_profile_data``.

    Returns:
        Whatever ``pipeline.predict`` returns for that one-element batch.
    """
    # predict() is called with a batch, so the single profile is wrapped in a list
    return pipeline.predict([get_profile_data(url)])

# Example usage
# Training set: each URL is paired by position with the label at the same index
profile_urls = [
    'https://www.linkedin.com/in/alice',
    'https://www.linkedin.com/in/bob',
    'https://www.linkedin.com/in/charlie',
    'https://www.linkedin.com/in/dave',
]
labels = ['introvert', 'extrovert', 'introvert', 'extrovert']

# Download every training profile, then fit the classifier on the fetched data
training_texts = list(map(get_profile_data, profile_urls))
pipeline = train_classifier(training_texts, labels)

# Classify a profile that is not part of the training set and show the result
test_url = 'https://www.linkedin.com/in/eve'
prediction = predict_personality(pipeline, test_url)
print(prediction)


In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

def _node_text(node):
    """Return the text of a parsed HTML node, or '' when the lookup found nothing."""
    return node.get_text() if node is not None else ''


def _leading_int(text):
    """Parse the integer that starts a label such as '500+ connections'; 0 when there is none."""
    tokens = text.split()
    if not tokens:
        return 0
    # Drop thousands separators and a trailing '+' before converting
    digits = tokens[0].replace(',', '').rstrip('+')
    return int(digits) if digits.isdecimal() else 0


# Function to fetch LinkedIn profile data
def get_profile_data(url, timeout=30):
    """Fetch a LinkedIn profile page and extract its text and activity counts.

    Args:
        url: Address of the profile page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(profile_data, additional_data)`` tuple. ``profile_data`` is the
        summary, experience and education text joined by single spaces
        (sections missing from the page are left out). ``additional_data``
        is a dict with the keys 'job_titles' (list of str), 'connections',
        'recommendations', 'num_posts' and 'num_shares' (ints; 0 when the
        value is absent or does not start with a number).

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: If the request fails or times out.
    """
    # requests waits indefinitely unless a timeout is given
    response = requests.get(url, timeout=timeout)
    # Stop on an error status instead of parsing an error/login page
    response.raise_for_status()
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # soup.find() yields None when nothing matches; _node_text maps that to ''
    summary = _node_text(soup.find('div', {'class': 'pv-top-card-section__summary'}))
    experience = _node_text(
        soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'})
    )
    education = _node_text(
        soup.find('section', {'class': 'pv-profile-section__section education-section ember-view'})
    )

    # Extract the job titles
    job_titles = [
        title.get_text()
        for title in soup.find_all('h3', {'class': 't-16 t-black t-normal'})
    ]

    # Counts are read from the leading number of the link text
    connections = _leading_int(
        _node_text(soup.find('a', {'data-link-to': 'about:connections'}))
    )
    recommendations = _leading_int(
        _node_text(soup.find('a', {'data-link-to': 'about:recommendations'}))
    )

    # Sum the numbers that start the hidden spans mentioning posts or shares;
    # spans that mention the word without a leading number add nothing
    num_posts = 0
    num_shares = 0
    for span in soup.find_all('span', {'class': 'visually-hidden'}):
        text = span.get_text()
        if 'post' in text:
            num_posts += _leading_int(text)
        elif 'share' in text:
            num_shares += _leading_int(text)

    # Create a dictionary of the additional data
    additional_data = {
        'job_titles': job_titles,
        'connections': connections,
        'recommendations': recommendations,
        'num_posts': num_posts,
        'num_shares': num_shares,
    }
    # Join with a space so the last word of one section does not fuse with
    # the first word of the next when the text is tokenized later
    profile_data = ' '.join(part for part in (summary, experience, education) if part)
    return profile_data, additional_data

# Function to train the classifier
def train_classifier(profile_data, additional_data, labels):
    # Combine the profile data and additional data into a single feature vector
    features = []
    for i in range(len(profile_data)):
        features.append(profile_


In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np

def _node_text(node):
    """Return the text of a parsed HTML node, or '' when the lookup found nothing."""
    return node.get_text() if node is not None else ''


def _leading_int(text):
    """Parse the integer that starts a label such as '500+ connections'; 0 when there is none."""
    tokens = text.split()
    if not tokens:
        return 0
    # Drop thousands separators and a trailing '+' before converting
    digits = tokens[0].replace(',', '').rstrip('+')
    return int(digits) if digits.isdecimal() else 0


# Function to fetch LinkedIn profile data
def get_profile_data(url, timeout=30):
    """Fetch a LinkedIn profile page and extract its text and activity counts.

    Args:
        url: Address of the profile page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(profile_data, additional_data)`` tuple. ``profile_data`` is the
        summary, experience and education text joined by single spaces
        (sections missing from the page are left out). ``additional_data``
        is a dict with the keys 'job_titles' (list of str), 'connections',
        'recommendations', 'num_posts' and 'num_shares' (ints; 0 when the
        value is absent or does not start with a number).

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: If the request fails or times out.
    """
    # requests waits indefinitely unless a timeout is given
    response = requests.get(url, timeout=timeout)
    # Stop on an error status instead of parsing an error/login page
    response.raise_for_status()
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # soup.find() yields None when nothing matches; _node_text maps that to ''
    summary = _node_text(soup.find('div', {'class': 'pv-top-card-section__summary'}))
    experience = _node_text(
        soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'})
    )
    education = _node_text(
        soup.find('section', {'class': 'pv-profile-section__section education-section ember-view'})
    )

    # Extract the job titles
    job_titles = [
        title.get_text()
        for title in soup.find_all('h3', {'class': 't-16 t-black t-normal'})
    ]

    # Counts are read from the leading number of the link text
    connections = _leading_int(
        _node_text(soup.find('a', {'data-link-to': 'about:connections'}))
    )
    recommendations = _leading_int(
        _node_text(soup.find('a', {'data-link-to': 'about:recommendations'}))
    )

    # Sum the numbers that start the hidden spans mentioning posts or shares;
    # spans that mention the word without a leading number add nothing
    num_posts = 0
    num_shares = 0
    for span in soup.find_all('span', {'class': 'visually-hidden'}):
        text = span.get_text()
        if 'post' in text:
            num_posts += _leading_int(text)
        elif 'share' in text:
            num_shares += _leading_int(text)

    # Create a dictionary of the additional data
    additional_data = {
        'job_titles': job_titles,
        'connections': connections,
        'recommendations': recommendations,
        'num_posts': num_posts,
        'num_shares': num_shares,
    }
    # Join with a space so the last word of one section does not fuse with
    # the first word of the next when the text is tokenized later
    profile_data = ' '.join(part for part in (summary, experience, education) if part)
    # The original ended in `return profile`, a name that is never defined;
    # return the two values this function actually builds
    return profile_data, additional_data


In [None]:
# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
# `data` and `labels` are not defined in this cell; `data` must expose the
# columns selected by name below.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

# Preprocessing: TF-IDF on the free-text column, standard scaling on the counts
numeric_columns = ['connections', 'recommendations', 'num_posts', 'num_shares']
text_transformer = TfidfVectorizer()
numerical_transformer = StandardScaler()
preprocessor = ColumnTransformer(transformers=[
    ('text', text_transformer, 'profile_data'),
    ('num', numerical_transformer, numeric_columns),
])

# Model: a random forest fed by the preprocessor
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', classifier),
])

# Fit on the training rows, then predict the held-out rows
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Report accuracy on the held-out rows as a percentage
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))



In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np

# Function to fetch LinkedIn profile data
def get_profile_data(url):
    """Fetch the page at ``url`` and pull profile fields out of its HTML.

    The page is downloaded with ``requests.get`` and parsed with
    BeautifulSoup's 'html.parser'. The body reads the summary, experience
    and education text, the job titles, and the connection count.

    NOTE(review): this definition appears to be cut off. It has no
    ``return`` statement, so as written it returns None.
    """
    # Send a GET request to the LinkedIn profile URL
    response = requests.get(url)
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract the summary section from the HTML content
    # NOTE(review): each chained .get_text() below assumes the element exists;
    # presumably soup.find() returns None otherwise — confirm and guard
    summary = soup.find('div', {'class': 'pv-top-card-section__summary'}).get_text()
    # Extract the experience section from the HTML content
    experience = soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'}).get_text()
    # Extract the education section from the HTML content
    education = soup.find('section', {'class': 'pv-profile-section__section education-section ember-view'}).get_text()
    # Extract the Job titles
    job_titles = soup.find_all('h3', {'class': 't-16 t-black t-normal'})
    job_titles = [title.get_text() for title in job_titles]
    # Extract the number of connections
    connections = soup.find('a', {'data-link-to': 'about:connections'}).get_text()
    # The count is taken from the first space-separated token of the link text
    connections = int(connections.split(' ')[0])
    # Extract the number of Recommendations
    recommendations = soup.find('a', {'data-link-to': 'about:recommendations'}).get_text()
    # NOTE(review): the next line is a bare expression with no effect; the
    # conversion to int and the rest of the function are missing here
    recommendations


In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np

def get_big_five_scores(url):
    """
    Return Big Five personality scores for the given URL.

    Placeholder implementation: ``url`` is not used, and the same fixed
    scores are returned in a new dict on every call.
    """
    # TODO: replace these constants with scores fetched from an API or by web scraping
    return {
        "Openness": 0.5,
        "Conscientiousness": 0.6,
        "Extraversion": 0.7,
        "Agreeableness": 0.8,
        "Neuroticism": 0.4,
    }

def get_profile_data(url):
    """
    Fetch the page at ``url`` and pull profile fields out of its HTML.

    The page is downloaded with ``requests.get`` and parsed with
    BeautifulSoup's 'html.parser'. The body reads the summary, experience
    and education text, the job titles, the connection and recommendation
    counts, and totals for posts and shares.

    NOTE(review): this definition appears to be cut off. It ends in a bare
    reference to ``profile`` and has no ``return`` statement.
    """
    # Send a GET request to the LinkedIn profile URL
    response = requests.get(url)
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract the summary section from the HTML content
    # NOTE(review): each chained .get_text() below assumes the element exists;
    # presumably soup.find() returns None otherwise — confirm and guard
    summary = soup.find('div', {'class': 'pv-top-card-section__summary'}).get_text()
    # Extract the experience section from the HTML content
    experience = soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'}).get_text()
    # Extract the education section from the HTML content
    education = soup.find('section', {'class': 'pv-profile-section__section education-section ember-view'}).get_text()
    # Extract the Job titles
    job_titles = soup.find_all('h3', {'class': 't-16 t-black t-normal'})
    job_titles = [title.get_text() for title in job_titles]
    # Extract the number of connections
    connections = soup.find('a', {'data-link-to': 'about:connections'}).get_text()
    # The count is taken from the first space-separated token of the link text
    connections = int(connections.split(' ')[0])
    # Extract the number of Recommendations
    recommendations = soup.find('a', {'data-link-to': 'about:recommendations'}).get_text()
    recommendations = int(recommendations.split(' ')[0])
    # Extract the number of Posts and Shares made by the user
    posts = soup.find_all('span', {'class': 'visually-hidden'})
    num_posts = 0
    num_shares = 0
    for post in posts:
        # A span counts toward posts if its text contains 'post', otherwise
        # toward shares if it contains 'share'; int() raises ValueError when
        # the first token is not a number
        if 'post' in post.get_text():
            num_posts += int(post.get_text().split(' ')[0])
        elif 'share' in post.get_text():
            num_shares += int(post.get_text().split(' ')[0])
    # Combine the summary, experience, and education sections into a single string
    # NOTE(review): nothing named `profile` is assigned in this function, so
    # unless a global of that name exists this line raises NameError
    profile


In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np

def get_big_five_scores(url):
    """
    Return Big Five personality scores for the given URL.

    Placeholder implementation: ``url`` is not used, and the same fixed
    scores are returned in a new dict on every call.
    """
    # TODO: replace these constants with scores fetched from an API or by web scraping
    return {
        "Openness": 0.5,
        "Conscientiousness": 0.6,
        "Extraversion": 0.7,
        "Agreeableness": 0.8,
        "Neuroticism": 0.4,
    }

def get_profile_data(url):
    """
    Function to fetch LinkedIn profile data
    """
    # Send a GET request to the LinkedIn profile URL
    response = requests.get(url)
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract the summary section from the HTML content
    summary = soup.find('div', {'class': 'pv-top-card-section__summary'}).get_text()
    # Extract the experience section from the HTML content
    experience = soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'}).get_text()
    # Extract the education section from the HTML content
    education = soup.find('section', {'class': 'pv-profile-section__section education-section ember-view'}).get_text()
    # Extract the Job titles
    job_titles = soup.find_all('h3', {'class': 't-16 t-black t-normal'})
    job_titles = [


In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from nltk.sentiment import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np

def get_big_five_scores(url):
    """
    Return Big Five personality scores for the given URL.

    Placeholder implementation: ``url`` is not used, and the same fixed
    scores are returned in a new dict on every call.
    """
    # TODO: replace these constants with scores fetched from an API or by web scraping
    return {
        "Openness": 0.5,
        "Conscientiousness": 0.6,
        "Extraversion": 0.7,
        "Agreeableness": 0.8,
        "Neuroticism": 0.4,
    }

def get_tone(summary, experience, education):
    """
    Label the overall sentiment of three profile sections.

    The sections are concatenated and scored with a
    SentimentIntensityAnalyzer. The 'compound' score maps to "positive"
    (>= 0.05), "negative" (<= -0.05) or "neutral" (anything in between).
    """
    combined = summary + experience + education
    # A new analyzer is built per call; only the compound score is used
    compound = SentimentIntensityAnalyzer().polarity_scores(combined)['compound']
    if compound >= 0.05:
        return "positive"
    if compound <= -0.05:
        return "negative"
    return "neutral"

def get_profile_data(url):
    """
    Fetch the page at ``url`` and pull profile fields out of its HTML.

    The page is downloaded with ``requests.get`` and parsed with
    BeautifulSoup's 'html.parser'. The body reads the summary, experience
    and education text and the job titles.

    NOTE(review): this definition appears to be cut off. Its last statement
    binds ``soup.find`` without calling it, and there is no ``return``
    statement, so as written it returns None.
    """
    # Send a GET request to the LinkedIn profile URL
    response = requests.get(url)
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract the summary section from the HTML content
    # NOTE(review): each chained .get_text() below assumes the element exists;
    # presumably soup.find() returns None otherwise — confirm and guard
    summary = soup.find('div', {'class': 'pv-top-card-section__summary'}).get_text()
    # Extract the experience section from the HTML content
    experience = soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'}).get_text()
    # Extract the education section from the HTML content
    education = soup.find('section', {'class': 'pv-profile-section__section education-section ember-view'}).get_text()
    # Extract the Job titles
    job_titles = soup.find_all('h3', {'class': 't-16 t-black t-normal'})
    job_titles = [title.get_text() for title in job_titles]
    # Extract the number of connections
    # NOTE(review): this assigns the method object itself; the call and the
    # rest of the function are missing here
    connections = soup.find


In [None]:
from collections import Counter
import nltk
from nltk.corpus import stopwords
# Fetch the NLTK data this cell relies on: the stop-word list read via
# stopwords.words(), and the Punkt tokenizer models that nltk.word_tokenize
# loads (without them word_tokenize raises LookupError).
# NOTE(review): recent NLTK releases look for 'punkt_tab' instead of 'punkt' —
# confirm against the installed version.
nltk.download('stopwords')
nltk.download('punkt')

def get_most_common_words(text, n=10):
    """
    Return the n most frequent words of a text as (word, count) pairs.

    The text is tokenized with NLTK and lower-cased. Only purely alphabetic
    tokens that are not English stop words are counted.
    """
    # Tokenize and normalise case first, so the stop-word check below sees lower-case words
    lowered = [word.lower() for word in nltk.word_tokenize(text)]
    english_stop_words = set(stopwords.words("english"))
    # isalpha() drops punctuation and tokens containing digits
    counts = Counter(
        word
        for word in lowered
        if word.isalpha() and word not in english_stop_words
    )
    return counts.most_common(n)

def get_profile_data(url):
    """
    Function to fetch LinkedIn profile data
    """
    # Send a GET request to the LinkedIn profile URL
    response = requests.get(url)
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract the summary section from the HTML content
    summary = soup.find('div', {'class': 'pv-top-card-section__summary'}).get_text()
    # Extract the experience section from the HTML content
    experience = soup.find('section', {'class': 'pv-profile-section__section experience-section ember-view'
