In [1]:
from json import load
from pydantic import BaseModel, Field
from langchain.tools import tool
import requests
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so that
# os.getenv("RAPID_API_KEY") below can find the API key.
load_dotenv()


class LinkedinInput(BaseModel):
    # Argument schema for the get_linkedin_profile_data tool; it is handed to
    # the @tool decorator through args_schema.
    # (Documented with comments rather than a class docstring so the generated
    # schema is left exactly as it was.)

    # Required field: `...` as the first Field argument means "no default".
    query_url: str = Field(
        ..., description="Linkedin URL of the person for whom you want to fetch data"
    )


# Upper bound, in seconds, on how long the RapidAPI request may block.
# Without a timeout `requests` waits forever on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30


def _period_year(entry: dict, boundary: str):
    """Return the year stored at entry["timePeriod"][boundary]["year"], or None.

    A JSON ``null`` at any level is treated the same as a missing key, so a
    partially filled record never raises.
    """
    period = entry.get("timePeriod") or {}
    return (period.get(boundary) or {}).get("year")


# Maintainer notes (kept out of the docstring on purpose: langchain's @tool
# uses the function docstring as the tool description shown to the LLM, so the
# docstring text is left unchanged):
#   * query_url -- LinkedIn profile URL, sent as the `url` query parameter.
#   * Returns a dict with the keys username, full_name, is_open_to_work,
#     is_hiring, headline, summary, location, skills, education, positions
#     and projects.
#   * Best effort: on any request or parsing failure the error is printed and
#     None is returned; no exception propagates to the caller.
#   * NOTE(review): the response key names (timePeriod, institutionName, ...)
#     are taken as-is from the previous implementation -- confirm them against
#     a current API response.
@tool(args_schema=LinkedinInput)
def get_linkedin_profile_data(query_url: str) -> dict:
    """Fetch the linkedin profile data of a person using the linkedin profile URL mentioned in the Linkedin Column of in the database.
    Output will contain the persons information like his current job, company, location, headline, summary, positions, educations, skills, projects etc.
    """
    url = "https://linkedin-data-api.p.rapidapi.com/get-profile-data-by-url"

    querystring = {"url": query_url}

    headers = {
        "x-rapidapi-key": os.getenv("RAPID_API_KEY"),
        "x-rapidapi-host": "linkedin-data-api.p.rapidapi.com",
    }

    try:
        response = requests.get(
            url,
            headers=headers,
            params=querystring,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()  # Raises HTTPError for bad responses (4xx or 5xx)
        profile_data = response.json()

        # `dict.get(key, default)` only applies the default when the key is
        # absent; the API may send an explicit null, hence the `or` fallbacks.
        name_parts = (profile_data.get("firstName"), profile_data.get("lastName"))
        geo = profile_data.get("geo") or {}
        projects = profile_data.get("projects") or {}

        extracted_data = {
            "username": profile_data.get("username"),
            # Join only the parts that are present so a missing name does not
            # produce "None Smith" or a lone space.
            "full_name": " ".join(part for part in name_parts if part),
            "is_open_to_work": profile_data.get("isOpenToWork"),
            "is_hiring": profile_data.get("isHiring"),
            "headline": profile_data.get("headline"),
            "summary": profile_data.get("summary"),
            "location": geo.get("full"),
            "skills": profile_data.get("skills") or [],
            "education": [
                {
                    "institution": edu.get("institutionName"),
                    "degree": edu.get("degreeName"),
                    "field_of_study": edu.get("fieldOfStudy"),
                    "start_date": _period_year(edu, "startDate"),
                    "end_date": _period_year(edu, "endDate"),
                }
                for edu in profile_data.get("educations") or []
            ],
            "positions": [
                {
                    "title": pos.get("title"),
                    "company": pos.get("companyName"),
                    "location": pos.get("geoLocationName"),
                    "start_date": _period_year(pos, "startDate"),
                    "end_date": _period_year(pos, "endDate"),
                    "description": pos.get("description"),
                }
                for pos in profile_data.get("position") or []
            ],
            "projects": [
                {
                    "title": proj.get("title"),
                    "description": proj.get("description"),
                    "start_date": _period_year(proj, "startDate"),
                    "end_date": _period_year(proj, "endDate"),
                }
                for proj in projects.get("items") or []
            ],
        }

        return extracted_data

    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        print(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        print(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        print(f"An error occurred: {req_err}")
    except Exception as err:
        # Deliberate catch-all: the tool must never raise into the agent loop
        # (e.g. when the payload is not the expected JSON object).
        print(f"An unexpected error occurred: {err}")

    # Every failure path ends here; callers get None instead of a profile.
    return None



In [3]:
# Manual check of the tool against a real profile URL. This performs a live
# HTTP request and reads RAPID_API_KEY from the environment.
get_linkedin_profile_data("https://www.linkedin.com/in/vatsal-thakkar-880320161/")

{'username': 'vatsal-thakkar-880320161',
 'full_name': 'Vatsal Thakkar',
 'is_open_to_work': True,
 'is_hiring': False,
 'headline': 'Masters in Computer Science | Passionate about Deep Learning & Generative Models | Computer Vision | LLMs | Multimodal Representation Learning',
 'summary': "I am currently pursuing a Master's Degree in Computer Science from The University of Georgia. I am passionate about Generative Deep Learning Models, including GPT, BERT, Stable Diffusion, and GANs, as well as Multimodal Representation Learning.\n\n💼 Professional Experience:\nAs a Research Assistant at The Hoarfrost Lab, I'm currently contributing to a cutting-edge project that involves integrating textual and DNA sequence data using the Multimodal Language Model.\n\nIn my most recent role as an LLM and AI Engineer at ColomboAI, I led the implementation of the RAG (Retrieval Augmented Generation) Pipeline. Along with this, I designed and executed a Mixture-of-Experts (MoE) methodology. I also enginee