In [None]:
!pip install neo4j


Collecting neo4j
  Downloading neo4j-5.12.0.tar.gz (190 kB)
[K     |████████████████████████████████| 190 kB 5.5 MB/s eta 0:00:01
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (PEP 517) ... [?25ldone
[?25h  Created wheel for neo4j: filename=neo4j-5.12.0-py3-none-any.whl size=263745 sha256=1e69d8c39be93194c1e478178637344a683382de47b75f12055b0a81417c47dc
  Stored in directory: /Users/jiangyiwei/Library/Caches/pip/wheels/57/f0/58/85b6179dfa6386d12499758bd654fc095bd01b6df38a99301d
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-5.12.0


In [None]:
!pip install spacy
!python -m spacy download en_core_web_lg


Collecting spacy
  Downloading spacy-3.6.1-cp39-cp39-macosx_10_9_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 7.0 MB/s eta 0:00:01
Collecting smart-open<7.0.0,>=5.2.1
  Downloading smart_open-6.4.0-py3-none-any.whl (57 kB)
[K     |████████████████████████████████| 57 kB 5.3 MB/s eta 0:00:011
Collecting wasabi<1.2.0,>=0.9.1
  Downloading wasabi-1.1.2-py3-none-any.whl (27 kB)
Collecting cymem<2.1.0,>=2.0.2
  Downloading cymem-2.0.8-cp39-cp39-macosx_10_9_x86_64.whl (42 kB)
[K     |████████████████████████████████| 42 kB 2.0 MB/s  eta 0:00:01
[?25hCollecting pathy>=0.10.0
  Downloading pathy-0.10.2-py3-none-any.whl (48 kB)
[K     |████████████████████████████████| 48 kB 8.3 MB/s  eta 0:00:01
Collecting typer<0.10.0,>=0.3.0
  Downloading typer-0.9.0-py3-none-any.whl (45 kB)
[K     |████████████████████████████████| 45 kB 6.3 MB/s eta 0:00:011
Collecting langcodes<4.0.0,>=3.2.0
  Downloading langcodes-3.3.0-py3-none-any.whl (181 kB)
[K     |████████████████████

In [None]:
#Connect to Neo4j

In [None]:
from neo4j import GraphDatabase

class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created in ``__init__``; if creation fails the error is
    printed and the driver stays ``None``. The object can also be used as a
    context manager, in which case the driver is closed on exit.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` using ``(user, pwd)`` as credentials."""
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; exceptions propagate."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` and return all of its records as a list.

        Args:
            query: Cypher text to execute.
            parameters: Optional mapping of query parameters passed to the driver.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.

        Returns:
            A list of the records produced by the query, or ``None`` if
            opening the session or running the query raised (the error is
            printed, not re-raised).

        Raises:
            AssertionError: If the driver was never created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        # Only pass `database` when the caller asked for a specific one.
        session_kwargs = {} if db is None else {"database": db}
        response = None
        try:
            # The session context manager closes the session on every path.
            with self.__driver.session(**session_kwargs) as session:
                response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        return response

import os

# Connection settings can be overridden through environment variables so real
# credentials do not have to be written into the notebook; the fallbacks are
# the values this notebook used originally.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ.get("NEO4J_PASSWORD", "123456"),
)


In [None]:
# Use spaCy to compare the similarities between texts

In [None]:
import spacy

# Initialize the spaCy pipeline once at module level; compute_similarity below
# uses this shared `nlp` object for every text it parses.
nlp = spacy.load("en_core_web_lg")

from functools import lru_cache


@lru_cache(maxsize=None)
def _parse_text(text):
    """Run the shared spaCy pipeline on ``text`` and memoize the result.

    The pair loops in this notebook compare every role description with every
    applicant text, so without the cache each text would be parsed again for
    every pair. Call ``_parse_text.cache_clear()`` after rebinding ``nlp``.
    """
    return nlp(text)


def compute_similarity(text1, text2):
    """Return the spaCy similarity between two texts.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        ``0.0`` when either text is missing or empty; otherwise the value of
        ``Doc.similarity`` between the parsed texts.
    """
    # Missing or empty text cannot be compared; treat it as no similarity.
    if not text1 or not text2:
        return 0.0

    return _parse_text(text1).similarity(_parse_text(text2))

# Fetch the role and applicant fields needed for text matching
roles = conn.query("MATCH (r:Role) RETURN r.role_description, r.project_name, r.role_name")
applicants = conn.query("MATCH (a:Applicant) RETURN a.biography, a.UID")

# Score every (role, applicant) pair for which both texts are present,
# keyed by (project name, role name, applicant UID)
similarities = {}
for role in roles:
    description = role["r.role_description"]
    for applicant in applicants:
        bio = applicant["a.biography"]

        # Skip pairs where the description or the biography is missing
        if not (description and bio):
            continue

        pair = (role["r.project_name"], role["r.role_name"], applicant["a.UID"])
        similarities[pair] = compute_similarity(description, bio)


In [None]:
# Preview the first 5 (key, score) entries
preview = list(similarities.items())[:5]
print(preview)


[(('Inc', 'Technical Product Manager', 'bb48d3b4-2724-4bd9-b050-430349ec7dc4'), 0.7248541582207213), (('Inc', 'Technical Product Manager', '08ade3a7-a2b8-4304-bfb3-c0c6285fc8d5'), 0.7224082255946336), (('Inc', 'Technical Product Manager', '93f9cf43-8c60-4b27-8535-bf8a121f61f6'), 0.7187216950136979), (('Inc', 'Technical Product Manager', '6f92a1ec-56c3-4ac6-9532-7e62116fa756'), 0.7194197972371968), (('Inc', 'Technical Product Manager', '135a7fd8-c368-4788-81ea-05db87b47e5a'), 0.720741503383385)]


In [None]:
# Store the results in Neo4j

# Values are passed as query parameters instead of being formatted into the
# Cypher text, so names containing quotes cannot break or alter the query.
# MERGE matches on the relationship alone and SET writes the score, so
# re-running this cell updates the existing relationship instead of creating
# another one whenever the score differs.
cql = """
MATCH (r:Role {project_name: $project_name, role_name: $role_name}),
      (a:Applicant {UID: $applicant_uid})
MERGE (r)-[rel:HAS_TEXT_SIMILARITY]->(a)
SET rel.score = $score
"""

for (project_name, role_name, applicant_uid), sim_score in similarities.items():
    conn.query(
        cql,
        parameters={
            "project_name": project_name,
            "role_name": role_name,
            "applicant_uid": applicant_uid,
            # Plain float so the driver never sees a non-builtin numeric type.
            "score": float(sim_score),
        },
    )




In [None]:
# role-talent matching

In [None]:
# Role -> talent matching query.
#
# For every Role/Applicant pair linked by HAS_TEXT_SIMILARITY it computes:
#   - how many of the role's required skills appear in the applicant's skills,
#   - 0/1 flags for location, industry, language, availability and weekly hours,
#   - the stored text-similarity score weighted by 0.25.
# Pairs are kept only when at least 75% of the required skills are matched.
# totalScore adds the matched-required-skill count, the five flags, 0.5 per
# matched preferred skill, 0.5 per matched tool, 0.75 when the applicant's
# experience meets the role's years_of_experience, and the weighted text score.
# The first five collected matches per (project_name, role_name) are returned
# with the applicant's first_name and score.
#
# NOTE(review): grouping is by project_name and role_name only, so roles that
# share both values end up in one combined top-5 list.
# NOTE(review): the top-5 slice assumes COLLECT keeps the order produced by the
# preceding ORDER BY - confirm against the Cypher documentation.
cql2 = """
MATCH (r:Role), (a:Applicant), (r)-[rel:HAS_TEXT_SIMILARITY]->(a)
WITH r, a,
    size([x IN r.required_skills WHERE x IN a.skills]) AS matchedRequiredSkills,
    size(r.required_skills) AS totalRequiredSkills,
    CASE WHEN r.Location = a.Location THEN 1 ELSE 0 END AS locationMatch,
    CASE WHEN r.Industry = a.industry THEN 1 ELSE 0 END AS industryMatch,
    CASE WHEN r.Language = a.language THEN 1 ELSE 0 END AS languageMatch,
    CASE WHEN r.availability_date <= a.availability THEN 1 ELSE 0 END AS availabilityMatch,
    CASE WHEN r.min_available_hours_per_week <= a.working_hours THEN 1 ELSE 0 END AS hoursMatch,
    rel.score * 0.25 AS textSimilarityScore

WHERE matchedRequiredSkills >= 0.75 * totalRequiredSkills

WITH r, a,
    1*matchedRequiredSkills + 1*locationMatch + 1*industryMatch + 1*languageMatch +
    1*availabilityMatch + 1*hoursMatch +
    size([x IN r.preferred_skills WHERE x IN a.skills]) * 0.5 +
    size([x IN r.tools WHERE x IN a.tools]) * 0.5 +
    CASE WHEN a.experience >= r.years_of_experience THEN 0.75 ELSE 0 END +
    textSimilarityScore AS totalScore

ORDER BY r.project_name, r.role_name, totalScore DESC

WITH r.project_name AS project_name, r.role_name AS role_name,
    COLLECT({applicant: a, score: totalScore})[0..5] AS topMatches
UNWIND topMatches AS match

RETURN project_name, role_name, match.applicant.first_name AS applicant_name, match.score AS score
ORDER BY project_name, role_name, score DESC;

"""

In [None]:
# conn.query() prints most failures itself and returns None, and it raises when
# the driver was never created. Start from an empty list so the loop below is
# always safe and never reads a stale `results` left over from an earlier run.
results = []
try:
    results = conn.query(cql2) or []
except Exception as e:
    print(f"Error executing CQL: {e}")


for record in results:
    project_name = record.get('project_name', 'N/A')
    role_name = record.get('role_name', 'N/A')
    applicant_name = record.get('applicant_name', 'N/A')
    score = record.get('score', 'N/A')
    print(f"Project Name: {project_name}\nRole Name: {role_name}\nApplicant Name: {applicant_name}\nScore: {score}\n{'-'*50}")


Project Name: Group
Role Name: Business Development Manager
Applicant Name: Wendy
Score: 6.932003013942328
--------------------------------------------------
Project Name: Group
Role Name: Business Development Manager
Applicant Name: Barry
Score: 6.931802088021631
--------------------------------------------------
Project Name: Group
Role Name: Business Development Manager
Applicant Name: Lindsey
Score: 6.930724463418918
--------------------------------------------------
Project Name: Group
Role Name: Business Development Manager
Applicant Name: Melissa
Score: 6.432227833762068
--------------------------------------------------
Project Name: Group
Role Name: Business Development Manager
Applicant Name: Corey
Score: 6.430962347291866
--------------------------------------------------
Project Name: Group
Role Name: HR Officer
Applicant Name: Robin
Score: 8.430407864605447
--------------------------------------------------
Project Name: Group
Role Name: HR Officer
Applicant Name: Sean
Sco

In [None]:
# test the compute_similarity function

In [None]:
# Sanity check on one applicant biography (text1) and one role description
# (text2); the bare call on the last line makes the notebook display the score.
text1 = "I am a Marketing professional with expertise in API Design, Android. My previous role was as a DevOps Engineer at Beard, Wheeler and Hoover in the Marketing industry where I gained 8 years of experience. Skills: API Design, Android, Brand Strategy"
text2 = "This role at LLC involves Technical Product Manager-related responsibilities in the ['IT', 'Finance', 'Healthcare', 'Education', 'Engineering', 'Marketing'] industry. We are looking for candidates with Fundraising, Brand Strategy, 3D Engineering skills and expertise. The role requires 8 years of experience."
compute_similarity(text1,text2)

0.7248541582207213

In [None]:
#job recommendation

In [None]:
# Re-read roles and applicants; applicants now also carry personality_questions
roles = conn.query("MATCH (r:Role) RETURN r.role_description, r.project_name, r.role_name")
applicants = conn.query("MATCH (a:Applicant) RETURN a.biography, a.personality_questions, a.UID")

# Score tables keyed by (project name, role name, applicant UID)
biography_similarities, question_similarities = {}, {}

In [None]:
for role in roles:
    role_text = role["r.role_description"]
    for applicant in applicants:
        bio_text, answers_text = applicant["a.biography"], applicant["a.personality_questions"]
        pair_key = (role["r.project_name"], role["r.role_name"], applicant["a.UID"])

        # compute_similarity returns 0.0 for missing text, so every pair gets an entry
        biography_similarities[pair_key] = compute_similarity(role_text, bio_text)
        question_similarities[pair_key] = compute_similarity(role_text, answers_text)

In [None]:
def _store_similarity_scores(relationship_type, scores):
    """Write one ``relationship_type`` relationship per (role, applicant) pair.

    Args:
        relationship_type: Relationship type to create between the Role and
            the Applicant. It is formatted into the query text, so pass only
            fixed, trusted constants.
        scores: Mapping of ``(project_name, role_name, applicant_uid)`` to the
            similarity score to store on the relationship.
    """
    # Relationship types cannot be supplied as Cypher parameters, so only the
    # type is formatted into the text; all data values go through parameters,
    # which keeps quotes in names from breaking or altering the query.
    # MERGE matches on the relationship alone and SET writes the score, so a
    # re-run updates the existing relationship instead of adding another one.
    cql = f"""
    MATCH (r:Role {{project_name: $project_name, role_name: $role_name}}),
          (a:Applicant {{UID: $applicant_uid}})
    MERGE (r)-[rel:{relationship_type}]->(a)
    SET rel.score = $score
    """
    for (project_name, role_name, applicant_uid), sim_score in scores.items():
        conn.query(
            cql,
            parameters={
                "project_name": project_name,
                "role_name": role_name,
                "applicant_uid": applicant_uid,
                # Plain float so the driver never sees a non-builtin numeric type.
                "score": float(sim_score),
            },
        )


_store_similarity_scores("HAS_BIOGRAPHY_SIMILARITY", biography_similarities)
_store_similarity_scores("HAS_QUESTION_SIMILARITY", question_similarities)


In [None]:
# Job recommendation query (roles ranked per applicant).
#
# Considers Role/Applicant pairs that have BOTH a HAS_BIOGRAPHY_SIMILARITY and
# a HAS_QUESTION_SIMILARITY relationship. For each pair it computes:
#   - how many of the role's required skills appear in the applicant's skills,
#   - 0/1 flags for location, industry, language, availability and weekly hours,
#   - the biography and question similarity scores, each weighted by 0.25
#     (COALESCE falls back to 0 when a relationship has no score).
# Pairs are kept only when at least 80% of the required skills are matched
# (the role-talent query in cql2 uses 75%).
# totalScore adds the matched-required-skill count, the five flags, 0.5 per
# matched preferred skill, 0.5 per matched tool, 0.75 when the applicant's
# experience meets the role's years_of_experience, and both weighted scores.
#
# NOTE(review): the ORDER BY on a.UID is followed by a final ORDER BY on
# applicant_name (a.first_name), which determines the returned order; applicants
# who share a first name are therefore interleaved in the output.
cql3 = """
MATCH (r:Role), (a:Applicant),
      (r)-[bio_rel:HAS_BIOGRAPHY_SIMILARITY]->(a),
      (r)-[ques_rel:HAS_QUESTION_SIMILARITY]->(a)

WITH r, a,
     size([x IN r.required_skills WHERE x IN a.skills]) AS matchedRequiredSkills,
     size(r.required_skills) AS totalRequiredSkills,
     CASE WHEN r.Location = a.Location THEN 1 ELSE 0 END AS locationMatch,
     CASE WHEN r.Industry = a.industry THEN 1 ELSE 0 END AS industryMatch,
     CASE WHEN r.Language = a.language THEN 1 ELSE 0 END AS languageMatch,
     CASE WHEN r.availability_date <= a.availability THEN 1 ELSE 0 END AS availabilityMatch,
     CASE WHEN r.min_available_hours_per_week <= a.working_hours THEN 1 ELSE 0 END AS hoursMatch,
     COALESCE(bio_rel.score, 0) * 0.25 AS biographySimilarityScore,
     COALESCE(ques_rel.score, 0) * 0.25 AS questionSimilarityScore

WHERE matchedRequiredSkills >= 0.8 * totalRequiredSkills

WITH a, r,
     1*matchedRequiredSkills + 1*locationMatch + 1*industryMatch + 1*languageMatch +
     1*availabilityMatch + 1*hoursMatch +
     size([x IN r.preferred_skills WHERE x IN a.skills]) * 0.5 +
     size([x IN r.tools WHERE x IN a.tools]) * 0.5 +
     CASE WHEN a.experience >= r.years_of_experience THEN 0.75 ELSE 0 END +
     biographySimilarityScore + questionSimilarityScore AS totalScore

ORDER BY a.UID, totalScore DESC

RETURN a.first_name AS applicant_name, r.project_name AS project_name, r.role_name AS role_name, totalScore AS score
ORDER BY applicant_name, score DESC;


"""

In [None]:
# conn.query() prints most failures itself and returns None, and it raises when
# the driver was never created. Start from an empty list so the loop below is
# always safe and never reads a stale `recommend_results` from an earlier run.
recommend_results = []
try:
    recommend_results = conn.query(cql3) or []
except Exception as e:
    print(f"Error executing CQL: {e}")


for record in recommend_results:
    applicant_name = record.get('applicant_name', 'N/A')
    project_name = record.get('project_name', 'N/A')
    role_name = record.get('role_name', 'N/A')
    score = record.get('score', 'N/A')
    print(f"Applicant name: {applicant_name}\nProject Name: {project_name}\nRole Name: {role_name}\nScore: {score}\n{'-'*50}")


Applicant name: Aaron
Project Name: Inc
Role Name: Project Manager
Score: 7.54652279875407
--------------------------------------------------
Applicant name: Aaron
Project Name: Inc
Role Name: Data Scientist
Score: 7.0480923213099
--------------------------------------------------
Applicant name: Aaron
Project Name: Group
Role Name: HR Officer
Score: 6.799283071646788
--------------------------------------------------
Applicant name: Aaron
Project Name: Group
Role Name: Solution Architect
Score: 6.798883944879322
--------------------------------------------------
Applicant name: Aaron
Project Name: PLC
Role Name: Financial Advisory Consultant
Score: 6.796607361864254
--------------------------------------------------
Applicant name: Aaron
Project Name: Inc
Role Name: Technical Product Manager
Score: 6.795299238076024
--------------------------------------------------
Applicant name: Aaron
Project Name: PLC
Role Name: Content Editor
Score: 6.7951796902095
-------------------------------