In [1]:
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
import chromadb
import uuid
import pandas as pd

In [2]:
job_text = """
Munich, Germany
Munich, Germany
Intern / Working Student
Working Student/ Intern
Intern / Working Student
Engineering
(Intern / Working Student) NLP Engineer, LLM Agents
As a NLP Engineer, you will work with the tech team to further develop our AI Agent and expand its capabilities. This is a very high impact role where you will shape the product and work at the very edge of whats possible with the state-of-the-art technologies.

About Us:

We are KNOWRON GmbH, a deep tech startup based in Munich, Germany. Widespread loss of knowledge and an ageing workforce require a fundamental change in manufacturing. That's why we are building a Large Language Model (LLM) based platform to assist desk-less workers anywhere, 24/7.

As a NLP Engineer, you will work with the tech team to further develop our AI Agents capabilities. This is a very high impact role where you will shape the product and work at the very edge of whats possible with the state-of-the-art technologies.

‍

Key responsibilities

Implement and test new skills for our AI Agent.
Evaluate the performance and accuracy of the pipeline to ensure high value for the user
Write clean and maintainable code with sensible testing that you would enjoy taking over from a colleague.
Proactively communicate, document, and share your approach, progress, results, and challenges.
Your profile

You have built a project with LLM technology.
You are currently pursuing your Masters degree in Computer Science, Electrical Engineering or any other technical related field.
You are proficient in Python and are comfortable working with REST APIs
You have experience solving NLP problems and are comfortable with the usuals in this space (Spacy, Gensim, Cohere/ OpenAI / Gemini etc).
You are results-oriented and have strong organizational & communication skills
You have a go-getter attitude. You are proactive about finding solutions to problems.
You are fluent in English
What's in it for you?

Steep Learning Curve: Join a funded early stage startup and learn the product discovery and development process from the inside out.
Mentorship: We do regular 1:1s, retrospectives and support you to learn new things to grow personally and professionally.
Hybrid Work: We are a company built during the covid era, hybrid work is a core part of our culture.
Personal Development Budget: You get a 1000€ yearly budget for personal growth and development.
Team Events & Fitness: We provide healthy snacks, drinks, and exciting team activities and an Urban Sports Club membership
Hit us up!

‍

Please send your CV, your LinkedIn profile and your earliest possible starting date to jobs@knowron.com. We are looking forward to your application and will try to get back to you as soon as possible!

‍

KNOWRON is an equal opportunity employer. We embrace and celebrate diversity and are committed to creating an inclusive environment for all employees. We are open to all groups of people without regard to age, color, national origin, race, religion, gender, sex, sexual orientation, gender identity and/or expression, marital status, or any other legally protected characteristics.
"""

In [3]:
def extract_job_info(text):
    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        groq_api_key="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",
        temperature=0.2
    )

    prompt_extract = PromptTemplate(
        input_variables=["text"],
        template=
        """
        Extract detail information such as `role`, `responsibilities`, `requirement`, `skills`, `description` from {text} and made into JSON format.
        """
    )

    chain_extract = prompt_extract | llm
    response = chain_extract.invoke({"text": text})

    return response.content

In [4]:
extract_info = extract_job_info(job_text)

In [5]:
def parse_extract_info(extract_output):
    json_parser = JsonOutputParser()
    json_res = json_parser.parse(extract_output)
    return json_res

In [6]:
json_extract_info = parse_extract_info(extract_info)

In [7]:
portforlio_dir = "/Users/yu/Desktop/Code/ColdEmailGenerator/coldemail_practice/coldemail_practice3/my_portfolio.csv"

portforlio_df = pd.read_csv(portforlio_dir, delimiter=";")
portforlio_df

Unnamed: 0,Project Name,Project Description,Project Links,skills
0,"Zipf's Law, Text Generation Using A N-gram Mod...",,https://github.com/yyuuccii/CoLi_1/blob/main/C...,"LLM, NLP"
1,Comparative Analysis of BERT Variants on Senti...,"Evaluate BERT-base, RoBERTa, DistilBERT, and A...",https://drive.google.com/file/d/13RJ4inZYD4eEd...,"NLP, SpaCy"
2,Comparative Analysis of Sentiment Analysis Tec...,Compare sentiment analysis results from NLTK/V...,https://drive.google.com/file/d/1hioeSSo9hQ_g_...,"LLM, NLP"
3,IT Service Ticket Classification with DistilBERT,text Classification: Classify IT service ticke...,https://github.com/yyuuccii/TextClassification...,"LLM, NLP"
4,Job Application Generator Using LLM and LangChain,Use the ChatGroq API to get access to LLM: Met...,https://github.com/yyuuccii/JobApplicationGene...,"LLM, API, NLP, LangChain"


In [8]:
cilent = chromadb.PersistentClient("vectorstore")
collection = cilent.get_or_create_collection(name="portfolio")

if not collection.count():
    for _, row in portforlio_df.iterrows():
        document =  f"{row['Project Name']} - {row['Project Description']}"
        collection.add(documents=[document],
                       metadatas={"links": row["Project Links"]},
                       ids=[str(uuid.uuid4())]
                       )


In [10]:
json_extract_info.keys()

dict_keys(['role', 'responsibilities', 'requirements', 'skills', 'description'])

In [11]:
json_extract_info

{'role': 'NLP Engineer, LLM Agents',
 'responsibilities': ['Implement and test new skills for our AI Agent.',
  'Evaluate the performance and accuracy of the pipeline to ensure high value for the user',
  'Write clean and maintainable code with sensible testing that you would enjoy taking over from a colleague.',
  'Proactively communicate, document, and share your approach, progress, results, and challenges.'],
 'requirements': ["Currently pursuing a Master's degree in Computer Science, Electrical Engineering or any other technical related field.",
  'Proficient in Python and comfortable working with REST APIs',
  'Experience solving NLP problems and comfortable with the usuals in this space (Spacy, Gensim, Cohere/ OpenAI / Gemini etc).',
  'Results-oriented and strong organizational & communication skills',
  'Go-getter attitude and proactive about finding solutions to problems.',
  'Fluent in English'],
 'skills': ['LLM technology',
  'Python',
  'REST APIs',
  'NLP',
  'Spacy',
  '

In [12]:
description = json_extract_info["description"]
skills = json_extract_info["skills"]
responsibilities = json_extract_info["responsibilities"]

In [13]:
query_text = []
query_text = str(skills) + " " + str(responsibilities)

In [15]:
links = collection.query(query_texts=query_text,
                         n_results=3)
links

{'ids': [['098edb35-bd48-469b-a9e8-19efedd1be85',
   '95bb1f69-d928-45d5-b0d6-cfb74e6e7c32',
   '54825f89-6961-4319-94f0-a0f83bff2e74']],
 'distances': [[1.367870475452547, 1.5867215893216307, 1.623989555244984]],
 'metadatas': [[{'links': 'https://github.com/yyuuccii/JobApplicationGenerator/blob/main/JobApplicationGenerator.ipynb'},
   {'links': 'https://drive.google.com/file/d/13RJ4inZYD4eEdOVK9lHhEQ64VAs1WgSs/view'},
   {'links': 'https://github.com/yyuuccii/TextClassification_ITService/tree/main'}]],
 'embeddings': None,
 'documents': [['Job Application Generator Using LLM and LangChain - Use the ChatGroq API to get access to LLM: Meta 3 Llama 70B model and LangChain to extract job details, and integrate with chromadb for relevant portfolio projects to craft personalized job application letter.',
   'Comparative Analysis of BERT Variants on Sentiment Analysis - Evaluate BERT-base, RoBERTa, DistilBERT, and ALBERT with an added linear layer for sentiment analysis, comparing their per

In [28]:
def write_letter(links, query_text):
    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        groq_api_key="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",
        temperature=0.7
    )

    prompt_writing = PromptTemplate(
        input_variables=["links", "query_text"],
        template=
        """
        You are Yuci Chen. You are a master's student studying Computational Linguistics at Saarland University. You are applying for a NLP internship/workstudent position at the company called "Knowron".

        Write a short (about 100 words) and concise application letter, where you read the skills in {query_text} and add the most RELEVANT project from the following links {links} to showcase portfolio, but don't write down details about the projects, just provide the links and the name of the projects.

        Greeting with the company's name in the beginning.

        Add in the end: P.S. this letter is generated by a Llama model. 
        """
    )

    chain_writing = prompt_writing | llm
    response = chain_writing.invoke({"links": links, "query_text": query_text})

    return response

In [30]:
letter = write_letter(links, description)
print(letter.content)

Dear Knowron Team,

I am writing to express my strong interest in the NLP internship/workstudent position at Knowron. As a master's student in Computational Linguistics at Saarland University, I am confident that my skills and experience make me an ideal fit for this role.

I am particularly drawn to this position because of the opportunity to work with a talented tech team to develop and expand the capabilities of your AI Agent. I am excited about the prospect of working at the edge of what's possible with state-of-the-art technologies and shaping the product.

Some of my relevant projects that demonstrate my skills in NLP include:

- Job Application Generator Using LLM and LangChain (https://github.com/yyuuccii/JobApplicationGenerator/blob/main/JobApplicationGenerator.ipynb)
- Comparative Analysis of BERT Variants on Sentiment Analysis (https://drive.google.com/file/d/13RJ4inZYD4eEdOVK9lHhEQ64VAs1WgSs/view)
- IT Service Ticket Classification with DistilBERT (https://github.com/yyuucc