In [5]:
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
import pandas as pd
import chromadb
import uuid

In [59]:
job_info_path = "/Users/yu/Desktop/JobProjects/ApplicationLetterGenerator_1/job_info.txt"
api_path = "/Users/yu/Desktop/JobProjects/ApplicationLetterGenerator_1/api_keys.txt"
prompt_letter_path = "/Users/yu/Desktop/JobProjects/ApplicationLetterGenerator_1/prompt_letter_template.txt"

In [60]:
def read(path):
    with open(path, "r") as f:
        content = f.read()
    
    return content

In [123]:
job_content = read(job_info_path)
api_keys = read(api_path)
prompt_letter_content = read(prompt_letter_path)

In [65]:
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    groq_api_key=api_keys,
    temperature=0.3
)

prompt_job = PromptTemplate(
    input_variables=["job_info"],
    template="""
    extract detail information `role`, `experience`, `skills`, `description` from {job_info} and made into JSON format. Don't create more keys under these three keys
    """
)

chain_job = prompt_job | llm
response = chain_job.invoke({"job_info": job_content}) 

In [66]:
def parse_job_info(job_info):
    parser = JsonOutputParser()
    json_response = parser.parse(job_info)
    
    return json_response

In [67]:
job_info_json = parse_job_info(response.content)
job_info_json

{'role': 'Intern / Working Student (NLP Engineer, LLM Agents)',
 'experience': ['Currently pursuing Masters degree in Computer Science, Electrical Engineering or any other technical related field',
  'Experience solving NLP problems',
  'Experience with LLM technology'],
 'skills': ['Python',
  'REST APIs',
  'Spacy',
  'Gensim',
  'Cohere/OpenAI/Gemini',
  'English (fluent)'],
 'description': 'As a NLP Engineer, you will work with the tech team to further develop our AI Agent and expand its capabilities. This is a very high impact role where you will shape the product and work at the very edge of whats possible with the state-of-the-art technologies.'}

In [68]:
data_dir = "my_portfolio.csv"
df = pd.read_csv(data_dir, delimiter=";")
df

Unnamed: 0,Project Name,Project Description,Project Links,skills
0,"Zipf's Law, Text Generation Using A N-gram Mod...",,https://github.com/yyuuccii/CoLi_1/blob/main/C...,"LLM, NLP"
1,Comparative Analysis of BERT Variants on Senti...,"Evaluate BERT-base, RoBERTa, DistilBERT, and A...",https://drive.google.com/file/d/13RJ4inZYD4eEd...,"NLP, SpaCy"
2,Comparative Analysis of Sentiment Analysis Tec...,Compare sentiment analysis results from NLTK/V...,https://drive.google.com/file/d/1hioeSSo9hQ_g_...,"LLM, NLP"
3,IT Service Ticket Classification with DistilBERT,text Classification: Classify IT service ticke...,https://github.com/yyuuccii/TextClassification...,"LLM, NLP"
4,Job Application Generator Using LLM and LangChain,Use the ChatGroq API to get access to LLM: Met...,https://github.com/yyuuccii/JobApplicationGene...,"LLM, API, NLP, LangChain"


In [69]:
client = chromadb.PersistentClient("vectorstore")
collection = client.get_or_create_collection(name="portfolio")

if not collection.count():
    for _, row in df.iterrows():
        document = f"{row['Project Name']} - {row['Project Description']}"
        collection.add(documents=[document],
                       metadatas={"links": row["Project Links"]},
                       ids=[str(uuid.uuid4())]
                       )

In [70]:
job_info_json.keys()

dict_keys(['role', 'experience', 'skills', 'description'])

In [71]:
query = job_info_json["description"] + str(job_info_json["experience"]) + str(job_info_json["skills"])
query

"As a NLP Engineer, you will work with the tech team to further develop our AI Agent and expand its capabilities. This is a very high impact role where you will shape the product and work at the very edge of whats possible with the state-of-the-art technologies.['Currently pursuing Masters degree in Computer Science, Electrical Engineering or any other technical related field', 'Experience solving NLP problems', 'Experience with LLM technology']['Python', 'REST APIs', 'Spacy', 'Gensim', 'Cohere/OpenAI/Gemini', 'English (fluent)']"

In [72]:
links = collection.query(query_texts=query,
                         n_results=3)

In [124]:
def write_letter(links, job_info_json):
    
    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        groq_api_key=api_keys,
        temperature=0.6
    )
    
    prompt_letter = PromptTemplate(
        input_variables=["links", "job_description"],
        template=prompt_letter_content
    )
    
    chain_letter = prompt_letter | llm
    letter = chain_letter.invoke({"links": links, "job_description": job_info_json})
    
    return letter

In [127]:
letter = write_letter(links=links, job_info_json=job_info_json["skills"])

In [128]:
print(letter.content)

Dear Knowron Team,

I'm Yuci Chen, a master's student in Computational Linguistics at Saarland University. I'm excited to apply for the NLP internship/workstudent position at Knowron.

With a strong foundation in Python programming and experience in NLP tools such as REST APIs, Spacy, Gensim, and Cohere/OpenAI/Gemini, I'm confident in my ability to contribute to your team. I'm fluent in English and have a passion for NLP applications.

Some relevant projects I've worked on include:
• Job Application Generator Using LLM and LangChain (https://github.com/yyuuccii/JobApplicationGenerator/blob/main/JobApplicationGenerator.ipynb)
• Comparative Analysis of Sentiment Analysis Techniques and Preprocessing Impact (https://drive.google.com/file/d/1hioeSSo9hQ_g_aQTISfR2UZtYFH4GR57/view?usp=sharing)
• IT Service Ticket Classification with DistilBERT (https://github.com/yyuuccii/TextClassification_ITService/tree/main)

I'm looking forward to discussing my qualifications further.

Best regards,
Yuci