In [9]:
import argparse
import logging
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from data_preprocess import (
    create_dataset_prompt_col,
    create_dataset_splits,
    data_preprocess,
    transform_df_to_pytorch,
)
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader, Dataset
from transformers import AdamW, BartConfig, BartForConditionalGeneration, BartTokenizer

In [10]:
logging.basicConfig(level=logging.INFO)
logging.info("Reading data")
with open("../data/res_job_score_report_combined.pkl", "rb") as f:
    df = pickle.load(f)
logging.info("Data read")

df_processed = data_preprocess(df)
df_processed = create_dataset_prompt_col(df_processed)

# train 70%, validation 15%, test 15%
_, _, df_test = create_dataset_splits(
    df_processed, train_split=0.3, val_split=0.5, random_state=42
)


logging.info("Testing setup. Only using 100 samples")
df_test = df_test[:5]
logging.info(f"Shape of the test data is {df_test.shape}")

INFO:root:Reading data
INFO:root:Data read
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["prompt"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["res_and_job"] = "<<RESUME>> " + df["resume"] + ", <<JOB>> " + df["job"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["score_and_report"] = "<<SCORE>> " + df["score"] + ", <<REPORT>> "

In [None]:
# 

In [11]:
checkpoint = "model_ckpt"
logging.info("Initializing model and tokenizer from checkpoint")
tokenizer = BartTokenizer.from_pretrained(checkpoint)
model = BartForConditionalGeneration.from_pretrained(checkpoint)
# config = BartConfig.from_pretrained(checkpoint)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    model.to(device)
logging.info("Model initialized and sent to device")

model.eval()

INFO:root:Initializing model and tokenizer from checkpoint


SafetensorError: Error while deserializing header: MetadataIncompleteBuffer

In [6]:
def generate_report_from_model(res_job):
    # Tokenize and encode the text
    inputs = tokenizer(res_job, return_tensors="pt")
    report_ids = model.generate(**inputs, max_new_tokens=256)
    return (
        (tokenizer.decode(report_ids[0], skip_special_tokens=True))
        .replace("<", "")
        .replace(">", "")
    )

# generate a report for every row of the df
def generate_report(row):
    # res_and_job = row["res_and_job"]
    try:
        generated_report = generate_report_from_model(row)
        return generated_report
    except Exception as e:
        logging.info(e)
        return ""


In [7]:
for _, row in df_test.iloc[:3].iterrows():
    # df_results = generate_report(row["res_and_job"])
    # print(temp_list)
    # logging.info(df_results.head())
    # logging.info(df_results.shape)
    print(row)


resume              kavya baltha data scientist linkedin i summary...
job                 about you you love breaking down complex probl...
score                                                            0.85
report              The candidate has a Master's in Computer Scien...
prompt              <<RESUME>> kavya baltha data scientist linkedi...
res_and_job         <<RESUME>> kavya baltha data scientist linkedi...
score_and_report    <<SCORE>> 0.85, <<REPORT>> The candidate has a...
Name: 145, dtype: object
resume              john yoo software engineer 10 2022 02 2023 xac...
job                 what you ll do understand design iterate and s...
score                                                             0.7
report              The candidate has a strong background in softw...
prompt              <<RESUME>> john yoo software engineer 10 2022 ...
res_and_job         <<RESUME>> john yoo software engineer 10 2022 ...
score_and_report    <<SCORE>> 0.7, <<REPORT>> The candidate has a

In [22]:
df_test.iloc[0]["resume"]

'kavya baltha data scientist linkedin i summary summary acadamic projects netflix data analysis tableau public data analysis utilized tableau to explore and analyze netflix data and derived meaningful insights into viewership patterns genre preferences and user behavior visualizations designed interactive and visually appealing dashboards and charts to present meaningful insights drug activity prediction python smote pca adaboost treated the imbalanced data using smote and applied pca for dimensionality reduction to compress the data developed an ensemble classification model using adaboost to prognosticate the likelihood of the drug given its topological shapes and other characteristics and achieved an accuracy of 79 by tuning the hyperparameters using across validation technique digit recognizer tsne k means handled mnist dataset which has gray scale images of hand drawn digits by normalizing the data and used tsne technique to reduce the number of dimensions implemented k means tech

In [23]:
dataset_test = transform_df_to_pytorch(df_test, tokenizer)
loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False)
model.eval()
total_test_loss = 0
res = []
with torch.no_grad():
    for test_batch in loader_test:
        test_input_ids = test_batch["input_ids"].to("cuda")
        test_attention_mask = test_batch["attention_mask"].to("cuda")
        test_labels = test_batch["labels"].to("cuda")
        
        report_ids = model.generate(input_ids=test_input_ids, attention_mask=test_attention_mask, max_length=256, num_return_sequences=1)
        decoded_outputs = (tokenizer.batch_decode(report_ids, skip_special_tokens=True)
        res.extend(decoded_outputs)
        
        test_outputs = model(
            input_ids=test_input_ids,
            attention_mask=test_attention_mask,
            labels=test_labels,
        )
        test_loss = test_outputs.loss

        total_test_loss += test_loss.item()
logging.info(f"Total validation loss is {total_test_loss/len(loader_test)}")

NameError: name 'model' is not defined

In [25]:
df_test["bart_generated_report"] = res
df_test

NameError: name 'res' is not defined

In [14]:
def generate_report_from_model(res_job):
    # Tokenize and encode the text
    inputs = tokenizer(res_job, return_tensors="pt")
    report_ids = model.generate(**inputs, max_new_tokens=256)
    return (
        (tokenizer.decode(report_ids[0], skip_special_tokens=True))
        .replace("<", "")
        .replace(">", "")
    )

AttributeError: 'PyTorchDataset' object has no attribute 'inputs'

In [8]:
generate_report(df_test.iloc[1]["res_and_job"])


INFO:root:name 'model' is not defined


''

In [1]:
df_test.iloc[0]["report"]


NameError: name 'df_test' is not defined

In [15]:
df_test.iloc[:3].head()

Unnamed: 0,resume,job,score,report,prompt,res_and_job,score_and_report
145,kavya baltha data scientist linkedin i summary...,about you you love breaking down complex probl...,0.85,The candidate has a Master's in Computer Scien...,<<RESUME>> kavya baltha data scientist linkedi...,<<RESUME>> kavya baltha data scientist linkedi...,"<<SCORE>> 0.85, <<REPORT>> The candidate has a..."
1628,john yoo software engineer 10 2022 02 2023 xac...,what you ll do understand design iterate and s...,0.7,The candidate has a strong background in softw...,<<RESUME>> john yoo software engineer 10 2022 ...,<<RESUME>> john yoo software engineer 10 2022 ...,"<<SCORE>> 0.7, <<REPORT>> The candidate has a ..."
1641,sairam tabibu seattle wa email me on indeed wi...,you have 8 years of experience designing modif...,0.8,The candidate has a strong background in softw...,<<RESUME>> sairam tabibu seattle wa email me o...,<<RESUME>> sairam tabibu seattle wa email me o...,"<<SCORE>> 0.8, <<REPORT>> The candidate has a ..."


In [19]:
df_test.iloc[0]["res_and_job"][:-100]



'<<RESUME>> kavya baltha data scientist linkedin i summary summary acadamic projects netflix data analysis tableau public data analysis utilized tableau to explore and analyze netflix data and derived meaningful insights into viewership patterns genre preferences and user behavior visualizations designed interactive and visually appealing dashboards and charts to present meaningful insights drug activity prediction python smote pca adaboost treated the imbalanced data using smote and applied pca for dimensionality reduction to compress the data developed an ensemble classification model using adaboost to prognosticate the likelihood of the drug given its topological shapes and other characteristics and achieved an accuracy of 79 by tuning the hyperparameters using across validation technique digit recognizer tsne k means handled mnist dataset which has gray scale images of hand drawn digits by normalizing the data and used tsne technique to reduce the number of dimensions implemented k

In [28]:
df_testing = df_test.copy()

In [29]:
df_testing.head()

Unnamed: 0,resume,job,score,report,prompt,res_and_job,score_and_report
145,kavya baltha data scientist linkedin i summary...,about you you love breaking down complex probl...,0.85,The candidate has a Master's in Computer Scien...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 0 derek leckner new york ny e...,<<SCORE>> 0 0.85\n1 0.85\n3 ...
1628,john yoo software engineer 10 2022 02 2023 xac...,what you ll do understand design iterate and s...,0.7,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 0 derek leckner new york ny e...,<<SCORE>> 0 0.85\n1 0.85\n3 ...
1641,sairam tabibu seattle wa email me on indeed wi...,you have 8 years of experience designing modif...,0.8,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 0 derek leckner new york ny e...,<<SCORE>> 0 0.85\n1 0.85\n3 ...
1802,144 lewis street valley stream ny 11 marie gre...,who you ll work with as an engineer you will w...,0.7,The resume is a good match for the job. The ca...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 0 derek leckner new york ny e...,<<SCORE>> 0 0.85\n1 0.85\n3 ...
1819,hardhika venkatesan data scientist lewis cente...,job title sr software quality engineer android...,0.7,The candidate has a strong background in data ...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 0 derek leckner new york ny e...,<<SCORE>> 0 0.85\n1 0.85\n3 ...


In [47]:
df_testing.drop(columns=["prompt", "res_and_job", "score_and_report"])

KeyError: "['prompt', 'res_and_job', 'score_and_report'] not found in axis"

In [48]:
df_testing.head()

Unnamed: 0,resume,job,score,report,prompt_method1,prompt_method3
145,kavya baltha data scientist linkedin i summary...,about you you love breaking down complex probl...,0.85,The candidate has a Master's in Computer Scien...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> kavya baltha data scientist linkedi...
1628,john yoo software engineer 10 2022 02 2023 xac...,what you ll do understand design iterate and s...,0.7,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> john yoo software engineer 10 2022 ...
1641,sairam tabibu seattle wa email me on indeed wi...,you have 8 years of experience designing modif...,0.8,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> sairam tabibu seattle wa email me o...
1802,144 lewis street valley stream ny 11 marie gre...,who you ll work with as an engineer you will w...,0.7,The resume is a good match for the job. The ca...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 144 lewis street valley stream ny 1...
1819,hardhika venkatesan data scientist lewis cente...,job title sr software quality engineer android...,0.7,The candidate has a strong background in data ...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> hardhika venkatesan data scientist ...


In [33]:
df_testing[
        "prompt_method1"
    ] = f"<<RESUME>> {df['resume']}, <<JOB>> {df['job']}, <<SCORE>> {df['score']}, <<REPORT>> {df['report']}"
df_testing.head()

Unnamed: 0,resume,job,score,report,prompt_method1
145,kavya baltha data scientist linkedin i summary...,about you you love breaking down complex probl...,0.85,The candidate has a Master's in Computer Scien...,<<RESUME>> 0 derek leckner new york ny e...
1628,john yoo software engineer 10 2022 02 2023 xac...,what you ll do understand design iterate and s...,0.7,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...
1641,sairam tabibu seattle wa email me on indeed wi...,you have 8 years of experience designing modif...,0.8,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...
1802,144 lewis street valley stream ny 11 marie gre...,who you ll work with as an engineer you will w...,0.7,The resume is a good match for the job. The ca...,<<RESUME>> 0 derek leckner new york ny e...
1819,hardhika venkatesan data scientist lewis cente...,job title sr software quality engineer android...,0.7,The candidate has a strong background in data ...,<<RESUME>> 0 derek leckner new york ny e...


In [34]:
df_testing["prompt_method2"] = df.apply(lambda row: f"<<RESUME>> {row['resume']}, <<JOB>> {row['job']}, <<SCORE>> {row['score']}, <<REPORT>> {row['report']}", axis=1)
df_testing

ValueError: cannot reindex on an axis with duplicate labels

In [35]:
df_testing['prompt_method3'] = (
    "<<RESUME>> " + df_testing['resume'] + ", <<JOB>> " + df_testing['job'] +
    ", <<SCORE>> " + df_testing['score'] + ", <<REPORT>> " + df_testing['report']
)
df_testing.head()


Unnamed: 0,resume,job,score,report,prompt_method1,prompt_method3
145,kavya baltha data scientist linkedin i summary...,about you you love breaking down complex probl...,0.85,The candidate has a Master's in Computer Scien...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> kavya baltha data scientist linkedi...
1628,john yoo software engineer 10 2022 02 2023 xac...,what you ll do understand design iterate and s...,0.7,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> john yoo software engineer 10 2022 ...
1641,sairam tabibu seattle wa email me on indeed wi...,you have 8 years of experience designing modif...,0.8,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> sairam tabibu seattle wa email me o...
1802,144 lewis street valley stream ny 11 marie gre...,who you ll work with as an engineer you will w...,0.7,The resume is a good match for the job. The ca...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 144 lewis street valley stream ny 1...
1819,hardhika venkatesan data scientist lewis cente...,job title sr software quality engineer android...,0.7,The candidate has a strong background in data ...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> hardhika venkatesan data scientist ...


In [42]:
def create_dataset_prompt_col(df: pd.DataFrame) -> pd.DataFrame:
    df["prompt"] = (
        "<<RESUME>> "
        + df["resume"]
        + ", <<JOB>> "
        + df["job"]
        + ", <<SCORE>> "
        + df["score"]
        + ", <<REPORT>> "
        + df["report"]
    )
    df["res_and_job"] = "<<RESUME>> " + df["resume"] + ", <<JOB>> " + df["job"]
    df["score_and_report"] = "<<SCORE>> " + df["score"] + ", <<REPORT>> " + df["report"]

    return df

In [None]:
df_testing.drop("prompt")

In [49]:
df_yolo = create_dataset_prompt_col(df_testing)
df_yolo.head()

Unnamed: 0,resume,job,score,report,prompt_method1,prompt_method3,prompt,res_and_job,score_and_report
145,kavya baltha data scientist linkedin i summary...,about you you love breaking down complex probl...,0.85,The candidate has a Master's in Computer Scien...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> kavya baltha data scientist linkedi...,<<RESUME>> kavya baltha data scientist linkedi...,<<RESUME>> kavya baltha data scientist linkedi...,"<<SCORE>> 0.85, <<REPORT>> The candidate has a..."
1628,john yoo software engineer 10 2022 02 2023 xac...,what you ll do understand design iterate and s...,0.7,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> john yoo software engineer 10 2022 ...,<<RESUME>> john yoo software engineer 10 2022 ...,<<RESUME>> john yoo software engineer 10 2022 ...,"<<SCORE>> 0.7, <<REPORT>> The candidate has a ..."
1641,sairam tabibu seattle wa email me on indeed wi...,you have 8 years of experience designing modif...,0.8,The candidate has a strong background in softw...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> sairam tabibu seattle wa email me o...,<<RESUME>> sairam tabibu seattle wa email me o...,<<RESUME>> sairam tabibu seattle wa email me o...,"<<SCORE>> 0.8, <<REPORT>> The candidate has a ..."
1802,144 lewis street valley stream ny 11 marie gre...,who you ll work with as an engineer you will w...,0.7,The resume is a good match for the job. The ca...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> 144 lewis street valley stream ny 1...,<<RESUME>> 144 lewis street valley stream ny 1...,<<RESUME>> 144 lewis street valley stream ny 1...,"<<SCORE>> 0.7, <<REPORT>> The resume is a good..."
1819,hardhika venkatesan data scientist lewis cente...,job title sr software quality engineer android...,0.7,The candidate has a strong background in data ...,<<RESUME>> 0 derek leckner new york ny e...,<<RESUME>> hardhika venkatesan data scientist ...,<<RESUME>> hardhika venkatesan data scientist ...,<<RESUME>> hardhika venkatesan data scientist ...,"<<SCORE>> 0.7, <<REPORT>> The candidate has a ..."


In [37]:
model.model.config

BartConfig {
  "_name_or_path": "../model_ckpt",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 

In [41]:
from datetime import datetime

current_datetime = datetime.today().strftime('%Y-%m-%d_%H:%M:%S')
current_datetime


'2023-12-12_21:42:52'

In [50]:
train_loss = [1,2,3,4]

In [54]:
val_loss = [0,0,0,0]

In [55]:
df_train_loss = pd.DataFrame({'train_loss': train_loss, 'val_loss': val_loss})
df_train_loss


Unnamed: 0,train_loss,val_loss
0,1,0
1,2,0
2,3,0
3,4,0


In [26]:
text = "SCORE 0.8, REPORT The candidate has a strong background in data science and machine learning, with a Master's degree in Computer Science from George Mason University and a Bachelor's degree from Jawaharlal Nehru Institute of Technology. They have 3 years of experience as a Data Scientist at Amazon, where they used Python, R, SQL, and SAS to perform data analysis and extract valuable insights from large datasets. They also developed an ensemble classification model using Adaboost to prognosticate the likelihood of a drug given its topological shapes and other characteristics. The candidate's skills and experience are a good match for the job requirements, and they would likely be able to make a significant contribution to the team."

In [34]:
# for inference
def extract_score_and_report(text):
    pattern = r"(?i)SCORE\s+(\d+\.\d+)[,:]?[\s\n]+REPORT\s+(.*)"

    # Search for the pattern in the text
    match = re.search(pattern, text)

    if match:
        score = match.group(1)
        report = match.group(2)
        return float(score), report.strip()
    else:
        return None, None


def combine_data_and_generated_report(
    df: pd.DataFrame, generated_report: list
) -> pd.DataFrame:
    df["generated_report"] = generated_report
    df["score_from_report"], df["report_from_report"] = zip(
        *df["generated_report"].apply(extract_score_and_report)
    )
    return df


In [36]:
df_test.iloc[0]

resume              kavya baltha data scientist linkedin i summary...
job                 about you you love breaking down complex probl...
score                                                            0.85
report              The candidate has a Master's in Computer Scien...
prompt              <<RESUME>> kavya baltha data scientist linkedi...
res_and_job         <<RESUME>> kavya baltha data scientist linkedi...
score_and_report    <<SCORE>> 0.85, <<REPORT>> The candidate has a...
Name: 145, dtype: object

In [45]:
# res = text * 5
# # res.append(text)
# res

# create a lits of 5 texts
res = [text] * 5
res

["SCORE 0.8, REPORT The candidate has a strong background in data science and machine learning, with a Master's degree in Computer Science from George Mason University and a Bachelor's degree from Jawaharlal Nehru Institute of Technology. They have 3 years of experience as a Data Scientist at Amazon, where they used Python, R, SQL, and SAS to perform data analysis and extract valuable insights from large datasets. They also developed an ensemble classification model using Adaboost to prognosticate the likelihood of a drug given its topological shapes and other characteristics. The candidate's skills and experience are a good match for the job requirements, and they would likely be able to make a significant contribution to the team.",
 "SCORE 0.8, REPORT The candidate has a strong background in data science and machine learning, with a Master's degree in Computer Science from George Mason University and a Bachelor's degree from Jawaharlal Nehru Institute of Technology. They have 3 year

In [46]:
df_result = combine_data_and_generated_report(df_test, res)

In [47]:
df_result

Unnamed: 0,resume,job,score,report,prompt,res_and_job,score_and_report,generated_report,score_from_report,report_from_report
145,kavya baltha data scientist linkedin i summary...,about you you love breaking down complex probl...,0.85,The candidate has a Master's in Computer Scien...,<<RESUME>> kavya baltha data scientist linkedi...,<<RESUME>> kavya baltha data scientist linkedi...,"<<SCORE>> 0.85, <<REPORT>> The candidate has a...","SCORE 0.8, REPORT The candidate has a strong b...",0.8,The candidate has a strong background in data ...
1628,john yoo software engineer 10 2022 02 2023 xac...,what you ll do understand design iterate and s...,0.7,The candidate has a strong background in softw...,<<RESUME>> john yoo software engineer 10 2022 ...,<<RESUME>> john yoo software engineer 10 2022 ...,"<<SCORE>> 0.7, <<REPORT>> The candidate has a ...","SCORE 0.8, REPORT The candidate has a strong b...",0.8,The candidate has a strong background in data ...
1641,sairam tabibu seattle wa email me on indeed wi...,you have 8 years of experience designing modif...,0.8,The candidate has a strong background in softw...,<<RESUME>> sairam tabibu seattle wa email me o...,<<RESUME>> sairam tabibu seattle wa email me o...,"<<SCORE>> 0.8, <<REPORT>> The candidate has a ...","SCORE 0.8, REPORT The candidate has a strong b...",0.8,The candidate has a strong background in data ...
1802,144 lewis street valley stream ny 11 marie gre...,who you ll work with as an engineer you will w...,0.7,The resume is a good match for the job. The ca...,<<RESUME>> 144 lewis street valley stream ny 1...,<<RESUME>> 144 lewis street valley stream ny 1...,"<<SCORE>> 0.7, <<REPORT>> The resume is a good...","SCORE 0.8, REPORT The candidate has a strong b...",0.8,The candidate has a strong background in data ...
1819,hardhika venkatesan data scientist lewis cente...,job title sr software quality engineer android...,0.7,The candidate has a strong background in data ...,<<RESUME>> hardhika venkatesan data scientist ...,<<RESUME>> hardhika venkatesan data scientist ...,"<<SCORE>> 0.7, <<REPORT>> The candidate has a ...","SCORE 0.8, REPORT The candidate has a strong b...",0.8,The candidate has a strong background in data ...
