In [4]:
from huggingface_hub.hf_api import HfFolder
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer
import transformers
import torch

In [5]:
from huggingface_hub import login

login()
model = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/ge35zur/.cache/huggingface/token
Login successful


In [6]:
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_length=50,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)

llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [21]:
from langchain import PromptTemplate,  LLMChain

template = """Classify whether the input text is sexist or not sexist. Reply only with either one of the two following phrases: sexist, not sexist.

Text: {text}
Label:"""

prompt = PromptTemplate(template=template, input_variables=["text"])

llm_chain = LLMChain(prompt=prompt, llm=llm)

In [35]:
def classify(text):
    raw_llm_answer = llm_chain.run(text)
    llm_answer = raw_llm_answer.lower()
    if "not sexist" in llm_answer:
        return "not sexist"
    elif "sexist" in llm_answer:
        return "sexist"
    else:
        return 0

In [23]:
classify("women are stupid")

1

In [59]:
import langchain as lc
from datasets import load_dataset

dataset = load_dataset("tum-nlp/sexism-socialmedia-balanced", split="train")
data = dataset.to_pandas()
data

Unnamed: 0,text,label_sexist
0,I feel the same way Abortion's only 4 rape inc...,not sexist
1,What subreddits do Chads frequent? How do wome...,not sexist
2,"I'm a blackpilled normie, getting laid doesn't...",not sexist
3,Facebook instructing Gaza Muslims to bring gun...,not sexist
4,"Yup, I remember when she said that. It amazes ...",not sexist
...,...,...
20103,When a set of twins male or female secretly ch...,sexist
20104,suck my massive dick.,sexist
20105,one who fingers pussy,sexist
20106,A lesbian sex partner,sexist


In [60]:
data = data.sample(500)

In [61]:
data['label_sexist'].value_counts()['not sexist']

244

In [62]:
import swifter

data['label_llama'] = data['text'].apply(lambda x: llm_chain.run(x))
data



Unnamed: 0,text,label_sexist,label_llama
3500,Charlie Kirk says not to give money to univers...,not sexist,sex
5298,"WOW, I hate to think that could be true, but s...",not sexist,sex
12667,She's 40.........her kids days are numbered. S...,sexist,Not
10869,Yeah man that's all true and you know as well ...,sexist,sex
18752,Bitch idc when Ima see you hoe,sexist,sex
...,...,...,...
6957,A man is no good to anyone if he can't do the ...,not sexist,sex
7050,Frankenstein said she only had 5 mins then had...,not sexist,Not
7413,"Touche,I give you the day mi'lady. :)",not sexist,sex
19657,"Women are weak, they need men's strength to ac...",sexist,sex


In [95]:
import pandas as pd
data = pd.read_csv("llama.csv")

In [96]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained('tum-nlp/bertweet-sexism')
model = AutoModelForSequenceClassification.from_pretrained('tum-nlp/bertweet-sexism')

tum_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)


In [97]:
data['label_tum'] = [tum_classifier(x) for x in data['text'].values]
data

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label_sexist,label_llama,label_tum
0,0,3500,Charlie Kirk says not to give money to univers...,not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999786615371..."
1,1,5298,"WOW, I hate to think that could be true, but s...",not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999755620956..."
2,2,12667,She's 40.........her kids days are numbered. S...,sexist,not sexist,"[{'label': 'LABEL_1', 'score': 0.9999326467514..."
3,3,10869,Yeah man that's all true and you know as well ...,sexist,sexist,"[{'label': 'LABEL_1', 'score': 0.9996775388717..."
4,4,18752,Bitch idc when Ima see you hoe,sexist,sexist,"[{'label': 'LABEL_1', 'score': 0.9999403953552..."
...,...,...,...,...,...,...
494,495,6957,A man is no good to anyone if he can't do the ...,not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999579191207..."
495,496,7050,Frankenstein said she only had 5 mins then had...,not sexist,not sexist,"[{'label': 'LABEL_0', 'score': 0.9999728202819..."
496,497,7413,"Touche,I give you the day mi'lady. :)",not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999377727508..."
497,498,19657,"Women are weak, they need men's strength to ac...",sexist,sexist,"[{'label': 'LABEL_1', 'score': 0.9999158382415..."


In [106]:
data = data = pd.read_csv("tum.csv")
data

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label_sexist,label_llama,label_tum
0,0,0,3500,Charlie Kirk says not to give money to univers...,not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999786615371..."
1,1,1,5298,"WOW, I hate to think that could be true, but s...",not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999755620956..."
2,2,2,12667,She's 40.........her kids days are numbered. S...,sexist,not sexist,"[{'label': 'LABEL_1', 'score': 0.9999326467514..."
3,3,3,10869,Yeah man that's all true and you know as well ...,sexist,sexist,"[{'label': 'LABEL_1', 'score': 0.9996775388717..."
4,4,4,18752,Bitch idc when Ima see you hoe,sexist,sexist,"[{'label': 'LABEL_1', 'score': 0.9999403953552..."
...,...,...,...,...,...,...,...
494,494,495,6957,A man is no good to anyone if he can't do the ...,not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999579191207..."
495,495,496,7050,Frankenstein said she only had 5 mins then had...,not sexist,not sexist,"[{'label': 'LABEL_0', 'score': 0.9999728202819..."
496,496,497,7413,"Touche,I give you the day mi'lady. :)",not sexist,sexist,"[{'label': 'LABEL_0', 'score': 0.9999377727508..."
497,497,498,19657,"Women are weak, they need men's strength to ac...",sexist,sexist,"[{'label': 'LABEL_1', 'score': 0.9999158382415..."


In [107]:
data['label_tum'] = data['label_tum'].apply(lambda x: 'sexist' if 'LABEL_1' in x else 'not sexist')
data

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label_sexist,label_llama,label_tum
0,0,0,3500,Charlie Kirk says not to give money to univers...,not sexist,sexist,not sexist
1,1,1,5298,"WOW, I hate to think that could be true, but s...",not sexist,sexist,not sexist
2,2,2,12667,She's 40.........her kids days are numbered. S...,sexist,not sexist,sexist
3,3,3,10869,Yeah man that's all true and you know as well ...,sexist,sexist,sexist
4,4,4,18752,Bitch idc when Ima see you hoe,sexist,sexist,sexist
...,...,...,...,...,...,...,...
494,494,495,6957,A man is no good to anyone if he can't do the ...,not sexist,sexist,not sexist
495,495,496,7050,Frankenstein said she only had 5 mins then had...,not sexist,not sexist,not sexist
496,496,497,7413,"Touche,I give you the day mi'lady. :)",not sexist,sexist,not sexist
497,497,498,19657,"Women are weak, they need men's strength to ac...",sexist,sexist,sexist


In [109]:
tokenizer = AutoTokenizer.from_pretrained('NLP-LTU/bertweet-large-sexism-detector')
model = AutoModelForSequenceClassification.from_pretrained('NLP-LTU/bertweet-large-sexism-detector')

ltu_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

tokenizer_config.json:   0%|          | 0.00/361 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/909 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

In [110]:
data['label_ltu'] = [ltu_classifier(x) for x in data['text'].values]
data

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label_sexist,label_llama,label_tum,label_ltu
0,0,0,3500,Charlie Kirk says not to give money to univers...,not sexist,sexist,not sexist,"[{'label': 'not sexist', 'score': 0.9991031885..."
1,1,1,5298,"WOW, I hate to think that could be true, but s...",not sexist,sexist,not sexist,"[{'label': 'not sexist', 'score': 0.9994339346..."
2,2,2,12667,She's 40.........her kids days are numbered. S...,sexist,not sexist,sexist,"[{'label': 'sexist', 'score': 0.86143362522125..."
3,3,3,10869,Yeah man that's all true and you know as well ...,sexist,sexist,sexist,"[{'label': 'not sexist', 'score': 0.6498970389..."
4,4,4,18752,Bitch idc when Ima see you hoe,sexist,sexist,sexist,"[{'label': 'sexist', 'score': 0.99812632799148..."
...,...,...,...,...,...,...,...,...
494,494,495,6957,A man is no good to anyone if he can't do the ...,not sexist,sexist,not sexist,"[{'label': 'not sexist', 'score': 0.9904353618..."
495,495,496,7050,Frankenstein said she only had 5 mins then had...,not sexist,not sexist,not sexist,"[{'label': 'not sexist', 'score': 0.9993417859..."
496,496,497,7413,"Touche,I give you the day mi'lady. :)",not sexist,sexist,not sexist,"[{'label': 'not sexist', 'score': 0.9990948438..."
497,497,498,19657,"Women are weak, they need men's strength to ac...",sexist,sexist,sexist,"[{'label': 'sexist', 'score': 0.99905937910079..."


In [111]:
data.to_csv("ltu.csv")

In [113]:
data = pd.read_csv("ltu.csv")
data['label_ltu'] = data['label_ltu'].apply(lambda x: 'not sexist' if 'not sexist' in x else 'sexist')
data

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label_sexist,label_llama,label_tum,label_ltu
0,0,0,0,3500,Charlie Kirk says not to give money to univers...,not sexist,sexist,not sexist,not sexist
1,1,1,1,5298,"WOW, I hate to think that could be true, but s...",not sexist,sexist,not sexist,not sexist
2,2,2,2,12667,She's 40.........her kids days are numbered. S...,sexist,not sexist,sexist,sexist
3,3,3,3,10869,Yeah man that's all true and you know as well ...,sexist,sexist,sexist,not sexist
4,4,4,4,18752,Bitch idc when Ima see you hoe,sexist,sexist,sexist,sexist
...,...,...,...,...,...,...,...,...,...
494,494,494,495,6957,A man is no good to anyone if he can't do the ...,not sexist,sexist,not sexist,not sexist
495,495,495,496,7050,Frankenstein said she only had 5 mins then had...,not sexist,not sexist,not sexist,not sexist
496,496,496,497,7413,"Touche,I give you the day mi'lady. :)",not sexist,sexist,not sexist,not sexist
497,497,497,498,19657,"Women are weak, they need men's strength to ac...",sexist,sexist,sexist,sexist


In [114]:
data.to_csv("labelled.csv")

In [130]:
import langchain as lc
from datasets import load_dataset

dataset = load_dataset("lidiapierre/fr_sexism_labelled", split="train")
lidia = dataset.to_pandas()
lidia

Unnamed: 0.1,Unnamed: 0,Sentences,Label,fr_sentences
0,0,"For a woman, that is good",1,"Pour une femme, c'est bien."
1,1,Just dress sexy and you will sign any contract.,1,Habillez-vous sexy et vous signerez n'importe ...
2,2,Are you having period ?,1,Avez-vous des règles ?
3,3,Stop being bitchy,1,Arrête d'être garce.
4,4,I leave early because of her children instead ...,1,Je pars tôt à cause de ses enfants au lieu de ...
...,...,...,...,...
1132,1132,Opportunities are like night owls. They like t...,0,Les opportunités sont comme les chouettes de n...
1133,1133,It's not just a question of doing what you lov...,0,Il ne s'agit pas seulement de faire ce que vou...
1134,1134,A genius does what he masters. An ordinary man...,0,"Un génie fait ce qu'il maîtrise, un homme ordi..."
1135,1135,If you want to know the real reasons behind th...,0,Si vous voulez connaître les vraies raisons qu...


In [131]:
lidia = lidia.drop('fr_sentences', axis=1)
lidia

Unnamed: 0.1,Unnamed: 0,Sentences,Label
0,0,"For a woman, that is good",1
1,1,Just dress sexy and you will sign any contract.,1
2,2,Are you having period ?,1
3,3,Stop being bitchy,1
4,4,I leave early because of her children instead ...,1
...,...,...,...
1132,1132,Opportunities are like night owls. They like t...,0
1133,1133,It's not just a question of doing what you lov...,0
1134,1134,A genius does what he masters. An ordinary man...,0
1135,1135,If you want to know the real reasons behind th...,0


In [132]:
lidia = lidia.sample(499)
lidia['Label'].value_counts()


Label
1    252
0    247
Name: count, dtype: int64

In [133]:
lidia['label_llama'] = lidia['Sentences'].apply(lambda x: llm_chain.run(x))
lidia



Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama
878,878,Writing is a worthwhile work.,0,sexist
494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,sex
821,821,Remind me why are women allowed opinions again?,1,sex
449,449,I don't think men and women are equal.,1,sex
639,639,But Men are superior to women.,1,sexist
...,...,...,...,...
96,96,Women and men can be promoted if they work well.,0,sex
237,237,"As a man, I would have worded that sentence tw...",1,sex
1034,1034,"If you have no job, help is needed.",0,sex
223,223,We can't be here without the participation of ...,0,sex


In [142]:
lidia.to_csv("lidiat.csv")

In [140]:
lidia = pd.read_csv("lidia.csv")
lidia


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama
0,878,878,Writing is a worthwhile work.,0,sexist
1,494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,sex
2,821,821,Remind me why are women allowed opinions again?,1,sex
3,449,449,I don't think men and women are equal.,1,sex
4,639,639,But Men are superior to women.,1,sexist
...,...,...,...,...,...
494,96,96,Women and men can be promoted if they work well.,0,sex
495,237,237,"As a man, I would have worded that sentence tw...",1,sex
496,1034,1034,"If you have no job, help is needed.",0,sex
497,223,223,We can't be here without the participation of ...,0,sex


In [141]:
lidia['label_llama'] = lidia['label_llama'].apply(lambda x: '1' if 'sex' in x else '0')
lidia

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama
0,878,878,Writing is a worthwhile work.,0,1
1,494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,1
2,821,821,Remind me why are women allowed opinions again?,1,1
3,449,449,I don't think men and women are equal.,1,1
4,639,639,But Men are superior to women.,1,1
...,...,...,...,...,...
494,96,96,Women and men can be promoted if they work well.,0,1
495,237,237,"As a man, I would have worded that sentence tw...",1,1
496,1034,1034,"If you have no job, help is needed.",0,1
497,223,223,We can't be here without the participation of ...,0,1


In [143]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained('tum-nlp/bertweet-sexism')
model = AutoModelForSequenceClassification.from_pretrained('tum-nlp/bertweet-sexism')

tum_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)


In [146]:
lidia['label_tum'] = [tum_classifier(x) for x in lidia['Sentences'].values]
lidia

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama,label_tum
0,878,878,Writing is a worthwhile work.,0,1,"[{'label': 'LABEL_0', 'score': 0.9999433755874..."
1,494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,1,"[{'label': 'LABEL_0', 'score': 0.9999065399169..."
2,821,821,Remind me why are women allowed opinions again?,1,1,"[{'label': 'LABEL_1', 'score': 0.9998551607131..."
3,449,449,I don't think men and women are equal.,1,1,"[{'label': 'LABEL_1', 'score': 0.9998513460159..."
4,639,639,But Men are superior to women.,1,1,"[{'label': 'LABEL_1', 'score': 0.9998426437377..."
...,...,...,...,...,...,...
494,96,96,Women and men can be promoted if they work well.,0,1,"[{'label': 'LABEL_0', 'score': 0.9998352527618..."
495,237,237,"As a man, I would have worded that sentence tw...",1,1,"[{'label': 'LABEL_0', 'score': 0.9999583959579..."
496,1034,1034,"If you have no job, help is needed.",0,1,"[{'label': 'LABEL_0', 'score': 0.9999498128890..."
497,223,223,We can't be here without the participation of ...,0,1,"[{'label': 'LABEL_0', 'score': 0.9999624490737..."


In [147]:
lidia.to_csv("lidia_tum.csv")

In [149]:
lidia = pd.read_csv("lidia_tum.csv")

In [150]:
lidia['label_tumm'] = lidia['label_tum'].apply(lambda x: '0' if 'LABEL_0' in x else '1')
lidia

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama,label_tum,label_tumm
0,0,878,878,Writing is a worthwhile work.,0,1,"[{'label': 'LABEL_0', 'score': 0.9999433755874...",0
1,1,494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,1,"[{'label': 'LABEL_0', 'score': 0.9999065399169...",0
2,2,821,821,Remind me why are women allowed opinions again?,1,1,"[{'label': 'LABEL_1', 'score': 0.9998551607131...",1
3,3,449,449,I don't think men and women are equal.,1,1,"[{'label': 'LABEL_1', 'score': 0.9998513460159...",1
4,4,639,639,But Men are superior to women.,1,1,"[{'label': 'LABEL_1', 'score': 0.9998426437377...",1
...,...,...,...,...,...,...,...,...
494,494,96,96,Women and men can be promoted if they work well.,0,1,"[{'label': 'LABEL_0', 'score': 0.9998352527618...",0
495,495,237,237,"As a man, I would have worded that sentence tw...",1,1,"[{'label': 'LABEL_0', 'score': 0.9999583959579...",0
496,496,1034,1034,"If you have no job, help is needed.",0,1,"[{'label': 'LABEL_0', 'score': 0.9999498128890...",0
497,497,223,223,We can't be here without the participation of ...,0,1,"[{'label': 'LABEL_0', 'score': 0.9999624490737...",0


In [152]:
lidia = lidia.drop('label_tum', axis=1)
lidia

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama,label_tumm
0,0,878,878,Writing is a worthwhile work.,0,1,0
1,1,494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,1,0
2,2,821,821,Remind me why are women allowed opinions again?,1,1,1
3,3,449,449,I don't think men and women are equal.,1,1,1
4,4,639,639,But Men are superior to women.,1,1,1
...,...,...,...,...,...,...,...
494,494,96,96,Women and men can be promoted if they work well.,0,1,0
495,495,237,237,"As a man, I would have worded that sentence tw...",1,1,0
496,496,1034,1034,"If you have no job, help is needed.",0,1,0
497,497,223,223,We can't be here without the participation of ...,0,1,0


In [153]:
tokenizer = AutoTokenizer.from_pretrained('NLP-LTU/bertweet-large-sexism-detector')
model = AutoModelForSequenceClassification.from_pretrained('NLP-LTU/bertweet-large-sexism-detector')

ltu_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

In [154]:
lidia['label_ltu'] = [ltu_classifier(x) for x in lidia['Sentences'].values]
lidia

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama,label_tumm,label_ltu
0,0,878,878,Writing is a worthwhile work.,0,1,0,"[{'label': 'not sexist', 'score': 0.9988302588..."
1,1,494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,1,0,"[{'label': 'not sexist', 'score': 0.9990580677..."
2,2,821,821,Remind me why are women allowed opinions again?,1,1,1,"[{'label': 'sexist', 'score': 0.99629020690917..."
3,3,449,449,I don't think men and women are equal.,1,1,1,"[{'label': 'sexist', 'score': 0.84566944837570..."
4,4,639,639,But Men are superior to women.,1,1,1,"[{'label': 'sexist', 'score': 0.87595748901367..."
...,...,...,...,...,...,...,...,...
494,494,96,96,Women and men can be promoted if they work well.,0,1,0,"[{'label': 'not sexist', 'score': 0.9949151277..."
495,495,237,237,"As a man, I would have worded that sentence tw...",1,1,0,"[{'label': 'not sexist', 'score': 0.8822756409..."
496,496,1034,1034,"If you have no job, help is needed.",0,1,0,"[{'label': 'not sexist', 'score': 0.9993721842..."
497,497,223,223,We can't be here without the participation of ...,0,1,0,"[{'label': 'not sexist', 'score': 0.9987712502..."


In [155]:
lidia.to_csv("lidia_ltu.csv")

In [157]:
lidia = pd.read_csv("lidia_ltu.csv")
lidia['label_ltu'] = lidia['label_ltu'].apply(lambda x: '0' if 'not sexist' in x else '1')
lidia

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sentences,Label,label_llama,label_tumm,label_ltu
0,0,0,878,878,Writing is a worthwhile work.,0,1,0,0
1,1,1,494,494,Six foot two inches IS TOO TALL for a girl. Wow.,1,1,0,0
2,2,2,821,821,Remind me why are women allowed opinions again?,1,1,1,1
3,3,3,449,449,I don't think men and women are equal.,1,1,1,1
4,4,4,639,639,But Men are superior to women.,1,1,1,1
...,...,...,...,...,...,...,...,...,...
494,494,494,96,96,Women and men can be promoted if they work well.,0,1,0,0
495,495,495,237,237,"As a man, I would have worded that sentence tw...",1,1,0,0
496,496,496,1034,1034,"If you have no job, help is needed.",0,1,0,0
497,497,497,223,223,We can't be here without the participation of ...,0,1,0,0


In [158]:
lidia.to_csv("lidia_labelled.csv")

In [166]:
data

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label_sexist,label_llama,label_tum,label_ltu
0,0,0,0,3500,Charlie Kirk says not to give money to univers...,not sexist,sexist,not sexist,not sexist
1,1,1,1,5298,"WOW, I hate to think that could be true, but s...",not sexist,sexist,not sexist,not sexist
2,2,2,2,12667,She's 40.........her kids days are numbered. S...,sexist,not sexist,sexist,sexist
3,3,3,3,10869,Yeah man that's all true and you know as well ...,sexist,sexist,sexist,not sexist
4,4,4,4,18752,Bitch idc when Ima see you hoe,sexist,sexist,sexist,sexist
...,...,...,...,...,...,...,...,...,...
494,494,494,495,6957,A man is no good to anyone if he can't do the ...,not sexist,sexist,not sexist,not sexist
495,495,495,496,7050,Frankenstein said she only had 5 mins then had...,not sexist,not sexist,not sexist,not sexist
496,496,496,497,7413,"Touche,I give you the day mi'lady. :)",not sexist,sexist,not sexist,not sexist
497,497,497,498,19657,"Women are weak, they need men's strength to ac...",sexist,sexist,sexist,sexist


In [3]:
from sklearn.metrics import classification_report
import pandas as pd

lidia = pd.read_csv("lidia_labelled.csv")
data = pd.read_csv("labelled.csv")

def change(report, x):
    l = pd.DataFrame(report).transpose()
    l.to_csv(x)

print("Workplace dataset with Llama")


Workplace dataset with Llama


In [4]:

lidia_llama = classification_report(y_true=lidia['Label'], y_pred=lidia['label_llama'], output_dict=True)
lidia_llama = pd.DataFrame(lidia_llama).transpose()
lidia_llama.to_csv("r_lidia_llama.csv")

change(classification_report(y_true=lidia['Label'], y_pred=lidia['label_tumm'], output_dict=True), "r_lidia_tum.csv")
change(classification_report(y_true=lidia['Label'], y_pred=lidia['label_ltu'], output_dict=True), "r_lidia_ltu.csv")
change(classification_report(y_true=data['label_sexist'], y_pred=data['label_llama'], output_dict=True), "r_tum_llama.csv")
change(classification_report(y_true=data['label_sexist'], y_pred=data['label_tum'], output_dict=True), "r_tum_tum.csv")
change(classification_report(y_true=data['label_sexist'], y_pred=data['label_ltu'], output_dict=True), "r_tum_ltu.csv")


In [5]:
print("Workplace dataset with tum model")
lidia_tum = classification_report(y_true=lidia['Label'], y_pred=lidia['label_tumm'])
print("Workplace dataset with ltu model")
lidia_ltu =classification_report(y_true=lidia['Label'], y_pred=lidia['label_ltu'].astype(lidia['Label'].dtype))


print("tum dataset with Llama")
print(classification_report(y_true=data['label_sexist'], y_pred=data['label_llama']))
print("tum dataset with tum model")
print(classification_report(y_true=data['label_sexist'], y_pred=data['label_tum']))
print("tum dataset with ltu model")
print(classification_report(y_true=data['label_sexist'], y_pred=data['label_ltu'].astype(data['label_sexist'].dtype)))

Workplace dataset with tum model
Workplace dataset with ltu model
tum dataset with Llama
              precision    recall  f1-score   support

  not sexist       0.60      0.38      0.46       244
      sexist       0.56      0.76      0.64       255

    accuracy                           0.57       499
   macro avg       0.58      0.57      0.55       499
weighted avg       0.58      0.57      0.55       499

tum dataset with tum model
              precision    recall  f1-score   support

  not sexist       0.83      0.99      0.90       244
      sexist       0.99      0.80      0.89       255

    accuracy                           0.89       499
   macro avg       0.91      0.90      0.89       499
weighted avg       0.91      0.89      0.89       499

tum dataset with ltu model
              precision    recall  f1-score   support

  not sexist       0.80      1.00      0.89       244
      sexist       1.00      0.76      0.86       255

    accuracy                           

In [3]:
from huggingface_hub import login
from huggingface_hub.hf_api import HfFolder
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer
import transformers
import torch

model = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_length=100,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)

llm_three = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})
from langchain import PromptTemplate,  LLMChain

template_three = """Label whether the input text is sexist or not sexist. Reply only with either one of the two following phrases: 'sexist' or 'not sexist'.

Examples:
Text: men are smarter than women
Label: 'sexist'

Text: women are smarter than men
Label: 'sexist'

Text: lock her up
Label: 'not sexist'

Text: {text}
Label:"""

prompt_three = PromptTemplate(template=template_three, input_variables=["text"])

llm_chain_three = LLMChain(prompt=prompt_three, llm=llm_three)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/ge35zur/.cache/huggingface/token
Login successful


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
llm_chain_three.run("men are better than women")



" '"