Skip to content

Commit

Permalink
e
Browse files Browse the repository at this point in the history
  • Loading branch information
ym001 committed May 14, 2020
1 parent 2eef643 commit dba716e
Show file tree
Hide file tree
Showing 27 changed files with 643 additions and 136 deletions.
6 changes: 4 additions & 2 deletions Exemples/exemple_Classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#
#
from Manteia.Classification import Classification
from Manteia.Model import Model

def main(args):
documents = [
Expand All @@ -38,10 +39,11 @@ def main(args):
]

labels = [
['funny'],['not funny'],['funny'],['not funny'],['funny'],['not funny'],['not funny'],['not funny'],['funny'],['not funny'],
'funny','not funny','funny','not funny','funny','not funny','not funny','not funny','funny','not funny'
]

cl=Classification(model_name ='roberta',documents,labels,process=True)
model = Model(model_name ='roberta')
cl=Classification(model,documents,labels,process_classif=True)
print(cl.predict(documents[:2]))
return 0

Expand Down
9 changes: 9 additions & 0 deletions Exemples/exemple_Classification1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from Manteia.Classification import Classification
from Manteia.Model import Model

# Toy corpus: two one-line jokes, one document per list entry.
documents = [
    'What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.',
    'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',
]

# One label per document, in the same order as `documents`.
labels = ['funny', 'not funny']

# process_classif=True makes the constructor derive the label list and run
# the whole pipeline immediately, without an explicit cl.process() call.
model = Model(model_name='roberta')
cl = Classification(
    model,
    documents_train=documents,
    labels_train=labels,
    process_classif=True,
)
13 changes: 13 additions & 0 deletions Exemples/exemple_Classification2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from Manteia.Classification import Classification
from Manteia.Preprocess import list_labels
from Manteia.Model import Model

# Toy corpus: two one-line jokes, one document per list entry.
documents = [
    'What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.',
    'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',
]

# One label per document, in the same order as `documents`.
labels = ['funny', 'not funny']

model = Model(model_name='roberta')

# process_classif keeps its default (False), so the constructor only stores
# its arguments; the label list and the pipeline are set up by hand below.
cl = Classification(model, documents_train=documents, labels_train=labels)
cl.list_labels = list_labels(labels)
cl.process()

predictions = cl.predict(documents[:2])
print(predictions)
12 changes: 12 additions & 0 deletions Exemples/exemple_Classification3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from Manteia.Classification import Classification
from Manteia.Preprocess import list_labels

# Toy corpus: two one-line jokes, one document per list entry.
documents = [
    'What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.',
    'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',
]

# One label per document, in the same order as `documents`.
labels = ['funny', 'not funny']

# No Model instance is passed: load_model() creates a default Model() itself.
# It reads cl.list_labels to size the classifier, so that attribute has to be
# assigned before load_model() is called.
cl = Classification(documents_train=documents, labels_train=labels)
cl.list_labels = list_labels(labels)
cl.load_model()

# NOTE(review): devices() presumably selects the compute device for the
# model -- confirm in Manteia.Model. No fit step happens in this script.
cl.model.devices()

predictions = cl.predict(documents[:2])
print(predictions)
13 changes: 13 additions & 0 deletions Exemples/exemple_Classification4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from Manteia.Classification import Classification
from Manteia.Preprocess import list_labels

# Toy corpus: two one-line jokes, one document per list entry.
documents = [
    'What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.',
    'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',
]

# One label per document, in the same order as `documents`.
labels = ['funny', 'not funny']

# No Model instance is passed: load_model() creates a default Model() itself.
# It reads cl.list_labels, so that attribute has to be assigned first.
cl = Classification(documents_train=documents, labels_train=labels)
cl.list_labels = list_labels(labels)
cl.load_model()

# Same steps that Classification.process() chains together, written out
# one by one: build the train/validation data, configure, then fit.
dt_train, dt_validation = cl.process_text()
cl.model.configuration(dt_train)
cl.model.fit(dt_train, dt_validation)
11 changes: 11 additions & 0 deletions Exemples/exemple_Classification5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from Manteia.Classification import Classification
from Manteia.Model import Model

# Toy corpus: two one-line jokes, one document per list entry.
documents = [
    'What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.',
    'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',
]

# One label per document, in the same order as `documents`.
labels = ['funny', 'not funny']

# process_classif=True makes the constructor run the whole pipeline, so the
# classifier can be queried right after construction.
model = Model(model_name='roberta')
cl = Classification(
    model,
    documents_train=documents,
    labels_train=labels,
    process_classif=True,
)

predictions = cl.predict(documents[:2])
print(predictions)
6 changes: 3 additions & 3 deletions Exemples/exemple_Classification_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
def main(args):

ds = Dataset('20newsgroups')
documents = ds.get_documents()
labels = ds.get_labels()
documents = ds.documents_train
labels = ds.labels_train
pp = Preprocess(documents=documents,labels=labels,nb_sample=500)
documents = pp.documents
labels = pp.labels
cl = Classification(documents=documents,labels=labels)
cl = Classification(documents_train=documents,labels_train=labels)
cl.list_labels = pp.list_labels

cl.load_model()
Expand Down
10 changes: 6 additions & 4 deletions Exemples/exemple_Dataset.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from Manteia.Dataset import Dataset

#ds=Dataset('20newsgroups')
ds=Dataset('20newsgroups')
##ds=Dataset('SST-2')
##ds=Dataset('SST-B')
#ds=Dataset('pubmed_rct20k')
#ds=Dataset('drugscom')
##ds=Dataset('yelp')
#ds=Dataset('trec')
#ds=Dataset('agnews')
#ds=Dataset('DBPedia')
#ds=Dataset('Amazon Review Full')
ds=Dataset('Amazon Review Polarity')
#ds=Dataset('Amazon Review Polarity')
#ds=Dataset('Sogou News')
#ds=Dataset('Yahoo! Answers')
#ds=Dataset('Yelp Review Full')

print('Train : ')
print(ds.documents_train[:5])
Expand All @@ -19,4 +21,4 @@
print(ds.documents_test[:5])
print(ds.labels_test[:5])
print('Description : ')
#print(ds.description)
print(ds.description)
7 changes: 7 additions & 0 deletions Exemples/exemple_Dataset1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from Manteia.Dataset import Dataset

# Build a Dataset for the name 'DBPedia'.
ds = Dataset('DBPedia')

# Preview the first five training documents and their labels.
print('Train : ')
preview = slice(None, 5)
print(ds.documents_train[preview])
print(ds.labels_train[preview])
7 changes: 7 additions & 0 deletions Exemples/exemple_Dataset2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from Manteia.Dataset import Dataset

# Build a Dataset for the name 'Yahoo! Answers'.
ds = Dataset('Yahoo! Answers')

# Preview the first five test documents and their labels.
print('Test : ')
preview = slice(None, 5)
print(ds.documents_test[preview])
print(ds.labels_test[preview])
8 changes: 8 additions & 0 deletions Exemples/exemple_Dataset3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from Manteia.Dataset import Dataset

# Build a Dataset for the name 'pubmed_rct20k'.
ds = Dataset('pubmed_rct20k')

# Preview the first five training documents and their labels.
print('Train : ')
preview = slice(None, 5)
print(ds.documents_train[preview])
print(ds.labels_train[preview])

7 changes: 7 additions & 0 deletions Exemples/exemple_Dataset4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from Manteia.Dataset import Dataset

# Build a Dataset for the name 'drugscom'.
ds = Dataset('drugscom')

# Preview the first five training documents and their labels.
print('Train : ')
preview = slice(None, 5)
print(ds.documents_train[preview])
print(ds.labels_train[preview])
165 changes: 127 additions & 38 deletions Manteia/Classification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
.. module:: Classification
:platform: Unix, Windows
Expand All @@ -16,7 +18,7 @@
import datetime
import gc
from .Model import *
from .Preprocess import Preprocess
from .Preprocess import Preprocess,list_labels

class Classification:
r"""
Expand All @@ -32,47 +34,126 @@ class Classification:
labels (:obj:`float`, optional, defaults to None):
A list of labels.
Example::
Example 1::
from Manteia.Classification import Classification
documents=['a text','text b']
labels=['a','b']
Classification(documents,labels)
from Manteia.Classification import Classification
from Manteia.Model import Model
documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
labels = ['funny','not funny']
model = Model(model_name ='roberta')
cl=Classification(model,documents,labels,process_classif=True)
Attributes:
>>>Training complete!
"""
def __init__(self,documents = [],labels = [],model=None,process=False,verbose=True):
self.process = process
self.verbose = verbose
self.model = model
self.documents = documents
self.labels = labels
if self.process:
if self.verbose:
print('Classification process.')
pp = Preprocess(documents=self.documents,labels=self.labels)
self.list_labels = pp.list_labels
self.documents = pp.documents
self.labels = pp.labels
self.load_model()
dt_train ,dt_validation=self.process_text()
self.model.configuration(dt_train)
self.model.fit(dt_train,dt_validation)
def __init__(self,model=None,documents_train = None,labels_train = None,documents_test = None,labels_test = None,process_classif=False,verbose=True):
    r"""
    Store the classifier inputs and optionally run the whole pipeline.

    Args:
        model (optional, defaults to None):
            Model to train. It is stored as-is; when it is None,
            ``load_model`` creates a default ``Model()`` later on.
        documents_train (:obj:`list`, optional, defaults to None):
            Training documents. None is treated as an empty list.
        labels_train (:obj:`list`, optional, defaults to None):
            Training labels, one per training document. None is treated
            as an empty list.
        documents_test (:obj:`list`, optional, defaults to None):
            Test documents. None is treated as an empty list.
        labels_test (:obj:`list`, optional, defaults to None):
            Test labels. None is treated as an empty list.
        process_classif (:obj:`bool`, optional, defaults to False):
            When True and both training sequences are non-empty, the
            label list is derived from ``labels_train`` and
            ``process()`` is run immediately.
        verbose (:obj:`bool`, optional, defaults to True):
            Stored on the instance; ``process()`` reads it before
            printing the test accuracy.
    """
    self.process_classif = process_classif
    self.verbose = verbose
    self.model = model

    # Defaults are None rather than [] so that every instance gets its own
    # fresh list: a literal [] default is created once and shared by all
    # calls, so mutating it on one instance would leak into the others.
    self.documents_train = [] if documents_train is None else documents_train
    self.labels_train = [] if labels_train is None else labels_train
    self.documents_test = [] if documents_test is None else documents_test
    self.labels_test = [] if labels_test is None else labels_test

    # len() is used instead of comparing with [] so that tuples and
    # array-like inputs are tested for emptiness the same way as lists.
    has_training_data = len(self.documents_train) > 0 and len(self.labels_train) > 0
    if self.process_classif and has_training_data:
        self.list_labels = list_labels(self.labels_train)
        self.process()


def test(self):

return "Classification Mantéïa."


def process(self):
    """
    Train the model and, when test data is available, evaluate it.

    The steps are, in order: ``load_model()``, ``process_text()`` to get
    the training and validation data, ``model.configuration()`` and
    ``model.fit()``. If ``documents_test`` is non-empty it is then passed
    to ``predict()``; if ``labels_test`` is also non-empty and ``verbose``
    is set, the accuracy of those predictions is printed.

    ``self.list_labels`` must be set beforehand because ``load_model()``
    reads it.

    Example 2::

        from Manteia.Classification import Classification
        from Manteia.Preprocess import list_labels
        from Manteia.Model import Model

        documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
        ,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
        labels = ['funny','not funny']

        model = Model(model_name ='roberta')
        cl=Classification(model,documents,labels)
        cl.list_labels = list_labels(labels)
        cl.process()
        print(cl.predict(documents[:2]))
        >>>['funny', 'funny']
    """
    self.load_model()
    dt_train, dt_validation = self.process_text()
    self.model.configuration(dt_train)
    self.model.fit(dt_train, dt_validation)

    # Nothing to evaluate without test documents.
    if self.documents_test is None or len(self.documents_test) == 0:
        return
    predictions_test = self.predict(self.documents_test)

    # Predictions cannot be scored without reference labels.
    if self.labels_test is None or len(self.labels_test) == 0:
        return
    if self.verbose:
        # The "{}" replacement field is required: without it str.format()
        # ignores its argument and only "accuracy : " is printed.
        # NOTE(review): accuracy is presumably provided by the star import
        # from .Model -- confirm.
        print("accuracy : {}".format(accuracy(predictions_test, self.labels_test)))

def load_model(self):
if self.model is not None:
self.model = model
else:
self.model = Model(num_labels=len(self.list_labels))
"""
Example 3::
from Manteia.Classification import Classification
from Manteia.Preprocess import list_labels
documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
labels = ['funny','not funny']
cl=Classification(documents_train = documents,labels_train = labels)
cl.list_labels = list_labels(labels)
cl.load_model()
cl.model.devices()
print(cl.predict(documents[:2]))
>>>['funny', 'funny']
"""
if self.model is None:
self.model = Model()
self.model.load_tokenizer()
self.model.num_labels=len(self.list_labels)
self.model.load_class()



def process_text(self):
train_text, validation_text, train_labels, validation_labels = train_test_split(self.documents,self.labels, random_state=2018, test_size=0.1)
r"""
This is the description of the process_text function.
Example 4::
from Manteia.Classification import Classification
from Manteia.Preprocess import list_labels
documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
labels = ['funny','not funny']
cl=Classification(documents_train = documents,labels_train = labels)
cl.list_labels = list_labels(labels)
cl.load_model()
dt_train ,dt_validation=cl.process_text()
cl.model.configuration(dt_train)
cl.model.fit(dt_train,dt_validation)
>>>Training complete!
"""
train_text, validation_text, train_labels, validation_labels = train_test_split(self.documents_train,self.labels_train, random_state=2018, test_size=0.1)

train_ids,train_masks = encode_text(train_text,self.model.tokenizer,self.model.MAX_SEQ_LEN)
validation_ids,validation_masks = encode_text(validation_text,self.model.tokenizer,self.model.MAX_SEQ_LEN)
Expand All @@ -85,21 +166,29 @@ def process_text(self):

def predict(self,documents):
r"""
This is the description of the predict function of the Classification class.
This is the description of the predict function.
Args:
documents (:obj:`list`, optional, defaults to None):
A list of documents.
A list of documents (str).
Example::
Example 5::
from Manteia.Classification import Classification
documents=['a text','text b']
labels=['a','b']
cl = Classification(documents,labels)
print(cl.predict(documents[0]))
from Manteia.Classification import Classification
from Manteia.Model import Model
documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
labels = ['funny','not funny']
model = Model(model_name ='roberta')
cl=Classification(model,documents,labels,process_classif=True)
print(cl.predict(documents[:2]))
>>>['funny', 'funny']
"""
inputs,masks = encode_text(documents,self.model.tokenizer)
predict_inputs = totensors(inputs)
Expand Down

0 comments on commit dba716e

Please sign in to comment.