e

ym001 · May 14, 2020 · dba716e · dba716e
1 parent 2eef643
commit dba716e
Show file tree

Hide file tree

Showing 27 changed files with 643 additions and 136 deletions.
diff --git a/Exemples/exemple_Classification.py b/Exemples/exemple_Classification.py
@@ -22,6 +22,7 @@
 #  
 #
 from Manteia.Classification import Classification 
+from Manteia.Model import Model 
 
 def main(args):
 	documents = [
@@ -38,10 +39,11 @@ def main(args):
 			]
 
 	labels = [
-			['funny'],['not funny'],['funny'],['not funny'],['funny'],['not funny'],['not funny'],['not funny'],['funny'],['not funny'],
+			'funny','not funny','funny','not funny','funny','not funny','not funny','not funny','funny','not funny'
 			]
 
-	cl=Classification(model_name ='roberta',documents,labels,process=True)
+	model = Model(model_name ='roberta')
+	cl=Classification(model,documents,labels,process_classif=True)
 	print(cl.predict(documents[:2]))
 	return 0
 

diff --git a/Exemples/exemple_Classification1.py b/Exemples/exemple_Classification1.py
@@ -0,0 +1,9 @@
+from Manteia.Classification import Classification 
+from Manteia.Model import Model 
+
+documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.','What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+
+labels = ['funny','not funny']
+
+model = Model(model_name ='roberta')
+cl=Classification(model,documents,labels,process_classif=True)
diff --git a/Exemples/exemple_Classification2.py b/Exemples/exemple_Classification2.py
@@ -0,0 +1,13 @@
+from Manteia.Classification import Classification 
+from Manteia.Preprocess import list_labels 
+from Manteia.Model import Model 
+
+documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.','What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+
+labels = ['funny','not funny']
+
+model = Model(model_name ='roberta')
+cl=Classification(model,documents,labels)
+cl.list_labels     = list_labels(labels)
+cl.process()
+print(cl.predict(documents[:2]))
diff --git a/Exemples/exemple_Classification3.py b/Exemples/exemple_Classification3.py
@@ -0,0 +1,12 @@
+from Manteia.Classification import Classification 
+from Manteia.Preprocess import list_labels 
+
+documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.','What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+
+labels = ['funny','not funny']
+
+cl=Classification(documents_train = documents,labels_train = labels)
+cl.list_labels     = list_labels(labels)
+cl.load_model()
+cl.model.devices()
+print(cl.predict(documents[:2]))
diff --git a/Exemples/exemple_Classification4.py b/Exemples/exemple_Classification4.py
@@ -0,0 +1,13 @@
+from Manteia.Classification import Classification 
+from Manteia.Preprocess import list_labels 
+
+documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.','What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+
+labels = ['funny','not funny']
+
+cl=Classification(documents_train = documents,labels_train = labels)
+cl.list_labels     = list_labels(labels)
+cl.load_model()
+dt_train ,dt_validation=cl.process_text()
+cl.model.configuration(dt_train)
+cl.model.fit(dt_train,dt_validation)
diff --git a/Exemples/exemple_Classification5.py b/Exemples/exemple_Classification5.py
@@ -0,0 +1,11 @@
+from Manteia.Classification import Classification 
+from Manteia.Model import Model 
+
+documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
+			,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+
+labels = ['funny','not funny']
+
+model = Model(model_name ='roberta')
+cl=Classification(model,documents,labels,process_classif=True)
+print(cl.predict(documents[:2]))
diff --git a/Exemples/exemple_Classification_dataset.py b/Exemples/exemple_Classification_dataset.py
@@ -28,12 +28,12 @@
 def main(args):
 
 	ds             = Dataset('20newsgroups')
-	documents      = ds.get_documents()
-	labels         = ds.get_labels()
+	documents      = ds.documents_train
+	labels         = ds.labels_train
 	pp             = Preprocess(documents=documents,labels=labels,nb_sample=500)
 	documents      = pp.documents
 	labels         = pp.labels
-	cl             = Classification(documents=documents,labels=labels)
+	cl             = Classification(documents_train=documents,labels_train=labels)
 	cl.list_labels = pp.list_labels
 
 	cl.load_model()

diff --git a/Exemples/exemple_Dataset.py b/Exemples/exemple_Dataset.py
@@ -1,16 +1,18 @@
 from Manteia.Dataset import Dataset
 
-#ds=Dataset('20newsgroups')
+ds=Dataset('20newsgroups')
 ##ds=Dataset('SST-2')
 ##ds=Dataset('SST-B')
 #ds=Dataset('pubmed_rct20k')
 #ds=Dataset('drugscom')
-##ds=Dataset('yelp')
 #ds=Dataset('trec')
 #ds=Dataset('agnews')
 #ds=Dataset('DBPedia')
 #ds=Dataset('Amazon Review Full')
-ds=Dataset('Amazon Review Polarity')
+#ds=Dataset('Amazon Review Polarity')
+#ds=Dataset('Sogou News')
+#ds=Dataset('Yahoo! Answers')
+#ds=Dataset('Yelp Review Full')
 
 print('Train : ')
 print(ds.documents_train[:5])
@@ -19,4 +21,4 @@
 print(ds.documents_test[:5])
 print(ds.labels_test[:5])
 print('Description : ')
-#print(ds.description)
+print(ds.description)
diff --git a/Exemples/exemple_Dataset1.py b/Exemples/exemple_Dataset1.py
@@ -0,0 +1,7 @@
+from Manteia.Dataset import Dataset
+
+ds=Dataset('DBPedia')
+
+print('Train : ')
+print(ds.documents_train[:5])
+print(ds.labels_train[:5])
diff --git a/Exemples/exemple_Dataset2.py b/Exemples/exemple_Dataset2.py
@@ -0,0 +1,7 @@
+from Manteia.Dataset import Dataset
+
+ds=Dataset('Yahoo! Answers')
+
+print('Test : ')
+print(ds.documents_test[:5])
+print(ds.labels_test[:5])
diff --git a/Exemples/exemple_Dataset3.py b/Exemples/exemple_Dataset3.py
@@ -0,0 +1,8 @@
+from Manteia.Dataset import Dataset
+
+ds=Dataset('pubmed_rct20k')
+
+print('Train : ')
+print(ds.documents_train[:5])
+print(ds.labels_train[:5])
+
diff --git a/Exemples/exemple_Dataset4.py b/Exemples/exemple_Dataset4.py
@@ -0,0 +1,7 @@
+from Manteia.Dataset import Dataset
+
+ds=Dataset('drugscom')
+
+print('Train : ')
+print(ds.documents_train[:5])
+print(ds.labels_train[:5])
diff --git a/Manteia/Classification.py b/Manteia/Classification.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 """
 .. module:: Classification
    :platform: Unix, Windows
@@ -16,7 +18,7 @@
 import datetime
 import gc
 from .Model import *
-from .Preprocess import Preprocess
+from .Preprocess import Preprocess,list_labels
 
 class Classification:
 	r"""
@@ -32,47 +34,126 @@ class Classification:
 				
 			labels (:obj:`float`, optional, defaults to None):
 				A list of labels.
+
 				 
-		Example::
+		Example 1::
+
 		
-			from Manteia.Classification import Classification
-			documents=['a text','text b']
-			labels=['a','b']
-			Classification(documents,labels)
+			from Manteia.Classification import Classification 
+			from Manteia.Model import Model 
+			
+			documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
+			,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+			
+			labels = ['funny','not funny']
+			
+			model = Model(model_name ='roberta')
+			cl=Classification(model,documents,labels,process_classif=True)
 			
-		Attributes:
+			>>>Training complete!
 	"""
-	def __init__(self,documents = [],labels = [],model=None,process=False,verbose=True):
-		self.process   = process
-		self.verbose   = verbose
-		self.model     = model
-		self.documents = documents
-		self.labels    = labels
-		if self.process:
-			if self.verbose:
-				print('Classification process.')
-			pp               = Preprocess(documents=self.documents,labels=self.labels)
-			self.list_labels = pp.list_labels
-			self.documents   = pp.documents
-			self.labels      = pp.labels
-			self.load_model()
-			dt_train ,dt_validation=self.process_text()
-			self.model.configuration(dt_train)
-			self.model.fit(dt_train,dt_validation)
+	def __init__(self,model=None,documents_train = None,labels_train = None,documents_test = None,labels_test = None,process_classif=False,verbose=True):
+		"""Store the model and data; call process() when process_classif is set and training data is non-empty."""
+		self.process_classif = process_classif
+		self.verbose         = verbose
+		self.model           = model
+		self.documents_train = documents_train if documents_train is not None else []  # fresh list per instance, never a shared default
+		self.labels_train    = labels_train    if labels_train    is not None else []
+		self.documents_test  = documents_test  if documents_test  is not None else []
+		self.labels_test     = labels_test     if labels_test     is not None else []
+
+		if self.process_classif and self.documents_train!=[] and self.labels_train!=[]:
+
+			self.list_labels     = list_labels(self.labels_train)
+			self.process()
+
 
 	def test(self):
+
 		return "Classification Mantéïa."
+
+
+	def process(self):
+		"""Load the model, fit it on the training split and, if test documents were given, predict on them.
+
+		Example 2::
+		
+			from Manteia.Classification import Classification 
+			from Manteia.Preprocess import list_labels 
+			from Manteia.Model import Model 
+			
+			documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.','What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+			
+			labels = ['funny','not funny']
+			
+			model = Model(model_name ='roberta')
+			cl=Classification(model,documents,labels)
+			cl.list_labels     = list_labels(labels)
+			cl.process()
+			print(cl.predict(documents[:2]))
+			>>>['funny', 'funny']
+		"""
+		self.load_model()
+		dt_train ,dt_validation=self.process_text()
+		self.model.configuration(dt_train)
+		self.model.fit(dt_train,dt_validation)
+		if self.documents_test != []:
+			predictions_test=self.predict(self.documents_test)
+			if self.labels_test !=[]:
+				if self.verbose:
+					print("accuracy : {}".format(accuracy(predictions_test, self.labels_test)))
+
 	def load_model(self):
-		if self.model is not None:
-			self.model = model
-		else:
-			self.model = Model(num_labels=len(self.list_labels))
+		"""
+		Example 3::
+		
+			from Manteia.Classification import Classification 
+			from Manteia.Preprocess import list_labels 
+			
+			documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
+			,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+			
+			labels = ['funny','not funny']
+			
+			cl=Classification(documents_train = documents,labels_train = labels)
+			cl.list_labels     = list_labels(labels)
+			cl.load_model()
+			cl.model.devices()
+			print(cl.predict(documents[:2]))
+			>>>['funny', 'funny']
+		"""
+		if self.model is None:
+			self.model = Model()
 		self.model.load_tokenizer()
+		self.model.num_labels=len(self.list_labels)
 		self.model.load_class()
 
+
+
 	def process_text(self):
-		train_text, validation_text, train_labels, validation_labels = train_test_split(self.documents,self.labels, random_state=2018, test_size=0.1)
+		r"""
+		This is the description of the process_text function.
+		
+		Example 4::
+		
+			from Manteia.Classification import Classification 
+			from Manteia.Preprocess import list_labels 
+			
+			documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
+			,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+			
+			labels = ['funny','not funny']
+			
+			cl=Classification(documents_train = documents,labels_train = labels)
+			cl.list_labels     = list_labels(labels)
+			cl.load_model()
+			dt_train ,dt_validation=cl.process_text()
+			cl.model.configuration(dt_train)
+			cl.model.fit(dt_train,dt_validation)
+
+			>>>Training complete!
+		"""
+		train_text, validation_text, train_labels, validation_labels = train_test_split(self.documents_train,self.labels_train, random_state=2018, test_size=0.1)
 
 		train_ids,train_masks           = encode_text(train_text,self.model.tokenizer,self.model.MAX_SEQ_LEN)
 		validation_ids,validation_masks = encode_text(validation_text,self.model.tokenizer,self.model.MAX_SEQ_LEN)
@@ -85,21 +166,29 @@ def process_text(self):
 
 	def predict(self,documents):
 		r"""
-		This is the description of the predict function of the Classification class.
+		This is the description of the predict function.
 		
 		Args:
 		
 			documents (:obj:`list`, optional, defaults to None):
-				A list of documents.
+				A list of documents (str).
 				 
-		Example::
+					 
+		Example 5::
 		
-			from Manteia.Classification import Classification
-			documents=['a text','text b']
-			labels=['a','b']
-			cl = Classification(documents,labels)
-			print(cl.predict(documents[0]))
-
+			from Manteia.Classification import Classification 
+			from Manteia.Model import Model 
+			
+			documents = ['What should you do before criticizing Pac-Man? WAKA WAKA WAKA mile in his shoe.'
+			,'What did Arnold Schwarzenegger say at the abortion clinic? Hasta last vista, baby.',]
+			
+			labels = ['funny','not funny']
+			
+			model = Model(model_name ='roberta')
+			cl=Classification(model,documents,labels,process_classif=True)
+			print(cl.predict(documents[:2]))
+			
+			>>>['funny', 'funny']
 		"""
 		inputs,masks   = encode_text(documents,self.model.tokenizer)
 		predict_inputs = totensors(inputs)