jh

ym001 · May 5, 2020 · 4151327 · 4151327
1 parent 4fb4ce7
commit 4151327
Show file tree

Hide file tree

Showing 21 changed files with 1,280 additions and 1 deletion.
diff --git a/Manteia.egg-info/PKG-INFO b/Manteia.egg-info/PKG-INFO
@@ -0,0 +1,37 @@
+Metadata-Version: 1.1
+Name: Manteia
+Version: 0.0.10
+Summary: deep learning,NLP,classification,text,bert,distilbert,albert,xlnet,roberta,gpt2
+Home-page: https://github.com/ym001/Manteia
+Author: Yves Mercadier
+Author-email: manteia.ym001@gmail.com
+License: UNKNOWN
+Description: Manteia - proclaim the good word
+        ================================================================
+
+        This module proclaims the good word. May they
+        regain total freedom of artificial thought towards a new age
+        reminiscent.
+
+        You can install it with pip:
+
+             pip install Manteia
+
+        Example of use:
+
+             >>> from Manteia.Classification import Classification
+             >>> # Initializing a list of texts,labels
+             >>> documents=['a text','text b']
+             >>> labels=['a','b']
+             >>> Classification(documents=documents,labels=labels)
+
+        This code is licensed under MIT.
+
+Platform: UNKNOWN
+Classifier: Programming Language :: Python
+Classifier: Development Status :: 1 - Planning
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Natural Language :: English
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Topic :: Communications
diff --git a/Manteia.egg-info/SOURCES.txt b/Manteia.egg-info/SOURCES.txt
@@ -0,0 +1,18 @@
+MANIFEST.in
+README.md
+setup.cfg
+setup.py
+Manteia/Classification.py
+Manteia/Generation.py
+Manteia/Model.py
+Manteia/Preprocess.py
+Manteia/Statistic.py
+Manteia/Task.py
+Manteia/Visualisation.py
+Manteia/__init__.py
+Manteia.egg-info/PKG-INFO
+Manteia.egg-info/SOURCES.txt
+Manteia.egg-info/dependency_links.txt
+Manteia.egg-info/entry_points.txt
+Manteia.egg-info/requires.txt
+Manteia.egg-info/top_level.txt
diff --git a/Manteia.egg-info/dependency_links.txt b/Manteia.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/Manteia.egg-info/entry_points.txt b/Manteia.egg-info/entry_points.txt
@@ -0,0 +1,5 @@
+[console_scripts]
+Manteia-classification = Manteia.Manteia:makeClassification
+Manteia-data = Manteia.Manteia:readData
+Manteia-test = Manteia.Manteia:testManteia
+
diff --git a/Manteia.egg-info/requires.txt b/Manteia.egg-info/requires.txt
@@ -0,0 +1,7 @@
+matplotlib==3.2.1
+transformers==2.8.0
+pandas==1.0.3
+torch==1.5.0
+nltk==3.4.5
+numpy==1.18.1
+scikit_learn==0.22.2.post1
diff --git a/Manteia.egg-info/top_level.txt b/Manteia.egg-info/top_level.txt
@@ -0,0 +1 @@
+Manteia
diff --git a/Manteia/Generation.pyc b/Manteia/Generation.pyc
diff --git a/Manteia/__pycache__/Classification.cpython-37.pyc b/Manteia/__pycache__/Classification.cpython-37.pyc
diff --git a/Manteia/__pycache__/Model.cpython-37.pyc b/Manteia/__pycache__/Model.cpython-37.pyc
diff --git a/Manteia/__pycache__/__init__.cpython-37.pyc b/Manteia/__pycache__/__init__.cpython-37.pyc
diff --git a/build/lib/Manteia/Classification.py b/build/lib/Manteia/Classification.py
@@ -0,0 +1,70 @@
+
+import numpy as np
+import random
+import pandas as pd
+import sklearn
+from sklearn.model_selection import train_test_split,KFold
+import time
+import datetime
+import gc
+############
+from .Model import *
+from .Preprocess import Preprocess
+
+class Classification:
+	r"""
+		Train a text classifier on labelled documents and predict labels.
+
+		When both ``documents`` and ``labels`` are given, the constructor
+		preprocesses them, holds out 10% of the samples for validation and
+		fits the model immediately. Otherwise only the settings are stored
+		and no model is created, so ``predict`` cannot be used on such an
+		instance.
+		
+		Args:
+			model_name (:obj:`string`, optional, defaults to  'bert'):
+				give the name of a model.
+			documents (:obj:`list`, optional, defaults to None):
+				A list of documents.
+			labels (:obj:`list`, optional, defaults to None):
+				A list of labels, one per document.
+				 
+		Example::
+			from Manteia.Classification import Classification
+			# Initializing a list of texts,labels
+			documents=['a text','text b']
+			labels=['a','b']
+			# Keyword arguments are required here: the first positional
+			# parameter is model_name, not documents.
+			Classification(documents=documents,labels=labels)
+		Attributes:
+			MAX_SEQ_LEN: sequence length handed to encode_text (64).
+			model_name: the model name received by the constructor.
+			list_labels: label list taken from Preprocess (set only after training).
+			model: the Model instance (set only after training).
+	"""
+	def __init__(self,model_name ='bert',documents = None,labels = None):
+		self.MAX_SEQ_LEN = 64
+		self.model_name  = model_name
+
+		# Identity test on purpose: `!= None` is evaluated element-wise by
+		# array-like inputs and cannot be used as a condition.
+		if documents is not None and labels is not None:
+			pp               = Preprocess(documents,labels)
+			self.list_labels = pp.list_labels
+			# Forward the requested model name; it was previously stored but never used.
+			self.model       = Model(model_name=model_name,num_labels=len(pp.list_labels))
+			self.model.load()
+
+			# Fixed random_state keeps the train/validation split reproducible.
+			train_text, validation_text, train_labels, validation_labels = train_test_split(pp.documents, pp.labels, random_state=2018, test_size=0.1)
+
+			train_ids,train_masks           = encode_text(train_text,self.model.tokenizer,self.MAX_SEQ_LEN)
+			validation_ids,validation_masks = encode_text(validation_text,self.model.tokenizer,self.MAX_SEQ_LEN)
+			train_labels                    = encode_label(train_labels,pp.list_labels)
+			validation_labels               = encode_label(validation_labels,pp.list_labels)
+
+			dt_train          = Create_DataLoader_train(train_ids,train_masks,train_labels)
+			dt_validation     = Create_DataLoader_train(validation_ids,validation_masks,validation_labels)
+
+			self.model.configuration(dt_train)
+			self.model.fit(dt_train,dt_validation)
+	def test(self):
+		"""Return a fixed identification string for this class."""
+		return "Classification Mantéïa."
+
+	def predict(self,documents):
+		"""
+			Predict a label for each document.
+
+			Requires an instance that was trained in the constructor, because
+			it reads ``self.model`` and ``self.list_labels``.
+
+			Args:
+				documents: the texts to classify.
+			Returns:
+				The model predictions mapped back through decode_label.
+		"""
+		# Encode with the same sequence length that was used for training.
+		inputs,masks   = encode_text(documents,self.model.tokenizer,self.MAX_SEQ_LEN)
+		predict_inputs = totensors(inputs)
+		predict_masks  = totensors(masks)
+		dt             = Create_DataLoader_predict(predict_inputs,predict_masks)
+		prediction     = self.model.predict(dt)
+		prediction     = decode_label(prediction,self.list_labels)
+		return prediction
+
+
+
+
diff --git a/build/lib/Manteia/Generation.py b/build/lib/Manteia/Generation.py
@@ -0,0 +1,58 @@
+
+import numpy as np
+import random
+import pandas as pd
+import sklearn
+from sklearn.model_selection import train_test_split,KFold
+import time
+import datetime
+import gc
+############
+from .Model import *
+from .Preprocess import Preprocess
+
+class Generation:
+	r"""
+		Train a text generation model on a list of documents.
+
+		All of the work happens in the constructor: the model named
+		``model_name`` is loaded, ``fit_generation`` is run on a loader built
+		from ``documents``, and the decoded result of
+		``predict_generation('joke')`` is printed.
+		
+		Args:
+			model_name (:obj:`string`, optional, defaults to  'gpt2-medium'):
+				give the name of a model.
+			documents (:obj:`list`, optional, defaults to None):
+				A list of documents used to build the training loader.
+			labels (optional, defaults to None):
+				Not used by this class.
+				 
+		Example::
+			from Manteia.Generation import Generation
+			# Initializing a list of texts
+			documents=['a text','text b']
+			Generation(documents=documents)
+	"""
+	def __init__(self,model_name ='gpt2-medium',documents = None,labels = None):
+		generator = Model(model_name=model_name)
+		generator.load()
+		loader    = Create_DataLoader_generation(documents)
+
+		# Training settings, applied to the model in the original order.
+		settings = (
+			('BATCH_SIZE',16),
+			('EPOCHS',10),
+			('LEARNING_RATE',3e-5),
+			('WARMUP_STEPS',500),
+			('MAX_SEQ_LEN',400),
+		)
+		for attribute,value in settings:
+			setattr(generator,attribute,value)
+
+		generator.fit_generation(loader)
+
+		# Generate once from the fixed prompt and show the decoded text.
+		print(decode_text(generator.predict_generation('joke'),generator.tokenizer))
+
+	def test(self):
+		"""Return a fixed identification string for this class."""
+		return "Generation Mantéïa."
+
+
+
+
diff --git a/build/lib/Manteia/Manteia.py b/build/lib/Manteia/Manteia.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#  Manteia.py
+#  
+#  Copyright 2020 Yves <yves@mercadier>
+#  
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#  
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#  
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#  MA 02110-1301, USA.
+#  
+#
+
+"""
+	This module proclaims the good word. May they
+	regain total freedom of artificial thought towards a new age
+	reminiscent.
+
+	You can install it with pip:
+
+		pip install Manteia
+
+	Example of use:
+
+	>>> from Manteia import testManteia
+	>>> testManteia ()
+
+	This code is licensed under MIT.
+"""
+# Names exported by a star-import of this module (the Manteia class is not listed).
+__all__ = ['testManteia','testData','testClassification']
+
+from .Preprocess import Preprocess
+from .Classification import Classification
+from .Statistic import Statistic
+from .Visualisation import Visualisation
+from .Model import Model
+
+class Manteia:
+	"""
+		Entry point bundling preprocessing and, optionally, a classification task.
+
+		Args:
+			documents (list, optional): texts to work on.
+			labels (list, optional): labels matching ``documents``.
+			task (string, optional, defaults to 'classification'):
+				when equal to 'classification', a Classification object is
+				built from ``documents`` and ``labels``.
+
+		Attributes:
+			data: Preprocess built from the inputs, or None without documents.
+			classification: the Classification object, or None for other tasks.
+	"""
+	def __init__(self,documents=None,labels=None,task='classification'):
+		# Always define both attributes so later reads never raise AttributeError.
+		self.data=None
+		self.classification=None
+		if documents is not None:
+			# `Data` is not defined in this module; Preprocess is the imported
+			# class that is built from (documents,labels).
+			self.data=Preprocess(documents,labels)
+		if task=='classification':
+			# Classification accepts documents/labels (it has no `data` parameter)
+			# and runs its own preprocessing on them.
+			self.classification=Classification(documents=documents,labels=labels)
+	@staticmethod
+	def testManteia():
+		"""Return the greeting string; callable on the class or on an instance."""
+		return "Hello, Mantéïa is alive."
+
+def testManteia():
+	"""Print the Manteia greeting message on standard output."""
+	greeting = "Hello, Mantéïa is alive."
+	print(greeting)
+
+def testData():
+	"""Preprocess two sample documents and print the label list and the data frame."""
+	documents=['    ,;:123test   car','test houses']
+	labels=['1','0']
+	# `Data` is neither defined nor imported in this module; Preprocess is the
+	# imported class that is built from (documents,labels) and exposes list_labels.
+	data=Preprocess(documents,labels)
+	print(data.list_labels)
+	# NOTE(review): get_df() is kept from the original call chain - confirm that
+	# Preprocess provides it.
+	print(data.get_df())
+
+def testClassification():
+	"""Train a Classification object on two sample documents."""
+	documents=['test car','test house']
+	labels=['1','0']
+	# Keyword arguments are required: the first positional parameter of
+	# Classification is model_name, so positional use would skip training.
+	mant=Classification(documents=documents,labels=labels)
+