Skip to content

Commit

Permalink
Mantéïa
Browse files Browse the repository at this point in the history
  • Loading branch information
ym001 committed May 20, 2020
1 parent 3302b1d commit 010ff32
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 20 deletions.
20 changes: 10 additions & 10 deletions Manteia/Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ def load_drugscom(self):
"""
self.path_dir = os.path.join(self.path,'drugscom')
if not os.path.isdir(self.path_dir):
os.mkdir(path_dir)
os.mkdir(self.path_dir)

self.path_train = os.path.join(self.path_dir,'drugsComTrain_raw.tsv')
self.path_test = os.path.join(self.path_dir,'drugsComTest_raw.tsv')
Expand Down Expand Up @@ -917,9 +917,9 @@ def load_pubmed_rct20k(self):
self.documents_test,self.labels_test = [],[]
self.documents_dev,self.labels_dev = [],[]

path_dir=os.path.join(self.path,'PubMed_20k_RCT')
if not os.path.isdir(path_dir):
os.mkdir(path_dir)
self.path_dir=os.path.join(self.path,'PubMed_20k_RCT')
if not os.path.isdir(self.path_dir):
os.mkdir(self.path_dir)

url_train = 'https://raw.githubusercontent.com/Franck-Dernoncourt/pubmed-rct/master/PubMed_20k_RCT/train.txt'
url_dev = 'https://raw.githubusercontent.com/Franck-Dernoncourt/pubmed-rct/master/PubMed_20k_RCT/dev.txt'
Expand Down Expand Up @@ -975,9 +975,9 @@ def load_Short_Jokes(self):
"""
self.documents_train = []

path_dir=os.path.join(self.path,'Short_Jokes')
if not os.path.isdir(path_dir):
os.mkdir(path_dir)
self.path_dir=os.path.join(self.path,'Short_Jokes')
if not os.path.isdir(self.path_dir):
os.mkdir(self.path_dir)
url_train = 'https://github.com/ym001/Dune/raw/master/datasets/short-jokes.zip'
if self.verbose:
print("Downloading and extracting Short_Jokes...")
Expand Down Expand Up @@ -1015,9 +1015,9 @@ def load_Tweeter_Airline_Sentiment(self):
self.documents_train = []
self.labels_train = []

path_dir=os.path.join(self.path,'Tweeter_Airline_Sentiment')
if not os.path.isdir(path_dir):
os.mkdir(path_dir)
self.path_dir=os.path.join(self.path,'Tweeter_Airline_Sentiment')
if not os.path.isdir(self.path_dir):
os.mkdir(self.path_dir)
url_train = 'https://github.com/ym001/Dune/raw/master/datasets/Airline-Sentiment.zip'
if self.verbose:
print("Downloading and extracting Tweeter_Airline_Sentiment...")
Expand Down
4 changes: 4 additions & 0 deletions Manteia/Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
"""

import warnings
warnings.filterwarnings("ignore")

import os
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
Expand Down
22 changes: 18 additions & 4 deletions Manteia/Statistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,21 @@ def dictionary(self,documents):

def number_word(self):
    """Return the size of the result of ``self.dictionary(self.documents)``."""
    vocabulary = self.dictionary(self.documents)
    return len(vocabulary)


def dictionnary_stat_labels(self):
    """
    Count the total number of words per label.

    Every label of ``self.list_labels`` starts at zero. Each document then
    adds its word count (the document split on single spaces) to each of
    its labels that belongs to ``self.list_labels``; labels outside that
    list are ignored.

    An entry of ``self.labels`` may be an iterable of labels or a single
    scalar label. A bare string (or any non-iterable value) is counted as
    one label instead of being iterated element by element.

    Returns:
        dict mapping each label of ``self.list_labels`` to its summed
        word count.
    """
    dic_word = dict.fromkeys(self.list_labels, 0)
    for doc, lab in zip(self.documents, self.labels):
        nb_words = len(doc.split(" "))
        # Iterating a string yields its characters, and a non-iterable
        # label cannot be looped over at all: wrap both as one label.
        if isinstance(lab, str) or not hasattr(lab, "__iter__"):
            lab = (lab,)
        for label in lab:
            if label in dic_word:
                dic_word[label] += nb_words
    return dic_word

def length_of_documents_by_class(self):
Expand All @@ -101,11 +107,11 @@ def length_of_documents_by_class(self):
classe=[]
for lab in self.list_labels:
dic_length[lab]= []
for doc,cl in zip(self.document,self.labels):
for doc,cl in zip(self.documents,self.labels):
sentence=doc.split(" ")
length_of_doc.append(len(sentence))
classe.append(cl[0])
dic_length[cl[0]].append(len(sentence))
classe.append(cl)
dic_length[cl].append(len(sentence))
for lab in self.list_labels:
tab=np.array(dic_length[lab])
dic_length[lab]=np.mean(tab)
Expand All @@ -120,6 +126,14 @@ def length_of_documents_by_class(self):
'''
return dic_length

def length_of_documents(self):
    """
    Return the length, in words, of every document.

    Each document of ``self.documents`` is split on single spaces, so an
    empty document counts as one (empty) token and consecutive spaces
    produce empty tokens that are counted as well.

    Returns:
        list of int, one entry per document, in document order.
    """
    return [len(doc.split(" ")) for doc in self.documents]

def word_by_doc(self):
c=0
for doc in self.documents:
Expand Down
15 changes: 15 additions & 0 deletions Manteia/Visualisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from .Model import Model

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

class Visualisation:

Expand Down Expand Up @@ -103,4 +105,17 @@ def plot_bar(self):
plt.savefig(self.path)
if self.show:
plt.show()

def plot_boxplot(self,labels,longueur_des_doc,ylim=200):
    """
    Show a box plot of document length for each label.

    Args:
        labels: label of each document; used as the x-axis categories.
        longueur_des_doc: length of each document, aligned with ``labels``.
        ylim: upper bound of the y-axis (the lower bound is fixed at 0).
    """
    data = pd.DataFrame({'Labels':labels ,'Length of document':longueur_des_doc})
    fig, ax = plt.subplots()
    plt.xticks(rotation=90)
    # Draw on the axes created above rather than relying on the implicit
    # "current axes" lookup.
    sns.boxplot(
        x='Labels',
        y='Length of document',
        data=data,
        palette='Set2',
        notch=True,
        showfliers=True,
        showmeans=True,
        meanline=True,
        ax=ax,
    )
    ax.set_ylim(0, ylim)
    plt.show()


2 changes: 1 addition & 1 deletion Manteia/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
reminiscent.
"""

__version__ = "0.0.20"
__version__ = "0.0.21"


from Manteia import Classification
Expand Down
5 changes: 3 additions & 2 deletions require_readthedoc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ matplotlib==3.2.1
nltk==3.5
numpy==1.18.4
pandas==1.0.3
scikit_learn==0.22.2.post1
scikit_learn==0.23.1
seaborn==0.10.1
torch==1.5.0
transformers==2.9.0
transformers==2.9.1
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ matplotlib==3.2.1
nltk==3.5
numpy==1.18.4
pandas==1.0.3
scikit_learn==0.22.2.post1
scikit_learn==0.23.1
seaborn==0.10.1
torch==1.5.0
transformers==2.9.0
transformers==2.9.1
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

name='Manteia',

#version='0.0.10',
version=Manteia.__version__,

packages=find_packages(),
Expand Down

0 comments on commit 010ff32

Please sign in to comment.