~

FreedomIntelligence · Apr 11, 2019 · 7c1dfae · 7c1dfae
1 parent 4eef323
commit 7c1dfae
Show file tree

Hide file tree

Showing 150 changed files with 12,521 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,20 @@
+*.bak
+__pycache__/*
+*.pyc
+glove/*
+data/*
+eval/*
+models/__pycache__/*
+dataset/__pycache__/*
+complexnn/__pycache__/*
+*.text
+test.py
+log/*
+*.txt
+case study/*
+*.cases
+*.csv
+temp/*
+*.pkl
+.spyproject/*
+best_parameters/*
diff --git a/README.md b/README.md
@@ -0,0 +1,3 @@
+# qnn
+
+This is supported by Li Qiuchi (李秋池) and Waby.
diff --git a/analysis/read_history.py b/analysis/read_history.py
@@ -0,0 +1,109 @@
+import os
+import codecs
+import numpy as np
+from params import Params
+path = 'case study/'
+performance_dict = {}
+params = Params()
+
+from sklearn.neighbors import KDTree
+
+
+
+import os
+import codecs
+import numpy as np
+from scipy.spatial.distance import cosine
+path = 'eval'
+performance_dict = {}
+params = Params()
+from tqdm import tqdm
+
+def complex_metric(number1, number2):
+    """Return the norm of the element-wise product of the two arguments.
+
+    The product ``number1 * number2`` is handed to ``np.linalg.norm`` with
+    its default arguments.
+    """
+    product = number1 * number2
+    return np.linalg.norm(product)
+
+
+def write_to_file(filename,strings):
+    """Write ``strings`` to ``filename`` as UTF-8, joined by newlines.
+
+    The file is opened in write mode, so existing content is replaced.
+    No newline is appended after the last item.
+    """
+    with codecs.open(filename, "w", encoding="utf-8") as out:
+        content = "\n".join(strings)
+        out.write(content)
+
+def case_study(eval_dir):
+    """Write word lists describing the run saved in ``eval_dir``.
+
+    Reads the embedding, weight, measurement and vocabulary arrays stored in
+    ``eval_dir``, re-parses ``config.ini`` from that directory into the
+    module-level ``params`` (and exports it to "sb.ini"), then writes
+    ``<dataset_name>.cases`` containing, in order:
+
+    * the words picked by the 50 smallest entries of ``weights[1:]``,
+    * the words picked by the 50 largest entries of ``weights[1:]``,
+    * a separator line,
+    * for every measurement, the words picked by the 50 smallest
+      ``complex_metric`` values between the word embeddings and that
+      measurement vector.
+    """
+    strings = []
+    # Each array is loaded from the file that carries its name. The previous
+    # version read the two embedding files into swapped variables, so the
+    # polar formula below used the amplitudes as angles.
+    amplitude = np.load(os.path.join(eval_dir,'amplitude_embedding.npy'))
+    phase = np.load(os.path.join(eval_dir,'phase_embedding.npy'))
+    weights = np.load(os.path.join(eval_dir,'weights.npy'))[:,0]
+    measurements = np.load(os.path.join(eval_dir,'measurements.npy'))
+    id2word = np.load(os.path.join(eval_dir,'id2word.npy'))
+
+    config_path = os.path.join(eval_dir,'config.ini')
+    params.parse_config(config_path)
+    params.export_to_config("sb.ini")
+
+    # NOTE(review): these ranks index weights[1:], so they are shifted by one
+    # relative to weights itself -- confirm id2word uses the same offset.
+    weight_order = np.argsort(weights[1:])
+    strings.append(" ".join(id2word[weight_order[:50]]))
+    strings.append(" ".join(id2word[weight_order[-50:]]))
+    strings.append("\n")
+
+    # Polar form: amplitude * (cos(phase) + i * sin(phase)).
+    embedding = np.cos(phase)*amplitude+1j*np.sin(phase)*amplitude
+    # The last axis of measurements holds the real and imaginary parts.
+    measurement_vectors = measurements[:,:,0] + 1j *measurements[:,:,1]
+
+    for i in range(params.measurement_size):
+        # The measurement row does not depend on the word, so slice it once.
+        vector = measurement_vectors[i,:]
+        numbers = [complex_metric(word, vector) for word in tqdm(embedding)]
+        # NOTE(review): same one-position shift as for the weights above.
+        strings.append(" ".join(id2word[np.argsort(numbers[1:])[:50]]))
+    write_to_file(params.dataset_name+".cases", strings)
+
+
+
+# For every dataset, remember the run directory under ``path`` whose history
+# reaches the highest validation accuracy, then run the case study on it.
+for file_name in os.listdir(path):
+    eval_dir = os.path.join(path, file_name)
+    history_path = os.path.join(eval_dir,'history.npy')
+    config_path = os.path.join(eval_dir,'config.ini')
+    params.parse_config(config_path)
+
+    dataset_name = params.dataset_name
+    # history.npy is used as a dict below, i.e. it is a pickled object array;
+    # NumPy >= 1.16.3 refuses to load those unless allow_pickle is set.
+    history = np.load(history_path, allow_pickle=True).tolist()
+    accuracy = max(history['val_acc'])
+    best = performance_dict.get(dataset_name)
+    if best is None or accuracy > best[0]:
+        performance_dict[dataset_name] = (accuracy, file_name)
+
+for dataset, args in performance_dict.items():
+    percision, filename = args
+
+    # Join with os.path so the directory resolves on every platform; the
+    # previous hard-coded "eval\\" prefix only worked on Windows.
+    eval_dir = os.path.join(path, filename)
+    case_study(eval_dir)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#def complex_metric(number1, number2):
+#    
+#    return np.linalg.norm(number1 * number2)
+
+
+
+
diff --git a/analysis/read_log.py b/analysis/read_log.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+import pandas as pd
+import codecs
+filename = "log/acc20180812/20180812162841_546.log"
+
+def process(string):
+    """Parse one accumulated log record into a dict of string fields.
+
+    The span from the first "[[" to the end of the first "]]" is replaced
+    by "0", every "->" is removed and every ":" becomes the word
+    "performance". The first six space-separated fields are skipped and the
+    remaining non-blank tokens are read as alternating key / value pairs.
+    The intermediate record and token list are printed.
+
+    Raises ValueError when the string has no "[[" or "]]".
+    """
+    end = string.index("]]") + 2
+    start = string.index("[[")
+
+    record = "".join((string[:start], "0", string[end:]))
+    print(record)
+
+    record = record.replace("->", "").replace(":", "performance")
+    tokens = [piece for piece in record.split(" ")[6:] if piece.strip()]
+    print(tokens)
+
+    # Even positions are keys, odd positions their values.
+    return dict(zip(tokens[::2], tokens[1::2]))
+# Collect the result records from the log. A record may span several lines:
+# lines are concatenated into ``sample`` until one without "[[" closes it.
+sample = ""
+lines, records = [], []
+skipped_prefixes = ("INFO: running", "INFO: Comput", "INFO: Found ")
+with codecs.open(filename, encoding="utf-8") as f:
+    for line in f:
+        line = line.strip()
+
+        if "->" not in line:
+            continue
+        if any(prefix in line for prefix in skipped_prefixes):
+            continue
+        # Lines with single brackets but no double brackets are not records.
+        if "[" in line and "]" in line and "[[" not in line and "]]" not in line:
+            continue
+
+        sample += line
+        if "[[" not in line:
+            records.append(process(sample))
+            lines.append(sample)
+            sample = ""
+
+
+# Dump the parsed records and the raw record lines.
+df = pd.DataFrame(records)
+df.to_csv("rerults.csv", sep="\t", index=None, encoding="utf-8")
+with codecs.open("demo", "w", encoding="utf-8") as f:
+    f.write("\n".join(lines))
+
+print(df.groupby("dataset_name").apply(lambda group: group["performance"].max()))
+summary = df[["dataset_name","measurement_size","performance"]]
+print(summary)
+
+# Group (performance, measurement_size) pairs by dataset, in file order.
+records = dict()
+indexed = summary.set_index(keys=["dataset_name","measurement_size"])
+for (dataset, size), row in indexed.iterrows():
+    records.setdefault(dataset, []).append((row.values[0], size))
+print(records)
+
+# Print one table row per entry, walking each dataset's list from the end.
+# The row label is the measurement size taken from the last dataset listed.
+datasets = ["CR","MPQA","SUBJ","MR","SST_2","SST_5","TREC"]
+for offset in range(1, len(records["CR"]) + 1):
+    nums = [records[dataset][-offset][0] for dataset in datasets]
+    label = records[datasets[-1]][-offset][1]
+    print(label + " &" + " & ".join(nums) + "\\\\")
+
+
diff --git a/complexnn/AESD.py b/complexnn/AESD.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+import sys; sys.path.append('.')
+import numpy as np
+from keras import backend as K
+from keras.layers import Layer,Dense,Dropout
+from keras.models import Model, Input
+import tensorflow as tf
+import sys
+import os
+import keras.backend as K
+import math
+
+class AESD(Layer):
+    """Similarity layer over a pair of tensors ``[x, y]``.
+
+    Two terms are computed along the last axis:
+
+    * a distance term ``1 / (1 + ||x - y||)``,
+    * a sigmoid term ``1 / (1 + exp(-delta * (c + <x, y>)))``.
+
+    With ``mean="geometric"`` the output is their product, otherwise the
+    average of the two. The result gets a trailing axis of size 1.
+
+    Arguments:
+        delta: scale applied inside the sigmoid term.
+        c: offset added to the dot product inside the sigmoid term.
+        dropout_keep_prob: value passed to the internal ``Dropout`` layer.
+        mean: "geometric" for the product, anything else for the average.
+        axis: axis removed by ``compute_output_shape``.
+        keep_dims: whether ``compute_output_shape`` appends a size-1 axis.
+    """
+
+    def __init__(self, delta =0.5,c=1,dropout_keep_prob = 1, mean="geometric",axis = -1, keep_dims = True, **kwargs):
+        self.axis = axis
+        self.keep_dims = keep_dims
+        # Kept so that get_config() can report it.
+        self.dropout_keep_prob = dropout_keep_prob
+        # NOTE(review): Keras' Dropout expects the fraction of units to DROP,
+        # while this argument is named as a keep probability -- confirm which
+        # of the two the callers actually pass before changing this line.
+        self.dropout_probs = Dropout(dropout_keep_prob)
+        self.delta = delta
+        self.c = c
+        self.mean=mean
+        super(AESD, self).__init__(**kwargs)
+
+    def get_config(self):
+        """Return every constructor argument so the layer can be re-created."""
+        config = {'delta': self.delta,
+                  'c': self.c,
+                  'dropout_keep_prob': self.dropout_keep_prob,
+                  'mean': self.mean,
+                  'axis': self.axis,
+                  'keep_dims': self.keep_dims}
+        base_config = super(AESD, self).get_config()
+        return dict(list(base_config.items())+list(config.items()))
+
+    def build(self, input_shape):
+        """The layer has no trainable weights; only mark it as built."""
+        super(AESD, self).build(input_shape)  # Be sure to call this somewhere!
+
+    def call(self, inputs):
+        """Compute the similarity of the pair ``inputs = [x, y]``."""
+        x,y = inputs
+
+        # The small constant keeps the square root away from exactly zero.
+        l2norm = K.sqrt(K.sum(self.dropout_probs((x-y)**2),keepdims = False,axis=-1)+0.00001)
+        if self.mean=="geometric":
+            output =  1 /(1+ l2norm) *   1 /( 1+ K.exp(-1*self.delta*(self.c+K.sum(self.dropout_probs(x*y),-1))))
+        else:
+            output =  0.5 /(1+ l2norm) +   0.5 /( 1+ K.exp(-1*self.delta*(self.c+K.sum(self.dropout_probs(x*y),-1))))
+
+        return K.expand_dims(output)
+
+    def compute_output_shape(self, input_shape):
+        """Return the first input's shape without ``axis`` (plus 1 if keep_dims).
+
+        NOTE(review): ``call`` always reduces the last axis and always
+        appends a size-1 axis, so the shape reported here only matches it
+        for the default ``axis=-1, keep_dims=True``.
+        """
+        first_shape = input_shape[0]
+        # Resolve a negative axis locally instead of overwriting self.axis,
+        # so the configured value survives repeated calls and get_config().
+        axis = self.axis
+        if axis < 0:
+            axis = len(first_shape) + axis
+        output_shape = [dim for i, dim in enumerate(first_shape) if i != axis]
+        if self.keep_dims:
+            output_shape.append(1)
+        return([tuple(output_shape)])
+
+
+if __name__ == '__main__':
+    # Smoke test: build a two-input model around the layer defined in this
+    # module and print its predictions for two small pairs of vectors.
+    from keras.layers import Input, Dense
+
+    # The declared input shape matches the 2-component rows fed to predict().
+    x =  Input(shape=(2,))
+    y =  Input(shape=(2,))
+
+    # Use this module's own layer; the previous demo referenced a class that
+    # is neither defined nor imported here.
+    output = AESD()([x,y])
+
+    encoder = Model([x,y], output)
+    encoder.compile(loss = 'mean_squared_error',
+            optimizer = 'rmsprop',
+            metrics=['accuracy'])
+
+    a= np.array([[1,1],[3,4]])
+    b= np.array([[1,0],[4,3]])
+    print(encoder.predict(x = [a,b]))
+
+
+
+
diff --git a/complexnn/__init__.py b/complexnn/__init__.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+from complexnn.embedding import phase_embedding_layer, amplitude_embedding_layer
+from complexnn.multiply import ComplexMultiply
+from complexnn.superposition import ComplexSuperposition
+from complexnn.dense import ComplexDense
+from complexnn.mixture import ComplexMixture
+from complexnn.measurement import ComplexMeasurement
+from complexnn.concatenation import Concatenation
+from complexnn.index import Index
+from complexnn.ngram import NGram
+from complexnn.utils import GetReal
+from complexnn.projection import Complex1DProjection
+from complexnn.l2_norm import L2Norm
+from complexnn.l2_normalization import L2Normalization
+from complexnn.utils import *
+from complexnn.reshape import reshape
+from complexnn.lambda_functions import *
+from complexnn.cosine import Cosinse
+from complexnn.marginLoss import MarginLoss
+from complexnn.AESD import AESD
+#def get
+import os,sys,traceback
+def import_class(import_str):
+    """Import and return the attribute named by a dotted path.
+
+    ``import_str`` has the form "package.module.Name": everything before
+    the last dot is imported as a module and ``Name`` is looked up on it.
+    The directory of this file is put at the front of ``sys.path`` first, so
+    modules that live next to it can be named without a package prefix.
+
+    Raises ImportError when the module lacks the requested attribute;
+    errors raised while importing the module itself propagate unchanged.
+    """
+    import importlib
+
+    dirname = os.path.dirname(os.path.abspath(__file__))
+    # Only insert when it is not already first, so repeated calls do not
+    # keep growing sys.path with duplicates.
+    if not sys.path or sys.path[0] != dirname:
+        sys.path.insert(0,dirname)
+    mod_str, _sep, class_str = import_str.rpartition('.')
+    module = importlib.import_module(mod_str)
+    try:
+        return getattr(module, class_str)
+    except AttributeError:
+        raise ImportError('Class %s cannot be found (%s)' %
+                (class_str,
+                    traceback.format_exception(*sys.exc_info())))
+
+
+def getScore(import_str = "", *args, **kwargs):
+    """Resolve ``import_str`` with ``import_class`` and call the result.
+
+    The remaining positional and keyword arguments are forwarded to the
+    resolved object; whatever that call returns is returned.
+    """
+    target = import_class(import_str)
+    return target(*args, **kwargs)