In [1]:
import os,sys,subprocess
from collections import defaultdict

In [3]:
class Perceptron():
    def __init__(self,input_f=None):
        """if input_f is given, train the perception based on this file
        if input_f not given, use self.load_model() to read the model
        model file line shoud be in the format of:
            class\tline"""
        
        self.w=defaultdict(lambda:0.0)
        self.phi=defaultdict(lambda:0)
        self.lines=[]
        self.ys=[]
        
        if input_f:
            with open(input_f,"r",encoding="utf-8") as f:
                for line_ in f:
                    class_,line=line_.split("\t")
                    for i in line.split():
                        self.phi[i]+=1
                    self.lines.append(line)
                    self.ys.append(int(class_))
            for i in self.phi:
                self.w[i]=0
            
            # train the data 10 iters on all the lines
            self.model_flag=True
            self.train_model(10)
        else:
            self.model_flag=False
    
        
    def gen_phi(self,line):
        output=defaultdict(lambda:0)
        for i in line.split():
            output[i]+=1
        return output
    
    def train_model(self,n_iter,lines=None,ys=None):
        """train model for several more iterations if the model is loaded"""
        if not lines and not ys:
            lines=self.lines
            ys=self.ys
        if self.model_flag:
            for i in range(n_iter):
                for line,y in zip(lines,ys):
                    phi=self.gen_phi(line)
                    y_=self.predict_1(phi)
                    if y_!=y:
                        self.update_weights(phi,y)
        else:
            print("please either load model or init a modle with training corpus.")
    def update_weights(self,phi,y):
        for i,value in phi.items():
            self.w[i]+=value*y
    def save_model(self,f_name):
        with open(f_name, "w",encoding="utf-8") as f:
            for i in self.w:
                f.write("{word}\t{value}\n".format(word=i,value=self.w[i]))
        with open(f_name, "r",encoding="utf-8") as f:
            for i in range(5):
                print(f.readline())
    
    def load_model(self,f_name):
        """load the model based on the given file"""
        if not self.model_flag:
            with open(f_name,"r",encoding="utf-8") as f:
                for line in f:
                    line=line[:-1]
                    word,value = line.split("\t")
                    self.w[word]=float(value)
        else:
            overwrite_flag=input("model already exist, overwrite? (input y or n)")
            if overwrite_flag=="y":
                self.model_flag=False
                self.load_model(f_name)
            elif overwrite_flag=="n":
                pass
            else:
                print("please input y or n")
                self.load_model(f_name)
    
    def predict_1(self,phi):
        """predict using self model"""
        output=0
        for i in phi:
            output+=self.w[i]*phi[i]
        if output>=0:
            _=1
        else:
            _=-1
        return _
    def predict(self, f_name):
        with open(f_name,"r",encoding="utf-8") as f:
            with open(f_name+"_classification","w",encoding="utf-8") as fo:
                for line in f:
                    class_=self.predict_line(line)
                    fo.write(str(class_)+"\t"+line)
    
    def predict_line(self,line):
        phi=self.gen_phi(line)
        output=self.predict_1(phi)
        print(output)
        return output

                

In [4]:
if __name__=="__main__":
    percep=Perceptron("./../../data/titles-en-train.labeled")
    

In [22]:
percep.train_model(10)
percep.predict("data/titles-en-test.word1")

-1
1
1
-1
-1
-1
-1
-1
1
-1
1
1
1
-1
-1
1
1
-1
1
1
-1
-1
1
1
-1
1
-1
1
-1
1
-1
1
-1
-1
-1
1
-1
1
1
1
1
-1
1
-1
1
1
-1
1
1
1
-1
1
-1
1
-1
1
1
1
-1
-1
1
1
1
1
1
-1
1
1
1
1
-1
1
1
1
-1
-1
1
-1
-1
1
1
1
-1
1
1
-1
1
1
-1
-1
1
1
-1
1
1
1
1
-1
-1
-1
-1
-1
1
1
1
1
-1
-1
-1
1
1
1
-1
1
1
1
1
1
-1
1
1
-1
1
-1
1
1
1
1
1
1
-1
1
1
1
-1
-1
1
-1
-1
1
1
1
1
1
-1
-1
1
1
1
1
1
-1
-1
1
-1
1
-1
1
-1
-1
1
1
-1
1
-1
1
-1
1
-1
-1
1
-1
-1
1
-1
1
1
-1
-1
1
-1
-1
-1
-1
-1
-1
1
-1
-1
-1
-1
1
1
1
1
-1
1
1
-1
1
1
-1
-1
-1
-1
-1
-1
1
1
1
-1
1
1
-1
1
1
-1
-1
-1
1
-1
1
-1
-1
1
1
1
-1
1
-1
-1
1
1
-1
1
-1
1
1
-1
-1
1
-1
-1
-1
-1
1
1
1
-1
1
1
1
-1
1
1
-1
1
-1
1
-1
1
1
-1
1
1
-1
-1
-1
-1
1
-1
-1
-1
-1
-1
-1
1
1
-1
-1
-1
-1
-1
-1
-1
1
-1
1
-1
-1
1
1
-1
-1
1
1
-1
1
1
-1
-1
-1
1
1
1
-1
-1
1
-1
-1
-1
-1
-1
-1
1
1
-1
-1
-1
-1
1
1
-1
1
-1
-1
-1
-1
-1
1
-1
-1
1
-1
1
1
1
-1
-1
1
1
-1
1
-1
1
-1
1
-1
1
-1
1
-1
1
1
1
-1
1
-1
1
-1
1
1
-1
-1
-1
1
-1
-1
1
-1
1
-1
-1
-1
1
1
1
1
-1
-1
1
1
1
1
-1
-1
1
-1
1
-1
1
-1
1
1
-1
1
-1
-1
1
-1
1
1
1

In [25]:
print(subprocess.check_output("python ./script/grade-prediction.py ./data/titles-en-test.word1 ./data/titles-en-test.word1_classification",shell=True))

CalledProcessError: Command 'python ./script/grade-prediction.py ./data/titles-en-test.word1 ./data/titles-en-test.word1_classification' returned non-zero exit status 1