In [1]:
from __future__ import division
from numpy.random import random, randint
from random import choice
import numpy as np
from numpy import dot, array
from playxo import *
from stratagies import *
import collections
# from testindata import inputs, goal
import matplotlib.pylab as plt
from oneturntesting import inputs, goal
%matplotlib inline
np.random.seed(1337)
print len(goal)



787


In [2]:
# Activation Functions
def rectified_linear(x):
    """ReLU activation: element-wise maximum of 0.0 and ``x``."""
    floor = 0.0
    return np.maximum(floor, x)

def tanh_sigmoid(x):
    """Hyperbolic-tangent activation, applied element-wise via ``np.tanh``."""
    squashed = np.tanh(x)
    return squashed

def fast_sigmoid(x):
    """Cheap sigmoid-shaped activation: ``x / (1 + |x|)``, element-wise."""
    denominator = 1 + np.absolute(x)
    return x / denominator

def maxout_sigmoid(x):
    """Hard-limit activation: cap ``x`` at 1.0, then floor the result at 0.0."""
    capped = np.minimum(x, 1.0)
    return np.maximum(0.0, capped)

def random_sign():
    """Return either 1 or -1, picked by ``random.choice``."""
    signs = [1, -1]
    return choice(signs)

def linear(x):
    """Identity activation: hand the argument back unchanged."""
    passthrough = x
    return passthrough


class NNLayer(object):
    """A single dense layer computing ``activation(dot(input, matrix) + offset)``.

    Attributes:
        matrix: weight array (stored as a fresh ``np.array`` of the argument).
        offset: bias array (stored as a fresh ``np.array`` of the argument).
        activation: callable applied to the affine result.
    """
    def __init__(self, matrix, offset, activation=rectified_linear):
        self.matrix = np.array(matrix)
        self.offset = np.array(offset)
        self.activation = activation

    def __repr__(self):
        return "NNLayer(matrix={}, offset={}, activation={})".format(repr(self.matrix), repr(self.offset), self.activation)

    def calcout(self, invector):
        """Return the layer output for ``invector``.

        ``self.offset`` is added through NumPy broadcasting, so no per-row copy
        of the offset has to be built first.
        """
        line = dot(invector, self.matrix) + self.offset
        return self.activation(line)

    @classmethod
    def random(cls, inwidth, outwidth, weightrange=0.5, activation=rectified_linear):
        """Build a layer whose offset and weights are uniform in [-weightrange, weightrange).

        The offset is drawn before the weights; keep that order so seeded runs
        stay reproducible.
        """
        offset = (np.random.rand(1, outwidth) * 2 - 1) * weightrange
        weights = (np.random.rand(inwidth, outwidth) * 2 - 1) * weightrange
        return cls(weights, offset, activation=activation)
    
class NNLayer_Learn(NNLayer):
    """Dense layer that can be evolved through random mutation and crossover.

    ``maxdelta`` scales the random perturbation applied by :meth:`mutate`.
    When it is omitted, a value is drawn uniformly from [-1, 1).
    """
    def __init__(self, matrix, offset, activation=rectified_linear, maxdelta=None):
        super(NNLayer_Learn, self).__init__(matrix, offset, activation=activation)
        if maxdelta is None:
            self.maxdelta = random() * 2 - 1
        else:
            # ``0 + maxdelta`` keeps the numeric coercion of the original code
            # (a non-numeric maxdelta still raises TypeError here).
            self.maxdelta = 0 + maxdelta
        self.columns = len(self.matrix[0])
        self.rows = len(self.matrix)

    def __repr__(self):
        return "NNLayer_Learn(matrix={}, offset={}, activation={}, maxdelta={})".format(repr(self.matrix), repr(self.offset), self.activation, self.maxdelta)

    def mutate(self, mutaterate=0.33):
        """Return a new layer with a random subset of weights/offsets perturbed.

        Each entry is selected independently with probability ``mutaterate``
        and shifted by a uniform value in [-1, 1) scaled by ``self.maxdelta``.
        The layer itself is left untouched. No ``self.error`` is required, so
        a layer that has never been scored can be mutated too.
        """
        # Draw order (mask, delta, mask, delta) is kept so seeded runs reproduce.
        mutatematrix = np.random.binomial(1, mutaterate, (self.rows, self.columns))
        delta = (np.random.rand(self.rows, self.columns) * 2 - 1) * self.maxdelta
        nmatrix = self.matrix + mutatematrix * delta

        mutateoffset = np.random.binomial(1, mutaterate, (1, self.columns))
        delta = (np.random.rand(1, self.columns) * 2 - 1) * self.maxdelta
        noffset = self.offset + delta * mutateoffset

        return self.__class__(nmatrix, noffset, activation=self.activation, maxdelta=self.maxdelta)

    def breed(self, other, prob=0.33):
        """Return a child layer mixing this layer's entries with ``other``'s.

        Every weight (and, separately, every offset) is taken from ``self``
        with probability ``prob`` and from ``other`` otherwise. The child
        inherits this layer's activation and maxdelta.
        """
        proportionself = np.random.binomial(1, prob, (self.rows, self.columns))
        proportionother = 1 - proportionself
        nmatrix = proportionself * self.matrix + proportionother * other.matrix

        proportionself = np.random.binomial(1, prob, (1, self.columns))
        proportionother = 1 - proportionself
        noffset = proportionself * self.offset + proportionother * other.offset
        return self.__class__(nmatrix, noffset, activation=self.activation, maxdelta=self.maxdelta)


class Perceptron(NNLayer_Learn):
    """Single trainable layer with error scoring and a delta-rule update."""
    def __repr__(self):
        return "Perceptron(matrix={}, offset={}, activation={}, maxdelta={})".format(repr(self.matrix), repr(self.offset), self.activation, self.maxdelta)

    def calcerror(self, inputs, goal):
        """Compute the root-mean-square error against ``goal``.

        The value is cached on ``self.error`` and returned.
        """
        output = self.calcout(inputs)
        error = np.array(goal) - output
        average = np.sqrt((error**2).mean())
        self.error = average
        return average

    def graddecent(self, inputs, goal, learningrate=0.01, error=None, see=False):
        """Return an updated copy of this layer after one delta-rule step.

        Args:
            inputs: input sample(s) fed to the layer.
            goal: target output; only used when ``error`` is not supplied.
            learningrate: step size applied to both the weight and offset update.
            error: precomputed ``goal - output``; computed here when None.
            see: when true, print the copied weights and the weight delta.

        The activation's derivative is not applied; the update is
        ``dot(inputs.T, error) * learningrate``.
        """
        if error is None:
            output = self.calcout(inputs)
            error = np.array(goal) - output
        inputs = np.array(inputs).T
        deltamatrix = dot(inputs, error) * learningrate
        deltaoffset = error * learningrate
        nperceptron = self.copy()
        if see:
            # Single-argument print(...) gives the same output on Python 2 and 3.
            print(nperceptron.matrix)
            print('')
            print(deltamatrix)
            print('')
            # The original printed dot(inputs, error) * learningrate again here,
            # which is the same value as deltamatrix.
            print(deltamatrix)
        nperceptron.matrix += deltamatrix
        nperceptron.offset += deltaoffset
        return nperceptron

    def calcangle(self, inputs, goal):
        """Return the mean cosine between each output row and its goal row.

        Each pair is normalised by its Euclidean (L2) norm so the dot product
        really is the cosine of the angle between the two vectors. The result
        is cached on ``self.angle``. An all-zero output or goal row yields a
        NaN for that pair (division by zero), as before.
        """
        output = self.calcout(inputs)
        cosines = []
        for out, target in zip(output, np.array(goal)):
            scale = np.linalg.norm(out) * np.linalg.norm(target)
            cosines.append(dot(out, target) / scale)
        aangles = np.array(cosines).mean()
        self.angle = aangles
        return aangles

    def copy(self):
        """Return a new layer with copied weights/offsets (``error`` is not carried over)."""
        return self.__class__(matrix=self.matrix, offset=self.offset, activation=self.activation, maxdelta=self.maxdelta)

    @classmethod
    def initalize(cls, inwidth, outwidth, activation=rectified_linear, maxdelta=None):
        """Build a layer with offsets of 0.5 and weights uniform in ±1/sqrt(inwidth)."""
        offset = np.zeros((1, outwidth)) + 0.5
        weights = (np.random.rand(inwidth, outwidth) * 2 - 1) / np.sqrt(inwidth)
        return cls(weights, offset, activation=activation, maxdelta=maxdelta)
    

class Hidden(object):
    """A feed-forward stack of layers evaluated in order."""
    def __init__(self, layers):
        self.layers = list(layers)

    def __repr__(self):
        return "Hidden(layers={})".format(repr(self.layers))

    def calcout(self, inputs):
        """Feed ``inputs`` through every layer in turn and return the final output."""
        for layer in self.layers:
            inputs = layer.calcout(inputs)
        return inputs

    def calcerror(self, inputs, goal):
        """Compute the network's root-mean-square error against ``goal``.

        The value is cached on ``self.error`` and also copied onto every
        layer's ``error`` attribute.
        """
        outputs = self.calcout(inputs)
        error = np.array(goal) - outputs
        average = np.sqrt((error**2).mean())
        self.error = average
        for layer in self.layers:
            layer.error = self.error
        return average

    def graddecent(self, inputs, goal, learningrate=0.01):
        """Return a new network after one backward pass of layer updates.

        Args:
            inputs: input sample(s) for the first layer.
            goal: target output of the last layer.
            learningrate: step size forwarded to each layer's ``graddecent``.
                The default matches the layers' own default, and accepting it
                lets callers that pass ``learningrate=`` use this class too.

        The error handed to each earlier layer is ``dot(error, matrix.T)`` of
        the layer after it; activation derivatives are not applied.
        """
        # One forward pass, remembering what each layer was fed.
        activations = [inputs]
        signal = np.array(inputs)
        for layer in self.layers:
            signal = layer.calcout(np.array(signal))
            activations.append(signal)
        # The last recorded activation is the network output, so no second
        # forward pass is needed to compute the output error.
        error = np.array(goal) - activations[-1]
        nlayers = []
        for layer, layerinput in zip(reversed(self.layers), reversed(activations[:-1])):
            nlayers.append(layer.graddecent(layerinput, goal, learningrate=learningrate, error=error))
            error = dot(error, np.array(layer.matrix).T)
        nlayers.reverse()
        return self.__class__(nlayers)

    def mutate(self):
        """Return a new network made of each layer's ``mutate()`` result."""
        return self.__class__([layer.mutate() for layer in self.layers])

    def breed(self, other):
        """Return a new network breeding each layer with ``other``'s matching layer."""
        return self.__class__([slayer.breed(olayer) for slayer, olayer in zip(self.layers, other.layers)])

    def copy(self):
        """Return a new network object that shares the same layer objects (shallow copy)."""
        return self.__class__(layers=self.layers)

    @classmethod
    def initalize(cls, inwidth, hiddenwidths, outwidth, activation=rectified_linear, endactivation=fast_sigmoid, maxdelta=None):
        """Build a network ``inwidth -> hiddenwidths... -> outwidth`` of Perceptrons.

        Hidden layers use ``activation`` and the output layer uses
        ``endactivation``. When ``hiddenwidths`` is empty, the single layer
        uses ``activation``, as it always has. Layers are created front to
        back, so seeded runs draw weights in the same order as before.
        """
        widths = [inwidth] + list(hiddenwidths) + [outwidth]
        nlayers = len(widths) - 1
        layers = []
        for idx in range(nlayers):
            isoutput = nlayers > 1 and idx == nlayers - 1
            layers.append(Perceptron.initalize(
                widths[idx], widths[idx + 1],
                activation=endactivation if isoutput else activation,
                maxdelta=maxdelta))
        return cls(layers)


class XOPerceptron(Perceptron):
    """Perceptron that can be called as a noughts-and-crosses player."""
    def __call__(self, board, side):
        """Remember which mark we play (and the opponent's), then pick a move."""
        self.side = side
        if side == 'x':
            self.oside = 'o'
        else:
            self.oside = 'x'
        return self.calcmove(board)

    def __repr__(self):
        return "XOPerceptron(matrix={}, offset={}, activation={}, maxdelta={})".format(repr(self.matrix), repr(self.offset), self.activation, self.maxdelta)

    def transboard(self, inputs):
        """Encode a board as three concatenated 0/1 planes: ours, theirs, empty.

        Newline characters in ``inputs`` are skipped before encoding.
        """
        cells = [cell for cell in inputs if cell != '\n']
        planes = []
        for marker in (self.side, self.oside, '.'):
            planes.extend(1 if cell == marker else 0 for cell in cells)
        return np.array(planes)

    def calcmove(self, input):
        """Return the position with the highest network output."""
        scores = self.calcout(self.transboard(input))
        best = np.argmax(scores.flatten())
        return best + 1 #board plays 1-9


class XOHidden(Hidden):
    """Multi-layer network that can be called as a noughts-and-crosses player."""
    def __call__(self, board, side):
        """Remember which mark we play (and the opponent's), then pick a move."""
        self.side = side
        if side == 'x':
            self.oside = 'o'
        else:
            self.oside = 'x'
        return self.calcmove(board)

    def __repr__(self):
        return "XOHidden(layers={})".format(repr(self.layers))

    def transboard(self, inputs):
        """Encode a board as three concatenated 0/1 planes: ours, theirs, empty.

        Newline characters in ``inputs`` are skipped before encoding.
        """
        cells = [cell for cell in inputs if cell != '\n']
        planes = []
        for marker in (self.side, self.oside, '.'):
            planes.extend(1 if cell == marker else 0 for cell in cells)
        return np.array(planes)

    def calcmove(self, input):
        """Return the position with the highest network output."""
        scores = self.calcout(self.transboard(input))
        best = np.argmax(scores.flatten())
        return best + 1 #board plays 1-9


class GeneticEvolution(object):
    """Evolve a population by keeping the fittest and refilling with their offspring.

    Args:
        inputs, goal: training samples and targets used to score individuals.
        population: individuals exposing ``calcerror``, ``mutate``, ``breed``
            and ``calcout``; every one is scored on construction.
        cutoff: number of lowest-error individuals kept each generation.
    """
    def __init__(self, inputs, goal, population, cutoff):
        self.inputs = np.array(inputs)
        self.goal = np.array(goal)
        self.population = np.array(population)
        self.popsize = len(population)
        self.cutoff = int(cutoff)
        for pop in self.population:
            pop.calcerror(self.inputs, self.goal)

    def _best(self):
        """Return the lowest-error individual of the current population."""
        return min(self.population, key=lambda person: person.error)

    def __call__(self, gens, threshold):
        """Run up to ``gens`` generations; stop early once the best error drops below ``threshold``.

        Prints a short report about the winner and returns it. The winner is
        looked up after the last generation, so it reflects the final
        population, and the lookup also works when ``gens`` is 0.
        """
        for i in range(gens):
            if threshold > self._best().error:
                # Single-argument print(...) gives the same output on Python 2 and 3.
                print('I have finished learning' + ' ' + str(i))
                break
            self.learn(i)
        best = self._best()
        print(best)
        if len(self.inputs) > 0:
            # Show sample 3 as before, falling back to the last sample on
            # data sets with fewer than four rows.
            sample = min(3, len(self.inputs) - 1)
            print(best.calcout(self.inputs[sample]))
            print(self.goal[sample])
        print(best.error)
        return best

    def learn(self, i=0, prob=0.33):
        """Advance one generation.

        The ``cutoff`` fittest individuals survive. Each missing slot is
        filled by a mutated survivor (when the random draw is <= ``prob``) or
        by breeding two mutated survivors (otherwise). ``i`` is accepted for
        interface compatibility and is not used.
        """
        ranked = sorted(self.population, key=lambda person: person.error)
        fittest = ranked[:self.cutoff]
        # max(0, ...) prevents an endless loop when cutoff exceeds the population size.
        childrenneeded = max(0, self.popsize - self.cutoff)
        children = []
        for _ in range(childrenneeded):
            chance = np.random.random_sample()
            if chance > prob:
                child = (choice(fittest).mutate()).breed((choice(fittest).mutate()))
            else:
                child = choice(fittest).mutate()
            children.append(child)
        for child in children:
            child.calcerror(self.inputs, self.goal)
        self.population = children + fittest
    
class MutationLearning(object):
    """Hill climbing: keep a single genome and accept a mutation only if it lowers the error."""
    def __init__(self, inputs, goal, genome):
        self.inputs = np.array(inputs)
        self.goal = np.array(goal)
        # Work on a copy of the caller's genome and score it straight away.
        self.genome = genome.copy()
        self.genome.calcerror(inputs, goal)

    def __call__(self, gens, threshold):
        """Try up to ``gens`` mutations, stopping once the error is below ``threshold``."""
        for i in range(gens):
            if threshold > self.genome.error:
                print('I have finished learning' + ' ' + str(i))
                break
            self.learn(i)
        return self.genome

    def learn(self, i=0):
        """Mutate a scored copy of the genome and adopt it when it scores better."""
        candidate = self.genome.copy()
        # The copy is scored before mutating, as in the original sequence.
        candidate.calcerror(self.inputs, self.goal)
        candidate = candidate.mutate()
        candidate.calcerror(self.inputs, self.goal)
        if not candidate.error < self.genome.error:
            return
        self.genome = candidate
        print('gen' + ' ' + str(i))
        print('genome.error' + ' ' + str(self.genome.error))


class GradeintDecent(object):
    """Repeated per-sample ``graddecent`` passes over the training set."""
    def __init__(self, inputs, goal, genome):
        self.inputs = inputs
        self.goal = goal
        self.genome = genome
        self.genome.calcerror(self.inputs, self.goal)

    def __call__(self, gens, threshold):
        """Run up to ``gens`` passes; stop as soon as the error is no longer above ``threshold``."""
        for i in range(gens):
            if not self.genome.error > threshold:
                print('Finished Learning' + ' ' + str(i))
                break
            self.learn()
            # Re-score after the pass so the next iteration sees a fresh error.
            self.genome.calcerror(self.inputs, self.goal)
        return self.genome

    def learn(self):
        """One pass: update the genome on every (sample, target) pair, one at a time."""
        for sample, target in zip(self.inputs, self.goal):
            self.genome = self.genome.graddecent([sample], target, learningrate=0.01)
        

class LayeredLearning(object):
    """Train one layer at a time of a two-layer network.

    This Learning Method Works For Two Layers Only: ``genome.layers[0]`` is the
    "back" (input-side) layer and ``genome.layers[1]`` the "front" (output-side)
    layer.
    """
    def __init__(self, inputs, goal, genome):
        self.inputs = inputs
        self.goal = goal
        self.genome = genome

    def __call__(self, gens, threshold, method):
        """Run up to ``gens`` passes, stopping once the error is below ``threshold``.

        ``method == 1`` trains the front layer each pass; any other value
        trains the back layer. The error is measured on this object's own
        ``inputs``/``goal`` rather than on module-level globals of the same
        name.
        """
        for i in range(gens):
            if self.genome.calcerror(self.inputs, self.goal) < threshold:
                # Single-argument print(...) gives the same output on Python 2 and 3.
                print('I have finished learning' + ' ' + str(i))
                break
            elif method == 1:
                self.frontlearn()
            else:
                self.backlearn()
        return self.genome

    @staticmethod
    def calcdelta(input, error):
        """Return ``(deltamatrix, deltaoffset)`` for one update at a fixed 0.01 learning rate.

        ``deltamatrix`` is ``dot(input, error) * 0.01`` and ``deltaoffset`` is
        ``error * 0.01``; callers pass ``input`` already transposed.
        """
        learningrate = 0.01
        deltamatrix = dot(input, error) * learningrate
        deltaoffset = error * learningrate
        return deltamatrix, deltaoffset

    @staticmethod
    def remeberinputs(layers, input):
        """Forward ``input`` through both layers; return ``(hidden, output)`` activations."""
        hidden = layers[0].calcout(input)
        output = layers[1].calcout(hidden)
        return hidden, output

    @staticmethod
    def backtrain(frontPerceptron, hiddenact, input, fronterror):
        """Return the update for the back layer from the front layer's error.

        The error is projected back through the front weights and zeroed
        wherever the hidden activation is not positive (presumably because the
        hidden layer uses ``rectified_linear`` — confirm for other activations).
        """
        hiddenerror = dot(fronterror, frontPerceptron.matrix.T)
        hiddenerror = hiddenerror * (hiddenact > 0)
        return LayeredLearning.calcdelta(input.T, hiddenerror)

    @staticmethod
    def fronttrain(hiddenact, error):
        """Return the update for the front layer from the hidden activation and output error."""
        return LayeredLearning.calcdelta(hiddenact.T, error)

    @staticmethod
    def changelayer(layer, delta):
        """Return a copy of ``layer`` with ``delta = (deltamatrix, deltaoffset)`` added."""
        nperceptron = layer.copy()
        nperceptron.matrix += delta[0]
        nperceptron.offset += delta[1]
        return nperceptron

    def frontlearn(self):
        """One pass over the data updating only the front layer; returns the new genome."""
        for input, goal in zip(self.inputs, self.goal):
            layers = self.genome.layers
            frontPerceptron = layers[1]
            backPerceptron = layers[0]
            hidden, output = self.remeberinputs(layers, input)
            error = goal - output
            delta = self.fronttrain(hidden, error)
            newlayer = self.changelayer(frontPerceptron, delta)
            self.genome = self.genome.__class__([backPerceptron, newlayer])
        return self.genome

    def backlearn(self):
        """One pass over the data updating only the back layer; returns the new genome."""
        for input, goal in zip(self.inputs, self.goal):
            layers = list(self.genome.layers)
            frontPerceptron = layers[1]
            backPerceptron = layers[0]
            hidden, output = self.remeberinputs(layers, input)
            error = goal - output
            # backtrain transposes its input, so give the sample a leading row axis.
            input = np.array([input])
            delta = self.backtrain(frontPerceptron, hidden, input, error)
            newlayer = self.changelayer(backPerceptron, delta)
            self.genome = self.genome.__class__([newlayer, frontPerceptron])
        return self.genome

In [189]:
# Show the repr of `thing`. No cell visible here assigns it, so it has to exist
# in the kernel already (NOTE(review): add the defining cell so Run All works).
print thing

XOHidden(layers=[Perceptron(matrix=array([[-0.05036474,  0.00466839, -0.14546239, ..., -0.02561291,
        -0.07271679, -0.27185355],
       [-0.13078992,  0.15044616,  0.21322273, ...,  0.10471762,
        -0.13053851, -0.08963196],
       [ 0.16817587, -0.0542259 ,  0.25543441, ..., -0.01195401,
         0.14747203,  0.12446942],
       ..., 
       [ 0.13481233, -0.13714578,  0.05228019, ..., -0.00855169,
         0.04343558, -0.08015454],
       [-0.07084448, -0.04242493,  0.19275515, ...,  0.06660101,
         0.12650169, -0.00478674],
       [-0.02737759, -0.02896858, -0.15378881, ..., -0.16832885,
         0.1803281 , -0.21113327]]), offset=array([[ 0.50612307,  0.49970693,  0.48507016,  0.49159097,  0.47628962,
         0.50319663,  0.50665941,  0.48637459,  0.50845198,  0.47974575,
         0.49213519,  0.491422  ,  0.49721868,  0.4972762 ,  0.51066486,
         0.48511008,  0.53005776,  0.49747565,  0.50189388,  0.49804666,
         0.47604559,  0.51426811,  0.50783353,  0.4

In [8]:
# Experiment configuration: generation budget and the error level that stops training.
gens = 10
threshold = 0.1

# Alternative XOR toy data set, kept for quick sanity checks.
# inputs = np.array([[1,1],[1,0],[0,1],[0,0]])
# goal = np.array([[0],[1],[1],[0]])

popsize = 100
# Two candidate populations: single-layer players and players with one 300-unit hidden layer.
population1 = [XOPerceptron.initalize(27,9,maxdelta=0.1) for i in range(popsize)]
population2 = [XOHidden.initalize(27,[300],9,endactivation=linear,maxdelta=0.1) for i in range(popsize)]
cutoff = 50
# Rebinds the imported training data as arrays (re-running the cell is harmless).
inputs = np.array(inputs)
goal = np.array(goal)

# Layer-wise trainer seeded with the first hidden-layer network.
learningfunc = LayeredLearning(inputs,goal,population2[0])

In [226]:
# Play 1000 games of stupidai against `thing` and count each result code.
outcomes = [play_game(stupidai, thing, newboard(), see=False) for i in range(1000)]
results = array(outcomes)
for label, code in [('lose', -1), ('draw', 0), ('win', 1), ('mistake', -2)]:
    print(label + ' ' + str(np.sum(results == code)))

lose 784
draw 141
win 75
mistake 0


In [227]:
# Play 1000 games of stupidai against a fresh oneturnai and count each result code.
outcomes = [play_game(stupidai, oneturnai(), newboard(), see=False) for i in range(1000)]
results = array(outcomes)
for label, code in [('lose', -1), ('draw', 0), ('win', 1), ('mistake', -2)]:
    print(label + ' ' + str(np.sum(results == code)))

lose 694
draw 229
win 77
mistake 0


In [28]:
# Ten front-layer training passes, printing the training error after each one.
for i in range(10):
    new_genome = learningfunc.frontlearn()
    print i, new_genome.calcerror(inputs, goal)
print new_genome.calcerror(inputs, goal)
# frontlearn() already returns learningfunc.genome, so this only restates the binding.
new_genome = learningfunc.genome

0 0.0946449339786
1 0.0945973763158
2 0.0945481728289
3 0.0944992650118
4 0.0944508620073
5 0.0944032109672
6 0.0943565617428
7 0.0943111479781
8 0.0942671767582
9 0.0942248238383
0.0942248238383


In [29]:
# Ten back-layer training passes, printing the training error after each one.
for i in range(10):
    new_genome = learningfunc.backlearn()
    print i, new_genome.calcerror(inputs, goal)
#     print LayeredLearning.remeberinputs(new_genome.layers, inputs)[0]
print new_genome.calcerror(inputs, goal)
# backlearn() already returns learningfunc.genome, so this only restates the binding.
new_genome = learningfunc.genome

0 0.0761301289064
1 0.0758597551326
2 0.0756589883066
3 0.0754322985718
4 0.0752405382066
5 0.0750893757835
6 0.0748806944357
7 0.0747225150317
8 0.0745489644525
9 0.0743840270611
0.0743840270611


In [180]:
# Dump every layer's weights and offsets as plain nested lists, in a form that can be pasted back as code.
# Needs `new_genome` to have a `.layers` attribute; the recorded run failed with
# AttributeError because `new_genome` was an XOPerceptron at that point.
stuff =  [[[[b for b in a] for a in i.matrix],[[d for d in c] for c in i.offset]] for i in new_genome.layers]
print 'Perceptron(matrix =',stuff[0][0],',offset =',stuff[0][1],')'
print 
print 'Perceptron(matrix =',stuff[1][0],',offset =',stuff[1][1],')'

AttributeError: 'XOPerceptron' object has no attribute 'layers'

In [3]:
# Hand-check of one LayeredLearning.backtrain step on a tiny 2-5-1 network.
# The sample and target use demo_-prefixed names so this cell neither overwrites
# the training-set `goal` that other cells score against nor shadows the
# builtin `input`.
demo_input = np.array([1,1])
demo_goal = np.array([[0]])
layers = [Perceptron.initalize(2,5),Perceptron.initalize(5,1)]
frontPerceptron = layers[1]
backPerceptron = layers[0]
hidden, output = LayeredLearning.remeberinputs(layers, demo_input)
error = demo_goal - output
hiddenerror = dot(error, frontPerceptron.matrix.T)
# backtrain transposes its input, so give the sample a leading row axis.
demo_batch = np.array([demo_input])
delta = LayeredLearning.backtrain(frontPerceptron, hidden, demo_batch, error)
newlayer = LayeredLearning.changelayer(backPerceptron, delta)
# Single-argument print(...) gives the same output on Python 2 and 3.
print(demo_batch)
print(hidden)
print(output)
print(demo_goal)
print(error)
print(delta)
print(hiddenerror)
print(hiddenerror * (hidden > 0))

[[1 1]]
[[ 0.18946344  0.          0.85950978  0.77171489  0.        ]]
[[ 0.52625015]]
[[0]]
[[-0.52625015]]
(array([[ 0.00053529, -0.        ,  0.00176482, -0.00227603,  0.        ],
       [ 0.00053529, -0.        ,  0.00176482, -0.00227603,  0.        ]]), array([[ 0.00053529, -0.        ,  0.00176482, -0.00227603,  0.        ]]))
[[ 0.05352947 -0.06048453  0.1764824  -0.22760267  0.02672363]]
[[ 0.05352947 -0.          0.1764824  -0.22760267  0.        ]]


In [203]:
# Show the repr of the current `new_genome` (an XOPerceptron in the recorded run).
print new_genome

XOPerceptron(matrix=array([[ -3.18229109e-01,  -1.24367385e-02,  -9.52496399e-02,
          5.99703383e-02,  -2.88950898e-02,  -5.81820121e-02,
         -3.98365963e-02,  -3.66665508e-02,  -1.06859435e-01],
       [ -4.42625631e-02,  -3.24958194e-01,  -5.71645230e-02,
         -8.59546871e-02,   9.39843124e-02,  -2.01538530e-02,
         -2.99131061e-02,   3.14829630e-02,   1.27836212e-02],
       [ -1.47179251e-02,  -7.82607154e-02,  -4.10843754e-01,
          3.10527097e-03,   4.96831972e-02,  -1.14196921e-01,
         -1.43281630e-01,  -5.16792807e-03,  -3.86464815e-03],
       [ -3.10332345e-02,  -1.10576076e-01,   5.23579175e-03,
         -3.19510290e-01,  -2.55972894e-02,   1.01510126e-01,
         -2.40425875e-02,  -9.49693831e-02,   9.31189041e-02],
       [ -1.38880668e-01,   1.09486763e-01,  -5.80207671e-02,
          9.57558426e-02,  -4.09530858e-01,   3.78654755e-02,
          1.58857305e-02,  -1.95228607e-02,  -8.97327758e-02],
       [ -3.72305019e-02,  -5.95835280e-02,  

In [30]:
# Play 1000 games of new_genome against a fresh oneturnai and count each result code.
outcomes = [play_game(new_genome, oneturnai(), newboard(), see=False) for i in range(1000)]
results = array(outcomes)
for label, code in [('lose', -1), ('draw', 0), ('win', 1), ('mistake', -2)]:
    print(label + ' ' + str(np.sum(results == code)))

lose 29
draw 664
win 307
mistake 0


In [2536]:
# Train a fresh single-layer player with per-sample delta-rule updates.
genome1 = XOPerceptron.initalize(27,9,activation=fast_sigmoid,maxdelta=0.1)
# Error before any training, for reference.
print genome1.calcerror(inputs, goal)
# 1000 passes over the data; each sample produces a new genome1, and the
# training error is printed once per pass.
for i in range(1000):
    for input,go in zip(inputs, goal):
        genome1 = genome1.graddecent([input], go, learningrate=0.01)
    print i,genome1.calcerror(inputs, goal)


# genome2 = XOHidden.initalize(27,[400],9,maxdelta=0.1)
# print genome2.calcerror(inputs, goal)
# for i in range(1):
#     for input,go in zip(inputs, goal):
#         genome2 = genome2.graddecent([input], go)
#     print i,genome2.calcerror(inputs, goal)

# Play 1000 games of genome1 against a fresh oneturnai and count each result code.
outcomes = [play_game(genome1, oneturnai(), newboard(), see=False) for i in range(1000)]
results = array(outcomes)
for label, code in [('lose', -1), ('draw', 0), ('win', 1), ('mistake', -2)]:
    print(label + ' ' + str(np.sum(results == code)))

0.410128585026
0 0.176399077647
1 0.151550228774
2 0.146609294559
3 0.145228387225
4 0.14475703397
5 0.14456820622
6 0.144480699734
7 0.144434685755
8 0.144407969867
9 0.144391311331
10 0.144380396838
11 0.144372993661
12 0.144367844042
13 0.144364192421
14 0.144361563108
15 0.144359646053
16 0.144358233741
17 0.144357184284
18 0.144356398883
19 0.144355807649
20 0.144355360449
21 0.144355020887
22 0.144354762253
23 0.144354564771
24 0.144354413687
25 0.14435429792
26 0.144354209105
27 0.144354140903
28 0.144354088491
29 0.144354048188
30 0.144354017184
31 0.144353993324
32 0.144353974957
33 0.144353960815
34 0.144353949924
35 0.144353941537
36 0.144353935076
37 0.1443539301
38 0.144353926266
39 0.144353923312
40 0.144353921036
41 0.144353919283
42 0.144353917932
43 0.144353916892
44 0.14435391609
45 0.144353915472
46 0.144353914996
47 0.144353914629
48 0.144353914346
49 0.144353914128
50 0.144353913961
51 0.144353913831
52 0.144353913732
53 0.144353913655
54 0.144353913596
55 0.144353

In [44]:
# 1000 per-sample training passes for `genome2`, printing the error after each pass.
# NOTE(review): the only visible creation of `genome2` is in commented-out code,
# so this cell relies on a value already present in the kernel.
for i in range(1000):
    for input,go in zip(inputs, goal):
        genome2 = genome2.graddecent([input], go)
    print i,genome2.calcerror(inputs, goal)

0 0.152296324301
1 0.152261425984
2 0.152225417943
3 0.152187192709
4 0.152147279984
5 0.152108276743
6 0.152070923864
7 0.15203546207
8 0.15200522711
9 0.151975171799
10 0.151947034913
11 0.151916203917
12 0.151883441796
13 0.151845929301
14 0.151806824838
15 0.151767051392
16 0.151728556228
17 0.15168948062
18 0.151650649019
19 0.151610106509
20 0.151565221498
21 0.151514613873
22 0.151468339154
23 0.151427441635
24 0.151391030195
25 0.151353200159
26 0.15131655697
27 0.151274991066
28 0.15123801927
29 0.151205029546
30 0.151169280986
31 0.151136915787
32 0.151107744383
33 0.151078518296
34 0.151045462254
35 0.151005678303
36 0.150960712644
37 0.150917067626
38 0.150867772527
39 0.150820829781
40 0.150773824622
41 0.1507230107
42 0.150665962844
43 0.150611551017
44 0.150558105347
45 0.150505123203
46 0.150452564506
47 0.150402447605
48 0.150353212214
49 0.150304779082
50 0.150256315906
51 0.150208234831
52 0.150157782149
53 0.150105790394
54 0.150051758249
55 0.149994737765
56 0.1499

In [69]:
# Play 1000 games of genome2 against a fresh oneturnai and count each result code.
outcomes = [play_game(genome2, oneturnai(), newboard(), see=False) for i in range(1000)]
results = array(outcomes)
for label, code in [('lose', -1), ('draw', 0), ('win', 1), ('mistake', -2)]:
    print(label + ' ' + str(np.sum(results == code)))

lose 566
draw 275
win 159
mistake 0


In [31]:
# Print the repr of each layer of genome2, front to back.
for layer in genome2.layers:
    print layer

Perceptron(matrix=array([[-0.1105651 , -0.0144251 ,  0.00540171, ...,  0.01556813,
        -0.01845936, -0.02657805],
       [-0.19538557, -0.21631911, -0.06068865, ...,  0.00353185,
        -0.09756482, -0.03260555],
       [-0.09423498,  0.02879951, -0.08408139, ...,  0.01370902,
        -0.09334537, -0.061537  ],
       ..., 
       [ 0.04733606, -0.11092962,  0.08213332, ..., -0.04607225,
        -0.01058984, -0.20558632],
       [-0.00907008,  0.08935414, -0.08277357, ..., -0.12924756,
        -0.184546  , -0.07299518],
       [ 0.02518298,  0.10496885, -0.06740029, ...,  0.0422988 ,
        -0.05789831, -0.05567849]]), offset=array([[ 0.29918894,  0.40779739,  0.42364219,  0.32012807,  0.33478156,
         0.44228438,  0.28448688,  0.43156174,  0.34811057,  0.34164285,
         0.34648488,  0.33798988,  0.36739216,  0.41557836,  0.38134754,
         0.40241339,  0.3591022 ,  0.42351403,  0.46934414,  0.31581757,
         0.44633127,  0.31986568,  0.45129044,  0.40335955,  0.31303

In [None]:
# Spot-check training sample 3: largest signed (goal - output) difference,
# the raw network output, the target, and finally the genome itself.
print np.max(goal[3] - new_genome.calcout(inputs[3]))
print new_genome.calcout(inputs[3])
print goal[3]
print new_genome

In [244]:
weights = array([[-1.81037673,  0.23312287,  0.12380398,  0.59271222, -0.21961347,
        -0.24189064, -0.18492549,  0.40942079,  0.62681685],
       [ 0.10507507, -0.47251848, -0.17522381, -0.49585111,  0.64292612,
        -0.56700858,  0.21409164,  0.22750325,  0.13966019],
       [ 0.18662278,  0.57553949, -0.82078332,  0.84414527, -0.58571047,
         0.50511673,  0.15646844,  0.38760069,  0.65468204],
       [ 0.35836048,  0.03055102,  0.12177096, -0.68164249,  0.72977905,
         0.26062311,  0.51862588,  0.11463412,  0.34654182],
       [-0.06039925, -0.14237871, -0.32621089, -0.10693916, -1.89149121,
         0.32593722,  0.2253124 ,  0.43900727,  0.31974643],
       [ 0.04038825, -0.89012302,  0.17433701, -0.91811679,  0.64500178,
        -0.82089268, -0.24630297, -0.21025795, -0.10351606],
       [ 0.28726664, -0.88837443,  0.86301127, -0.04328763,  0.26820321,
         0.16479867, -1.03358411, -0.67552516,  0.07088758],
       [ 0.34697313, -0.25822395,  0.82327927, -0.292892  ,  0.29528382,
        -0.03081845,  1.39212354, -1.48440078,  0.21346592],
       [ 0.24947878,  0.07209573,  0.67391185,  0.1555395 ,  0.4708425 ,
        -0.15400796, -0.3161104 ,  0.0742885 , -0.83727895],
       [-1.75719484,  0.6790507 ,  0.04164357,  0.86369584, -0.61361412,
        -0.04898432, -0.55133523,  0.46849776,  0.29046959],
       [ 0.15962215, -0.020674  , -0.26509194, -0.24965251,  0.24784725,
        -0.40471488, -0.15419607,  0.2902635 , -0.22479026],
       [ 0.23188761,  1.06107406, -0.91959682,  1.1288583 , -0.99581776,
         0.70415068, -0.19810195,  0.44751379,  0.3027325 ],
       [ 0.4036935 ,  0.46841662,  0.00452004, -0.41200115,  0.31411458,
         0.43091827,  0.14158208,  0.14440356, -0.01580276],
       [-0.02311493,  0.3018782 , -0.46552146,  0.17001483, -2.3058165 ,
         0.50074046, -0.11976625,  0.47156255, -0.04739309],
       [ 0.09210043, -0.43019104,  0.05909833, -0.61543668,  0.2313949 ,
        -0.63187856, -0.62571263, -0.17233278, -0.45816605],
       [ 0.30670582, -0.4370403 ,  0.75051324,  0.24001105, -0.14191502,
         0.36608382, -1.40337295, -0.63837671, -0.28176232],
       [ 0.37291697,  0.1735898 ,  0.71705302, -0.02419961, -0.11654743,
         0.11579518,  1.04461903, -1.4368566 , -0.15563134],
       [ 0.28738102,  0.50461871,  0.57058055,  0.44718108,  0.05689595,
         0.03560982, -0.69874678,  0.11366661, -1.2099532 ],
       [ 2.43516   ,  0.4699339 ,  0.10216703,  0.7595714 , -0.39742544,
        -0.13684927, -0.35447127,  0.42720497,  0.45697943],
       [ 0.13558378,  3.14242857, -0.20441237, -0.3531923 ,  0.44131861,
        -0.50476821,  0.03914851,  0.29382188, -0.01870767],
       [ 0.2227648 ,  0.83039352,  2.98731559,  1.01023384, -0.79021568,
         0.63189788, -0.00974823,  0.43606283,  0.47549348],
       [ 0.37586375,  0.26720915,  0.04788561,  2.79238469,  0.53138537,
         0.35297299,  0.34260521,  0.12797431,  0.17365867],
       [-0.06361709,  0.08816399, -0.37817551,  0.04445366,  1.73107437,
         0.42249597,  0.06638788,  0.49249741,  0.1468038 ],
       [ 0.04857904, -0.68010043,  0.08697348, -0.76789917,  0.43959526,
         3.05299428, -0.4423963 , -0.18248278, -0.26444211],
       [ 0.30066625, -0.66211347,  0.81430111,  0.08440001,  0.07876098,
         0.28421749,  2.29981692, -0.65511985, -0.08921622],
       [ 0.37713609, -0.01385977,  0.7893311 , -0.14737346,  0.09740908,
         0.03718125,  1.23053575,  2.30884356,  0.02071235],
       [ 0.26101373,  0.29036808,  0.61590706,  0.32896206,  0.27871855,
        -0.04629746, -0.50906976,  0.09496274,  2.670554  ]])

In [245]:
offset = array([[ 0.13046523, -0.28622871, -0.97931259, -0.33232437,  1.47594874,
        -0.26496522,  0.90230198,  0.49700615,  0.17841406]])

In [258]:
# Rebuild a player from the literal `weights`/`offset` arrays defined in the cells above.
genome = XOPerceptron(weights, offset,activation=fast_sigmoid,maxdelta=0.1)
genome.calcerror(inputs, goal)
# Manually overrides the error that was just computed on `genome`.
genome.error = 0.2
# MutationLearning copies the genome and scores the copy itself, so the
# override above stays on `genome` only.
learningfunc = MutationLearning(inputs,goal,genome)

# Play 1000 games of genome against stupidai and count each result code.
outcomes = [play_game(genome, stupidai, newboard(), see=False) for i in range(1000)]
results = array(outcomes)
for label, code in [('lose', -1), ('draw', 0), ('win', 1), ('mistake', -2)]:
    print(label + ' ' + str(np.sum(results == code)))

lose 181
draw 188
win 631
mistake 0


In [387]:
def newcollection():
    """Build a fresh list of nine zero counters."""
    return [0] * 9

# Any key looked up for the first time gets its own nine-slot counter list.
a = collections.defaultdict(newcollection)

In [394]:
# Accumulate 100000 random scores (+1, 0 or -1) onto randomly chosen slots of
# the counter list stored under key 1, then show the resulting totals.
for c in range(100000):
    score = choice([1,0,-1])
    action = choice([i for i in range(9)])
    a[1][action] += score
print a[1]

[-101, -61, 412, 197, 59, -414, 13, -60, -134]


In [2448]:
def playgame(xai,oai,board,see=False):
    """Play one game between two move-picking callables, with 'x' moving first.

    Each player is called as ``player(board, turn)`` and must return the index
    of an empty square.

    Returns:
        1 if x wins, -1 if o wins, 0 for a draw, -2 if x picked an occupied
        square, 2 if o picked an occupied square (the board and the offending
        move are printed in both foul cases).
    """
    turn = 'x'
    while '.' in board:
        # Select the player to move together with its foul code.
        if turn == 'x':
            player, foul = xai, -2
        else:
            player, foul = oai, 2
        move = player(board, turn)
        if board[move] != '.':
            print(board)
            print(move)
            return foul
        board, turn = domove(board, move, turn)
        if see:
            printboard(board)
        if didwin(board, 'x'):
            if see:
                print('x won')
            return 1
        if didwin(board, 'o'):
            if see:
                print('o won')
            return -1
    if see:
        print('draw')
    return 0

def printboard(board):
    """Print the nine squares as three rows of three, followed by a separator line."""
    rows = [
        board[0] + board[1] + board[2],
        board[3] + board[4] + board[5],
        board[6] + board[7] + board[8],
    ]
    print('\n'.join(rows))
    print('-----')
    
class oneturnai2(object):
    """One-move-lookahead player.

    Called as ai(board, side): it completes its own line if it can, otherwise
    blocks a line the opponent could complete, otherwise defers to stupidai2.
    All returned moves are 0-based board indices.
    """

    def __init__(self):
        """Nothing to set up; the player keeps no state."""

    def __call__(self, board, side):
        otherside = 'o' if side == 'x' else 'x'

        # An immediate win takes priority over everything else.
        move = self.checkmove(board, side)
        if move != -1:
            return move

        # Next best: occupy the square the opponent needs to finish a line.
        prediction = self.checkmove(board, otherside)
        if prediction != -1:
            return prediction

        return stupidai2(board, side)

    def checkmove(self, board, side):
        """Return the empty square that completes a line for `side`, or -1 if none exists.

        Lines are scanned in a fixed order (rows, columns, diagonals) and the
        first completing square found is returned.
        """
        positions = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]
        for p1, p2, p3 in positions:
            # Try each square of the line as the gap, last square first.
            for held_a, held_b, gap in ((p1, p2, p3), (p1, p3, p2), (p2, p3, p1)):
                if board[held_a] == side and board[held_b] == side and board[gap] == '.':
                    return gap
        return -1

In [2449]:
# Spot check: 'o' has no immediate win on this board, so it blocks x's 0-3-6
# column by playing square 6.
print oneturnai2()(list('x..xoo.x.'),'o')

6


In [2451]:
def stupidai2(board,side):
    """Return the index of a randomly chosen empty square.

    Squares are drawn with np.random.randint until an empty one ('.') comes
    up, so the choice is uniform over the empty squares.

    board: indexable sequence of 9 cells, '.' marking an empty square.
    side:  accepted so the function has the ai(board, side) call shape; it is
           not used.

    Raises:
        ValueError: if the board has no empty square (the sampling loop
        would otherwise never terminate).
    """
    if "." not in board:
        raise ValueError("stupidai2 called on a board with no empty squares")
    while True:
        # np.random.randint samples from the half-open range [low, high), so 9 never comes up.
        play = np.random.randint(0,9)
        if board[play] == ".":
            return play



def domove(board, move, turn):
    """Mark `move` for `turn` on the board (in place) and hand the turn over.

    Returns the same board object together with the side that moves next:
    'o' after 'x', and 'x' after anything else.
    """
    board[move] = turn
    if turn == 'x':
        return board, 'o'
    return board, 'x'

def didwin(board, side):
    """Return True when `side` holds all three squares of any row, column or diagonal."""
    lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
        (0, 4, 8), (2, 4, 6),              # diagonals
    )
    return any(all(board[square] == side for square in line) for line in lines)

def newboard():
    """Return a new empty board: a list of nine '.' cells."""
    return ['.'] * 9

class XOQLearning(object):
    """Tabular noughts-and-crosses learner.

    For every board state (the nine cells joined into a string) the database
    holds one [score total, visit count] row per square. Moves are sampled
    from a softmax over score/visits, sharpened by `multi`.

    The instance is itself a player: call it as ai(board, side).
    """

    def __init__(self, multi=5):
        self.database = datadict()
        self.multi = multi
        # __call__ reads these, so they must exist before learn() is ever
        # called; otherwise playing with a fresh learner raises AttributeError.
        self.side = None
        self.gamekeys = {}

    def __call__(self, board, side):
        """Choose a move for `board`; record it when playing the side being trained."""
        state = ''.join(board)
        move = self.findmove(state)
        if board[move] != ".":
            # Diagnostic: an occupied square was selected.
            print('{} {} {}'.format(state, move, self.database[state]))
        if side == self.side:
            self.gamekeys[state] = move
        return move

    def learn(self, ai, side, multi = 0.2):
        """Play one game against `ai` as `side` and fold the outcome into the database.

        `multi` is the sampling sharpness used for this game only; the
        previous value is restored afterwards, even if the game raises.
        Every (state, move) this learner played gets the game score added to
        its total and its visit count incremented.
        """
        self.gamekeys = {}
        self.side = side
        playmulti = self.multi
        self.multi = multi
        try:
            if side == 'x':
                score = playgame(self, ai, newboard())
            else:
                # playgame scores from x's point of view; flip it for o.
                score = -playgame(ai, self, newboard())
        finally:
            self.multi = playmulti
        if score == 2:
            # The opponent made an illegal move: neither reward nor penalty.
            score = 0
        for state, move in self.gamekeys.items():
            self.database[state][move][0] += score
            self.database[state][move][1] += 1

    def findmove(self, state):
        """Sample a square index from the softmax of score/visits for `state`.

        Squares with zero weight are never returned, and the result always
        stays within the table even when rounding leaves the cumulative sum
        just below the random draw.
        """
        probs = self.database[state]
        winchance = np.array([float(wins) / visits for wins, visits in probs])
        weights = np.exp(winchance * self.multi)
        cumulative = np.cumsum(weights / np.sum(weights))
        draw = np.random.random()
        # side='right' gives the first entry strictly greater than the draw, so
        # a zero-width slot can never be picked, not even for a draw of 0.0.
        move = int(np.searchsorted(cumulative, draw, side='right'))
        if move >= len(cumulative):
            # Rounding left the total just under the draw: use the last
            # square that has any weight.
            move = int(np.flatnonzero(weights)[-1])
        return move

    @staticmethod
    def newlist():
        """Return an array of nine integer zeros."""
        return np.array([0] * 9)

    
class datadict(dict):
    """Dict that creates a per-square stats table the first time a board key is read.

    Each character of the key produces one [score, count] row: [0, 1] for an
    empty square ('.') and [-1e100, 1] for anything else.
    """
    def __missing__(self, key):
        rows = []
        for cell in key:
            if cell == '.':
                rows.append([0, 1])
            else:
                rows.append([-1e100, 1])
        table = np.array(rows)
        # Store the table so later reads return this same array object.
        self[key] = table
        return table


class probdict(dict):
    """Dict that creates, per board key, the cumulative move distribution on first read.

    A new key gets the same starting table as datadict ([0, 1] for '.',
    [-1e100, 1] otherwise); the score/count ratio of each square is pushed
    through a softmax and the running sum of those probabilities is stored
    and returned. For a key with at least one '.', occupied squares add
    nothing to the running sum and the last entry is 1.
    """
    def __missing__(self, key):
        new = np.array([[0,1] if bit == '.' else [-1e100,1] for bit in key])
        # One ratio per square; exponentiate these, not the raw two-column table.
        winchance = np.array([float(wins) / visits for wins, visits in new])
        y = np.exp(winchance)
        s = np.sum(y)
        c = np.cumsum(y / s)
        self[key] = c
        # __missing__ must return the value, otherwise the first lookup yields None.
        return c

In [2442]:
# Number of distinct board-state keys currently stored in a.database.
len(a.database.items())

4521

In [2144]:
# Scratch check of the softmax + cumulative-sum sampling also used in
# XOQLearning.findmove, here with nine equal scores.
# The next expression is evaluated and discarded (it is not the cell's last line).
np.exp(-1e100)
x = np.arange(9)*0
y = np.exp(x)
s = np.sum(y)
p = y/s
# Bare `p` in the middle of a cell displays nothing; the print below shows it.
p
c = np.cumsum(p)
r = np.random.random()
# idx is computed for a single draw but not used again in this cell.
idx = sum(c < r)
print p
# Histogram of 100000 sampled indices; with equal scores the counts should be roughly equal.
tab = [0]*len(c)
for i in xrange(100000):
    tab[sum(c < np.random.random())] += 1
print tab

[ 0.11111111  0.11111111  0.11111111  0.11111111  0.11111111  0.11111111
  0.11111111  0.11111111  0.11111111]
[11109, 10859, 10973, 11165, 11131, 11232, 11221, 11260, 11050]


In [2258]:
# Two independent learners with play sharpness multi=5. This rebinds `a`, which
# an earlier cell used for a defaultdict. `b` is not used in the cells shown here.
a = XOQLearning(multi=5)
b = XOQLearning(multi=5)
#playgame(a, stupidai2, newboard(), see=True)

In [2188]:
# 10000 training games with `a` playing 'x' against a new oneturnai2 each game
# (learn's default multi=0.2 applies).
for i in range(10000):
    a.learn(oneturnai2(),'x')

In [2528]:
# 100000 rounds of three training games each with `a` playing 'o' against
# oneturnai2, one game per sampling sharpness: 0.2, 1 and 5.
for i in xrange(100000):
    a.learn(oneturnai2(),'o', multi=0.2)
    a.learn(oneturnai2(),'o', multi=1)
    a.learn(oneturnai2(),'o', multi=5)

In [2460]:
# 1000 rounds of three identical training games: `a` plays 'o' against the
# random player stupidai2 at multi=0.2 (3000 games in total).
for i in xrange(1000):
    a.learn(stupidai2,'o', multi=0.2)
    a.learn(stupidai2,'o', multi=0.2)
    a.learn(stupidai2,'o', multi=0.2)

In [2399]:
# Stats for the state where only square 8 holds an 'x': one [score total, visit count]
# row per square. Reading a key that is not stored yet creates it (datadict.__missing__).
print a.database['........x']

[[  7.28800000e+003   7.88870000e+004]
 [ -1.06770000e+004   1.49380000e+004]
 [ -1.00060000e+004   1.81300000e+004]
 [ -1.07680000e+004   1.51270000e+004]
 [ -9.51400000e+003   1.88880000e+004]
 [ -1.07460000e+004   1.78620000e+004]
 [ -9.91700000e+003   1.74800000e+004]
 [ -1.06690000e+004   1.70950000e+004]
 [ -1.00000000e+100   1.00000000e+000]]


In [2529]:
# Evaluate `a` as 'o' (second player argument of playgame) against oneturnai2 with
# a sharper play setting. playgame scores from x's side, so for `a`:
# 1 = lose, 0 = draw, -1 = win, 2 = `a` chose an occupied square.
a.multi = 10
results = [ playgame(oneturnai2(),a,newboard()) for i in xrange(1000)]
results = array(results)
print 'lose', np.sum(results==1)
print 'draw', np.sum(results==0)
print 'win', np.sum(results==-1)
print 'mistake', np.sum(results==2)
print a.multi

lose 44
draw 539
win 417
mistake 0
10


In [2472]:
# Watch one game: stupidai2 plays 'x', `a` plays 'o'; the board is printed after
# every move and the final line is playgame's result code.
print playgame(stupidai2,a,newboard(),see=True)

...
x..
...
-----
...
x..
o..
-----
...
x.x
o..
-----
...
xox
o..
-----
.x.
xox
o..
-----
.x.
xox
o.o
-----
.x.
xox
oxo
-----
ox.
xox
oxo
-----
o won
-1


In [2358]:
# XOQLearning defines no `movesprob` attribute, so this lookup raises AttributeError;
# move probabilities are computed on the fly in findmove from a.database.
a.movesprob['x.x.x.o.o']

AttributeError: 'XOQLearning' object has no attribute 'movesprob'

In [2447]:
# Stats for the empty board: one [score total, visit count] row per square.
a.database['.........']

array([[ 46856,  73134],
       [ 21518,  49501],
       [ 44109,  69706],
       [ 22757,  50790],
       [ 99729, 125481],
       [ 22724,  50690],
       [ 45821,  71802],
       [ 21846,  49978],
       [ 47434,  73927]])

In [2468]:
# Number of board states stored in a.database.
print len(a.database)

4521


In [2502]:
# Scratch cell. `memory` and `init` are not used again in the cells shown here.
memory = 0
init = '{1:2,3:4,5:6}'
# dict() expects an iterable of key/value pairs; a flat list of ints raises TypeError.
print dict([1,2,3,4,5])

TypeError: cannot convert dictionary update sequence element #0 to a sequence

In [2618]:
def transboard(inputs,side):
    """Encode a board as one flat 0/1 vector from `side`'s point of view.

    inputs: iterable of board cells ('x', 'o', '.'); newline characters are
            dropped first, so a multi-line board string is accepted.
    side:   'x' or 'o', the player the encoding is built for.

    Returns a numpy array made of three equal-length blocks, in this order:
    1 where `side` occupies the cell, 1 where the opponent does, 1 where the
    cell is empty.
    """
    oside = 'x' if side == 'o' else 'o'
    # Materialise the cells once: the board is scanned three times, and a
    # filter() result is only re-iterable under Python 2.
    cells = [bit for bit in inputs if bit != '\n']
    own = [1 if bit == side else 0 for bit in cells]
    opponent = [1 if bit == oside else 0 for bit in cells]
    space = [1 if bit == '.' else 0 for bit in cells]
    return np.array(own + opponent + space)

In [2613]:
# Generate training data by letting two oneturnai "coach" instances play.
# NOTE(review): oneturnai, play_game and stupidai come from the star imports in
# the first cell; presumably the first play_game argument plays 'x' and oneturnai
# records its decisions in `trainingdata` — confirm against those modules.
xcoach = oneturnai()
ocoach = oneturnai()
# Empty board in the multi-line string form passed to play_game here (leading newline included).
def nboard(): return """
...
...
..."""
# Each coach plays 500 games against stupidai and 500 against a new oneturnai,
# xcoach as the first argument and ocoach as the second.
for i in xrange(500):
    play_game(xcoach, stupidai, nboard())
for i in xrange(500):
    play_game(stupidai, ocoach, nboard())
for i in xrange(500):
    play_game(xcoach, oneturnai(), nboard())
for i in xrange(500):
    play_game(oneturnai(), ocoach, nboard())
# Snapshot what each coach collected as a list of (key, value) items.
xtrainingdata = xcoach.trainingdata.items()
otrainingdata = ocoach.trainingdata.items()

In [2619]:
# Turn the x coach's recorded items into network inputs and targets.
# NOTE: this rebinds `inputs` (imported from oneturntesting in the first cell) and
# uses `input` as a loop variable, shadowing the builtin.
inputs = [item[0] for item in xtrainingdata]
# Drop newlines from each board (under Python 2, filter on a str returns a str).
inputs = [filter(lambda i: i != '\n', input) for input in inputs]
# 0.5 at index item[1]-1; the -1 suggests recorded moves are 1-based — TODO confirm.
xgoals = np.array([[0.5 if i == item[1]-1 else 0 for i in range(0,9)] for item in xtrainingdata])
# Plus 0.5 on every empty square, so a target is 1.0 where the recorded move hits
# an empty square, 0.5 on other empty squares and 0 elsewhere.
xgoals += np.array([[0.5 if item[i] == '.' else 0 for i in range(0,9)] for item in inputs])
# Encode each board from x's point of view (own / opponent / empty blocks).
xinputs = [transboard(input,'x') for input in inputs]

In [2620]:
# Same construction as the x cell, for the o coach's recorded items.
# NOTE: rebinds `inputs` again and uses `input` as a loop variable, shadowing the builtin.
inputs = [item[0] for item in otrainingdata]
# Drop newlines from each board (under Python 2, filter on a str returns a str).
inputs = [filter(lambda i: i != '\n', input) for input in inputs]
# 0.5 at index item[1]-1; the -1 suggests recorded moves are 1-based — TODO confirm.
ogoals = np.array([[0.5 if i == item[1]-1 else 0 for i in range(0,9)] for item in otrainingdata])
# Plus 0.5 on every empty square.
ogoals += np.array([[0.5 if item[i] == '.' else 0 for i in range(0,9)] for item in inputs])
# Encode each board from o's point of view (own / opponent / empty blocks).
oinputs = [transboard(input,'o') for input in inputs]

In [2629]:
# Merge both training sets as plain lists, o samples first and x samples second,
# keeping targets and inputs in the same order.
# NOTE: rebinds the `goal` and `inputs` names imported in the first cell; the loop
# variables reuse the names `goal` and `input`.
goal = [list(goal) for goal in ogoals] + [list(goal) for goal in xgoals]
inputs = [list(input) for input in oinputs] + [list(input) for input in xinputs]

In [2630]:
# Prints both complete lists; the output is very long.
print goal
print inputs

[[0.5, 0.5, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0], [0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.5, 1.0], [0.0, 0.5, 0.0, 1.0, 0.0, 0.0, 0.5, 0.5, 0.0], [0.0, 0.0, 1.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.0, 0.5, 1.0, 0.0, 0.0, 0.5, 0.5], [1.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5], [0.0, 1.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0], [1.0, 0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 0.5, 0.0], [0.5, 1.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0], [0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0], [0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0], [0.0, 0.0, 0.5, 0.5, 0.0, 0.5, 1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 1.0, 0.0, 0.5], [0.0, 0.5, 0.5, 0.0, 0.0, 1.0, 0.5, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5, 0.0, 0.0], [0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0], [0.0, 0.0, 0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0], [0.0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0