<a href="https://colab.research.google.com/github/zeroxy/lotto/blob/master/lotto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install pyquery
import numpy as np
import requests
import pyquery
from datetime import datetime as dt
from joblib import Parallel, delayed, cpu_count


class Lottos:
    def __init__(self, filename='lottos_db.npz'):
        self.filename = filename
        try:
            with np.load(self.filename) as npz :
                self.lottos = npz['lottos']
            print("loaded!! ", self.lottos.shape)
        except Exception as e:
            self.lottos=np.zeros((0,45),dtype=np.int16)
            self.update()
        finally:
            self.neg_lottos = (np.ones((1,45), dtype=np.int32) * 
                               np.expand_dims(np.arange(self.lottos.shape[0], dtype=np.int32),-1)) +1
            for t,n in np.argwhere(self.lottos==1):
                self.neg_lottos[t:,n] -= self.neg_lottos[t,n]
            #print(self.lottos[:2], self.neg_lottos[:2])
            self.tomap = np.zeros((45,70,2))
            self.tomap[:,:,1]=0.0001
            for x in range(45):
                for y in range(self.neg_lottos.shape[0]-1):
                    f,t  = self.neg_lottos[y,x], self.neg_lottos[y+1, x]
                    #print(x,y,f,t)
                    if t == 0:
                        self.tomap[x,f,0]+=1
                    else:
                        self.tomap[x,f,1]+=1
        
                    
    def __len__(self):
        return self.lottos.shape[0]
    
    def update(self):
        starttime = dt.now()
        url = 'https://search.naver.com/search.naver?sm=tab_drt&where=nexearch&query=9999회로또'
        body = requests.get(url)
        d = pyquery.PyQuery(body.text)('._lotto-btn-current em')
        limit = int(d.html()[:-1])
        end = self.lottos.shape[0]
        def get_balls(no):
            url = f'https://search.naver.com/search.naver?sm=tab_drt&where=nexearch&query={no+1}회로또'
            d = pyquery.PyQuery(requests.get(url).text)('.num_box .num')
            print(f'\r    {no+1} ', end='')
            if no%180 == 179:
                print(f'  {dt.now()-starttime}')
            return [ int(x.text)-1 for x in d]
            

        if limit>end:
            self.lottos = np.append(self.lottos,np.zeros((limit-end,45)),axis=0)
            print(self.lottos.shape)
            verb=0
            #verb=50
            crawled = Parallel(n_jobs=10,backend='threading',verbose=verb)(
                delayed(get_balls)(x) for x in range(end,limit)
            )
            for rowno,row in enumerate(crawled):
                self.lottos[end+rowno,row]=1
                
            np.savez_compressed(self.filename, lottos=self.lottos)
            print(f'\nwe had {end}. so update to {limit}. now we have {self.lottos.shape[0]} rows.')
        else:
            print('\nno update')
        
        
    
    def get_probability(self, no=None, history=1):
        if no is None:
            no=self.lottos.shape[0]+1
        assert 1< no <= self.lottos.shape[0]+1
        assert history < no
        probability = np.zeros(45)
        predict_seed = self.neg_lottos[no-1-history:no-1]
        #print("###\n", predict_seed, np.cov(predict_seed), np.mean(predict_seed))
        for x in range(45):
            rate = 1.
            for y in range(history):
                percent = (self.tomap[x,predict_seed[y,x],0]/np.sum(self.tomap[x,predict_seed[y,x]]))
                probability[x] += percent *rate
                rate = rate * (1-percent)
        return (probability/np.sum(probability))
    
    def get_real_history(self, no):
        assert 1< no <= self.lottos.shape[0]
        return np.where(self.lottos[no-1]>0.5)[0]+1
    
    def recommend(self, prob=None, count=5):
        if prob is None:
            p = np.ones((45))/45
        result = np.zeros((count,6))
        for i in range(count):
            result[i] = np.sort(np.random.choice(45, 6, replace=False, p=prob)+1)
        result.astype(np.int8)
        return result
      
    def validation_history(self, recommends):
        assert recommends is not None
        assert recommends.ndim == 2
        assert recommends.shape[1] == 6
        rr = np.zeros((recommends.shape[0], 45))
        for i, r in enumerate(recommends):
            rr[i,r.astype(np.int32)-1] = 1
        
        rrr = np.expand_dims(rr,1)
        lll = np.expand_dims(self.lottos, 0)
        temp = np.sum((lll-rrr)**2, axis=2)
        for tt in temp:
            print(np.bincount(tt.astype(np.int32))[1::2])
        #for i in recommends:
        #    for l in self.lottos:
        #        templ = np.argwhere(l==1).reshape(-1)+1
        #        if np.array_equal(i,templ):
        #            print(i)
        #            return False
        #print(" It's not exist in history !! ")
        #return True

lottos = Lottos()
lottos.update()

pb = lottos.get_probability()
r= lottos.recommend(pb)
highpb = np.argsort(-pb)[:15]
print(lottos.get_real_history(len(lottos)))
print(r)
print(np.unique(r))
print(pb[highpb])
print(np.sort(highpb)+1)
print(lottos.validation_history(r))

print("\n\n===================\n\n")
pb_pow=4
print(f'pb_pow is {pb_pow}')
pb=pb**pb_pow
pb=pb/np.sum(pb)
pb = pb + (np.mean(pb)/10)
pb = pb / np.sum(pb)

r= lottos.recommend(pb)
highpb = np.argsort(-pb)[:25]
print(r)
print(np.unique(r))
print(pb[highpb])
print(np.sort(highpb)+1)
print(lottos.validation_history(r))


def correct_test(i, pb_pow=1, testtime = 70000):
    count_pb = np.zeros(7, dtype=np.int64)
    count_no = np.zeros(7, dtype=np.int64)
    pb = lottos.get_probability(i)
    pb=pb**pb_pow
    pb=pb/np.sum(pb)
    pb = pb + (np.mean(pb)/10)
    pb=pb/np.sum(pb)
    y_ = lottos.get_real_history(i)
    
    for r in lottos.recommend(prob=pb, count=testtime):
        count_pb[np.intersect1d(r, y_).shape[0]] +=1

    for r in lottos.recommend(count=testtime):
        count_no[np.intersect1d(r, y_).shape[0]] +=1
    #(np.sort(np.argsort(-pb)[(y_-1)])+1)}\
    print(f"\n\n{i}  ================{np.where(np.isin(np.argsort(-pb),(y_-1)))[0]+1}\
    \n{count_pb}\n{count_no}\n{(count_pb/count_no)[:-1]}\
    \n\n{pb.max()} - {pb.min()} ==> {np.sum(count_pb[2:])/np.sum(count_pb[:2])} 0.30")
    pb_correct_rate = np.sum(count_pb[2:])/np.sum(count_pb[:2])
    no_correct_rate = np.sum(count_no[2:])/np.sum(count_no[:2])
    return (pb_correct_rate / no_correct_rate)


a = cpu_count()
d = len(lottos)
print(a,d)
#if __name__ == '__main__':
prl = Parallel(n_jobs=a,backend='multiprocessing')
returns = prl(delayed(correct_test)(i,pb_pow) for i in range(d-10,d))
returns_filter = [x for x in returns if x >= 1.6]
print(len(returns) , len (returns_filter))
print(returns, returns_filter)
####



(856, 45)
    180   0:00:25.838405
    360   0:00:49.446594
    540   0:01:13.497502
    720   0:01:37.564248
    854 
we had 0. so update to 856. now we have 856 rows.

no update
[10 17 24 40 41 43 44]
[[ 3. 11. 12. 17. 21. 28.]
 [ 3.  7. 23. 24. 30. 39.]
 [10. 13. 19. 20. 39. 45.]
 [13. 17. 30. 36. 39. 42.]
 [12. 16. 18. 39. 41. 44.]]
[ 3.  7. 10. 11. 12. 13. 16. 17. 18. 19. 20. 21. 23. 24. 28. 30. 36. 39.
 41. 42. 44. 45.]
[0.04629603 0.04117986 0.03772272 0.0330687  0.03261256 0.03117899
 0.03117888 0.02910047 0.02910039 0.0291003  0.02839069 0.02645491
 0.02581491 0.02558282 0.02551015]
[ 1  3  4  6 12 13 14 15 21 27 30 39 42 43 45]
[  0   0   2  24 178 373 279]
[  0   0   3  26 165 358 304]
[  0   0   2  33 167 391 263]
[  0   0   1  33 174 360 288]
[  0   0   3  33 156 369 295]
None




pb_pow is 4
[[ 4.  6. 27. 30. 32. 39.]
 [ 3.  6.  9. 35. 38. 39.]
 [ 4. 10. 17. 20. 36. 39.]
 [ 6. 14. 21. 27. 35. 42.]
 [12. 14. 15. 24. 42. 43.]]
[ 3.  4.  6.  9. 10. 12. 14. 15. 17. 20. 21. 24





[28309 29919 10299  1371   102     0     0]
[23697 30298 13184  2603   208    10     0]
[1.19462379 0.98749092 0.78117415 0.52669996 0.49038462 0.        ]    

0.07209774352132553 - 0.002020202020202021 ==> 0.20217077694579927 0.30






[ 2556 20756 32697 12526  1423    42     0]
[23733 30268 13251  2533   208     7     0]
[0.10769814 0.68574072 2.46751189 4.94512436 6.84134615 6.        ]    

0.21609126814590068 - 0.00202020202020202 ==> 2.002745367192862 0.30


[27761 30461 10397  1290    88     3     0]
[23839 30363 13144  2453   197     4     0]
[1.16452032 1.00322761 0.7910073  0.52588667 0.44670051 0.75      ]    

0.2321175200601033 - 0.0020202020202020198 ==> 0.20229466524681392 0.30


[18680 36402 13141  1690    86     1     0]
[23806 29830 13559  2589   208     8     0]
[0.78467613 1.22031512 0.96917177 0.65276168 0.41346154 0.125     ]    

0.11215976268079002 - 0.00202020202020202 ==> 0.2708325768853709 0.30


[ 7762 29764 24538  7134   761    41     0]
[23636 30255 13362  2503   235     9     0]
[0.32839736 0.98377128 1.83640174 2.85017978 3.23829787 4.55555556]    

0.15859619820992538 - 0.00202020202020202 ==> 0.8653733411501359 0.30


[ 2980 24295 30203 11199  1281    42     0]
[23622 30141 13401  2





[   15 24790 31935 11741  1454    64     1]
[23847 30118 13247  2558   222     8     0]
[6.29009938e-04 8.23095823e-01 2.41073451e+00 4.58991400e+00
 6.54954955e+00 8.00000000e+00]    

0.7082815319282035 - 0.00202020202020202 ==> 1.822011691191292 0.30


[  230 34315 28605  6322   514    14     0]
[23537 30414 13282  2535   223     9     0]
[0.00977185 1.1282633  2.15366662 2.4938856  2.30493274 1.55555556]    

0.5271309435847739 - 0.00202020202020202 ==> 1.0263424518743667 0.30


[    4  6866 34816 23323  4699   290     2]
[23624 30196 13449  2512   214     5     0]
[1.69319336e-04 2.27381110e-01 2.58874266e+00 9.28463376e+00
 2.19579439e+01 5.80000000e+01]    

0.6756255671781388 - 0.00202020202020202 ==> 9.189228529839884 0.30


[11708 28903 21975  6549   822    43     0]
[23627 30036 13672  2438   220     7     0]
[0.49553477 0.9622786  1.60729959 2.68621821 3.73636364 6.14285714]    

0.11655187110633583 - 0.0020202020202020206 ==> 0.7236709265962424 0.30


[ 5522 33578 24625 

In [0]:
!rm lottos_db.npz


In [0]:
returns = [0.2806309502488481,
 51.39450524933746,
 0.34831610920966627,
 0.49331583568915965,
 17.048999932991933,
 4.023530551073995,
 1.3745144384473178,
 1.927719032520403,
 0.6971825823836344,
 0.4031484907114949,
 5.28145867587213,
 3.2280020524105244,
 9.954692082021488,
 0.3069753286311153,
 14.309521058734681,
 3.95200516053033,
 3.6384356549457784,
 14661.125893689772,
 3.630124920023082,
 2.762172414004451,
 0.7397162961454543,
 21.472006796978945,
 0.4233139594200732,
 1.4316629609616527,
 6.805442389331069,
 0.4086129186163798,
 1.4956007030095972,
 36.182335635277454,
 5.739065364020302,
 10.360237595020351]


30

18