In [1]:
import numpy as np
import pandas as pd
from rat import ratmath
from rat import ratrestore
from rat import ratedit

# Load the sample data

In [2]:
# Employment counts per sector (Agriculture, Industry, Production, Service),
# one row per district.
df_a = pd.read_csv('tables/employment.csv')
df_a.head()

Unnamed: 0,District,Agriculture,Industry,Production,Service
0,East Forests,2063,3644,504,5562
1,North Mountains,1258,3807,862,11540
2,South Deserts,677,4051,2699,6029
3,West Steppes,3309,66,275,11996


In [3]:
# Counts per settlement type (Urban, Suburban, Rural), one row per district.
df_b = pd.read_csv('tables/environment.csv')
df_b.head()

Unnamed: 0,District,Urban,Suburban,Rural
0,East Forests,3866,1510,6397
1,North Mountains,3438,5779,8250
2,South Deserts,6971,4941,1544
3,West Steppes,3169,7188,5289


In [4]:
# Marital-status counts, one row per (District, Sex) pair.
# The 'Marriged' spelling comes from the CSV header and is used as a column
# key further down, so it has to be typed exactly like that.
df_c = pd.read_csv('tables/family.csv')
df_c.head()

Unnamed: 0,District,Sex,Single,Marriged,Widower
0,East Forests,Female,2545,2248,314
1,East Forests,Male,1702,2920,2044
2,North Mountains,Female,3059,3352,617
3,North Mountains,Male,3207,4578,2654
4,South Deserts,Female,3197,2526,1516


# ratmath

In [5]:
# Pull the value columns of df_a into a 2-D array (one row per district),
# leaving out the identifying column listed in name_cols.
lines_a = ratmath.get_lines(df_a, name_cols=['District'])
lines_a

array([[ 2063,  3644,   504,  5562],
       [ 1258,  3807,   862, 11540],
       [  677,  4051,  2699,  6029],
       [ 3309,    66,   275, 11996]])

In [6]:
# Same extraction for df_b.
# NOTE(review): in the recorded output the columns come back as
# Rural, Suburban, Urban, not in the CSV order Urban, Suburban, Rural -
# presumably sorted by column name; confirm in ratmath.get_lines.
lines_b = ratmath.get_lines(df_b, name_cols=['District'])
lines_b

array([[6397, 1510, 3866],
       [8250, 5779, 3438],
       [1544, 4941, 6971],
       [5289, 7188, 3169]])

In [7]:
# Use the first row of each array as a single-district worked example.
line_a = lines_a[0]
line_b = lines_b[0]

In [8]:
# Build the linear system for the example pair. Per the recorded output this
# is a 0/1 matrix with 7 rows (4 entries of line_a + 3 entries of line_b) and
# 12 columns, together with a right-hand side of line_a followed by line_b.
ratmath.get_system(line_a, line_b)

(array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
        [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0],
        [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1]]),
 array([2063, 3644,  504, 5562, 6397, 1510, 3866]))

In [9]:
# Show the optimisation problem built for the same pair. The repr looks like
# a cvxpy-style (Problem, Variable) tuple with a 12-entry non-negative
# variable; the library source is not visible here, so confirm in ratmath.
ratmath.get_problem(line_a, line_b)

(Problem(Minimize(Expression(CONVEX, NONNEGATIVE, ())), [Equality(Expression(AFFINE, NONNEGATIVE, (6,)), Constant(CONSTANT, NONNEGATIVE, (6,)))]),
 Variable((12,), nonneg=True))

In [10]:
# Solve for the 12 cell values of the example pair. print_status=True also
# prints the solver status, the optimal value and the solution vector.
# NOTE(review): the recorded status is 'optimal_inaccurate', so treat the
# values as approximate.
ratmath.restore_line(line_a, line_b, print_status=True)

Status:  optimal_inaccurate
The optimal value is 0.00011299599752125719
A solution x is
[1120.9556607   264.59950771  677.44483136 1980.01087203  467.3778986
 1196.61122899  273.85441283   64.64282668  165.50276043 3022.17905376
  713.37976685 1826.44117879] 





array([1120.9556607 ,  264.59950771,  677.44483136, 1980.01087203,
        467.3778986 , 1196.61122899,  273.85441283,   64.64282668,
        165.50276043, 3022.17905502,  713.37976685, 1826.44117879])

# ratrestore

In [11]:
# Split the columns of the two tables into the identifying column(s)
# (name_cols) and the value columns of each table (targets_a, targets_b),
# as shown in the output.
name_cols, targets_a, targets_b = ratrestore.get_names_and_targets(df_a, df_b)
name_cols, targets_a, targets_b

(array(['District'], dtype=object),
 array(['Agriculture', 'Industry', 'Production', 'Service'], dtype=object),
 array(['Rural', 'Suburban', 'Urban'], dtype=object))

In [12]:
# Expand the two target lists into two parallel arrays that together list
# every (targets_a, targets_b) combination; per the output, targets_b varies
# fastest.
ratrestore.get_rows_target_names(targets_a, targets_b)

(array(['Agriculture', 'Agriculture', 'Agriculture', 'Industry',
        'Industry', 'Industry', 'Production', 'Production', 'Production',
        'Service', 'Service', 'Service'], dtype=object),
 array(['Rural', 'Suburban', 'Urban', 'Rural', 'Suburban', 'Urban',
        'Rural', 'Suburban', 'Urban', 'Rural', 'Suburban', 'Urban'],
       dtype=object))

In [13]:
# Disaggregate all districts at once into a long table with one row per
# (District, Employment, Environment) combination and its estimated count.
df_res = ratrestore.restore_table(
    df_a,
    df_b,
    name_a='Employment',
    name_b='Environment',
    name_res='Count',
)
df_res.head(15)

Unnamed: 0,District,Employment,Environment,Count
0,East Forests,Agriculture,Rural,1120.955661
1,East Forests,Agriculture,Suburban,264.599508
2,East Forests,Agriculture,Urban,677.444831
3,East Forests,Industry,Rural,1980.010872
4,East Forests,Industry,Suburban,467.377899
5,East Forests,Industry,Urban,1196.611229
6,East Forests,Production,Rural,273.854413
7,East Forests,Production,Suburban,64.642827
8,East Forests,Production,Urban,165.50276
9,East Forests,Service,Rural,3022.179055


# ratedit

In [14]:
# Full marital-status table; it is the input for the ratedit examples.
df_c

Unnamed: 0,District,Sex,Single,Marriged,Widower
0,East Forests,Female,2545,2248,314
1,East Forests,Male,1702,2920,2044
2,North Mountains,Female,3059,3352,617
3,North Mountains,Male,3207,4578,2654
4,South Deserts,Female,3197,2526,1516
5,South Deserts,Male,1899,1693,2625
6,West Steppes,Female,3032,2312,1988
7,West Steppes,Male,3756,3652,906


In [15]:
# Turn the three status columns into long form: a 'Family Status' label
# column plus a 'Persons' count column, keeping District and Sex.
df_weak = ratedit.roll_weak(
    df_c,
    ['Single', 'Marriged', 'Widower'],
    value_name='Family Status',
    res_name='Persons',
)
df_weak.head(12)

Unnamed: 0,District,Sex,Family Status,Persons
0,East Forests,Female,Single,2545
1,East Forests,Female,Marriged,2248
2,East Forests,Female,Widower,314
3,East Forests,Male,Single,1702
4,East Forests,Male,Marriged,2920
5,East Forests,Male,Widower,2044
6,North Mountains,Female,Single,3059
7,North Mountains,Female,Marriged,3352
8,North Mountains,Female,Widower,617
9,North Mountains,Male,Single,3207


In [16]:
# Flatten df_c to one row per district, with one generated column
# (col_0 .. col_5) per (Sex, status) combination; cols_strong is the legend
# that maps each generated column back to its Sex and status.
# NOTE(review): in the recorded output the legend and the data disagree.
# East Forests has col_1 = 1702, which is Male/Single in df_c, while the
# legend labels col_1 as Female/Marriged. It looks like the data columns are
# ordered status-major and the legend sex-major - verify in
# ratedit.roll_strong before relying on cols_strong.
df_strong, cols_strong = ratedit.roll_strong(df_c, ['District'], ['Sex'], ['Single', 'Marriged', 'Widower'])
df_strong, cols_strong

(          District col_0 col_1 col_2 col_3 col_4 col_5
 0     East Forests  2545  1702  2248  2920   314  2044
 1  North Mountains  3059  3207  3352  4578   617  2654
 2    South Deserts  3197  1899  2526  1693  1516  2625
 3     West Steppes  3032  3756  2312  3652  1988   906,
           Sex     Value
 col_0  Female    Single
 col_1  Female  Marriged
 col_2  Female   Widower
 col_3    Male    Single
 col_4    Male  Marriged
 col_5    Male   Widower)

In [17]:
# Value columns of df_a used by the correction example; df_a is shown again
# as the clean reference table.
a_columns = ['Agriculture', 'Industry', 'Production', 'Service']
df_a

Unnamed: 0,District,Agriculture,Industry,Production,Service
0,East Forests,2063,3644,504,5562
1,North Mountains,1258,3807,862,11540
2,South Deserts,677,4051,2699,6029
3,West Steppes,3309,66,275,11996


In [18]:
# Build a deliberately inconsistent copy of df_a by adding standard-normal
# noise to every value column, so the rows no longer add up to the true totals.
# The generator is seeded so that Restart & Run All reproduces the same table.
rng = np.random.default_rng(0)
df_noncorect = df_a.copy()
# Assign whole columns instead of `+=` through .loc: the source columns are
# integers, and writing floats into them in place relies on implicit upcasting.
# The noise shape follows the data instead of a hard-coded 4x4.
df_noncorect[a_columns] = df_noncorect[a_columns] + rng.normal(size=df_noncorect[a_columns].shape)
df_noncorect

Unnamed: 0,District,Agriculture,Industry,Production,Service
0,East Forests,2062.471333,3643.690835,504.07132,5562.232924
1,North Mountains,1256.756059,3807.63708,862.662926,11538.968257
2,South Deserts,677.032128,4050.618454,2698.57777,6029.512389
3,West Steppes,3308.067178,66.1414,275.228946,11995.82039


In [19]:
# True per-district totals over the value columns, taken from the clean table.
real_sums = df_a[a_columns].sum(axis=1).to_numpy()
real_sums

array([11773, 17467, 13456, 15646])

In [20]:
# Adjust the noisy table so that each row sums to real_sums again (checked in
# the next cell). by_div and by_add are passed through as-is; their exact
# meaning is defined in ratedit.correct_table, which is not visible here.
correct_table = ratedit.correct_table(df_noncorect, a_columns, real_sums, by_div=True, by_add=True)
correct_table

Unnamed: 0,District,Agriculture,Industry,Production,Service
0,East Forests,2062.564815,3643.855986,504.094167,5562.485033
1,North Mountains,1256.826263,3807.84978,862.711115,11539.612841
2,South Deserts,677.045172,4050.6965,2698.629765,6029.628563
3,West Steppes,3308.224086,66.144537,275.242001,11996.389376


In [21]:
# Check that every corrected row adds up to its true total. The corrected
# values are floats, so compare with a tolerance: exact `==` can report False
# because of rounding in the last bits even when the correction worked.
np.isclose(correct_table[a_columns].values.sum(axis=1), real_sums)

array([ True,  True,  True,  True])

# ratrestore.restore_alot

In [22]:
# Same roll_strong call as in the ratedit section: flatten df_c to one row
# per district so it can be passed to restore_alot together with df_a and df_b.
df_c_short, df_c_short_names = ratedit.roll_strong(df_c, ['District'], ['Sex'], ['Single', 'Marriged', 'Widower'])
df_c_short

Unnamed: 0,District,col_0,col_1,col_2,col_3,col_4,col_5
0,East Forests,2545,1702,2248,2920,314,2044
1,North Mountains,3059,3207,3352,4578,617,2654
2,South Deserts,3197,1899,2526,1693,1516,2625
3,West Steppes,3032,3756,2312,3652,1988,906


In [23]:
# Disaggregate all three tables jointly, keyed on 'District'. print_time=True
# prints how long each step took (the two log lines in the output).
ratrestore.restore_alot([df_a, df_b, df_c_short], ['District'], print_time=True)



Tables Table_0 and Table_1 were disaggregated by 0.3505 seconds.
Table Table_2 was disaggregated and added to result by 0.4655 seconds.


Unnamed: 0,District,Table_0,Table_1,Table_2,X
0,East Forests,Agriculture,Rural,col_0,242.319898
1,East Forests,Agriculture,Rural,col_1,162.054407
2,East Forests,Agriculture,Rural,col_2,214.041309
3,East Forests,Agriculture,Rural,col_3,278.025187
4,East Forests,Agriculture,Rural,col_4,29.897229
...,...,...,...,...,...
283,West Steppes,Service,Rural,col_1,973.484309
284,West Steppes,Service,Rural,col_2,599.226763
285,West Steppes,Service,Rural,col_3,946.529472
286,West Steppes,Service,Rural,col_4,515.252078


# Comparing `obj_type` values

In [24]:
# Recall the single-district example pair defined in the ratmath section.
line_a, line_b

(array([2063, 3644,  504, 5562]), array([6397, 1510, 3866]))

In [37]:
# Compare the two objective types on the two-table problem.
df_res_sqs = ratrestore.restore_table(df_a, df_b, obj_type='squares')
df_res_dps = ratrestore.restore_table(df_a, df_b, obj_type='dependences')

# Build the comparison frame with .assign, which returns a new DataFrame.
# Adding columns directly to a column-subset of df_res_sqs triggers pandas'
# SettingWithCopyWarning. Both solutions are solver floats, so 'Same' uses a
# tolerance instead of exact equality.
df_res = df_res_sqs[['District', 'A', 'B']].assign(
    Squares=df_res_sqs['X'],
    Dependences=df_res_dps['X'],
    Same=lambda frame: np.isclose(frame['Squares'], frame['Dependences']),
)

# Print the same three summary lines for each objective.
for title, column in (('Minimize squares:', 'Squares'),
                      ('Minimize dependences:', 'Dependences')):
    print(title)
    print('Min value:', df_res[column].min())
    print('Max value:', df_res[column].max())
    print('Zeros count:', (df_res[column] == 0).sum(), '\n')

df_res.head(12)



Minimize squares:
Min value: 0.0
Max value: 7613.705598348289
Zeros count: 5 

Minimize dependences:
Min value: 13.367888100899599
Max value: 5511.1369048641545
Zeros count: 0 



Unnamed: 0,District,A,B,Squares,Dependences,Same
0,East Forests,Agriculture,Rural,1268.107791,1120.955661,False
1,East Forests,Agriculture,Suburban,244.740815,264.599508,False
2,East Forests,Agriculture,Urban,611.218573,677.444831,False
3,East Forests,Industry,Rural,2161.590402,1980.010872,False
4,East Forests,Industry,Suburban,129.786239,467.377899,False
5,East Forests,Industry,Urban,1504.701185,1196.611229,False
6,East Forests,Production,Rural,9.244168,273.854413,False
7,East Forests,Production,Suburban,380.630354,64.642827,False
8,East Forests,Production,Urban,57.449165,165.50276,False
9,East Forests,Service,Rural,2581.905891,3022.179055,False


In [38]:
# Compare the two objective types on the three-table problem.
df_alot_sqs = ratrestore.restore_alot([df_a, df_b, df_c_short], ['District'], obj_type='squares')
df_alot_dps = ratrestore.restore_alot([df_a, df_b, df_c_short], ['District'], obj_type='dependences')

# Build the comparison frame with .assign, which returns a new DataFrame.
# Adding columns directly to a column-subset of df_alot_sqs triggers pandas'
# SettingWithCopyWarning. Both solutions are solver floats, so 'Same' uses a
# tolerance instead of exact equality.
df_alot = df_alot_sqs[['District', 'Table_0', 'Table_1', 'Table_2']].assign(
    Squares=df_alot_sqs['X'],
    Dependences=df_alot_dps['X'],
    Same=lambda frame: np.isclose(frame['Squares'], frame['Dependences']),
)

# Print the same three summary lines for each objective.
# NOTE(review): the recorded output shows a negative minimum for the
# 'squares' objective, i.e. negative person counts - check whether
# restore_alot is expected to keep the values non-negative.
for title, column in (('Minimize squares:', 'Squares'),
                      ('Minimize dependences:', 'Dependences')):
    print(title)
    print('Min value:', df_alot[column].min())
    print('Max value:', df_alot[column].max())
    print('Zeros count:', (df_alot[column] == 0).sum(), '\n')

df_alot.head(12)



Minimize squares:
Min value: -37.52709764712699
Max value: 1614.6076222223023
Zeros count: 45 

Minimize dependences:
Min value: 0.7740832557139339
Max value: 1428.561380995879
Zeros count: 0 



Unnamed: 0,District,Table_0,Table_1,Table_2,Squares,Dependences,Same
0,East Forests,Agriculture,Rural,col_0,278.307328,242.319898,False
1,East Forests,Agriculture,Rural,col_1,180.819681,162.054407,False
2,East Forests,Agriculture,Rural,col_2,245.232623,214.041309,False
3,East Forests,Agriculture,Rural,col_3,316.872108,278.025187,False
4,East Forests,Agriculture,Rural,col_4,22.704393,29.897229,False
5,East Forests,Agriculture,Rural,col_5,225.372176,194.617631,False
6,East Forests,Agriculture,Suburban,col_0,66.824392,57.199163,False
7,East Forests,Agriculture,Suburban,col_1,0.91605,38.252643,False
8,East Forests,Agriculture,Suburban,col_2,33.749687,50.524054,False
9,East Forests,Agriculture,Suburban,col_3,105.389172,65.627331,False
