In [0]:
!pip install -U -q PyDrive

In [0]:
import requests
import pandas as pd
import numpy as np
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
!mkdir data

In [0]:
file_id = '1XTHFtpUhkjC45L9Zg8Z5NN_SC46gWXf8'
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('data/auto.json')

In [0]:
df = pd.read_json('data/auto.json', orient='records')

In [0]:
pd.options.display.float_format = '{:,.2f}'.format

In [8]:
df.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.0,Ford,Focus
1,E432XX77RUS,1,6500.0,Toyota,Camry
2,7184TT36RUS,1,2100.0,Ford,Focus
3,X582HE161RUS,2,2000.0,Ford,Focus
4,92918M178RUS,1,5700.0,Ford,Focus


In [9]:
len(df)

825

In [0]:
np.random.seed(21)

In [0]:
indices = [np.random.randint(825) for i in range(200)]
data = {'CarNumber': [df['CarNumber'][i] for i in indices]}
data.update({col: [np.random.choice(df[col]) for i in range(200)]
             for col in ['Refund', 'Fines']})
data.update({col: [df[col][i] for i in indices] for col in ['Make', 'Model']})

df2 = pd.DataFrame(data)

In [18]:
df2.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,T020MM116RUS,2,10200.0,Skoda,Octavia
1,7361C8197RUS,2,4000.0,Ford,Focus
2,M580CH197RUS,2,18900.0,Ford,Focus
3,T940CC96RUS,2,8238.22,Ford,Focus
4,T6418M116RUS,1,12000.0,Ford,Focus


In [0]:
concat_rows = pd.concat([df, df2])

In [20]:
concat_rows.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.0,Ford,Focus
1,E432XX77RUS,1,6500.0,Toyota,Camry
2,7184TT36RUS,1,2100.0,Ford,Focus
3,X582HE161RUS,2,2000.0,Ford,Focus
4,92918M178RUS,1,5700.0,Ford,Focus


In [21]:
concat_rows.count()

CarNumber    1025
Refund       1025
Fines        1025
Make         1025
Model        1006
dtype: int64

In [0]:
fines = concat_rows

In [0]:
fines['Year'] = pd.Series([np.random.randint(1980, 2020) for i in range(1025)])

In [24]:
fines.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2,3200.0,Ford,Focus,1983
1,E432XX77RUS,1,6500.0,Toyota,Camry,1996
2,7184TT36RUS,1,2100.0,Ford,Focus,1983
3,X582HE161RUS,2,2000.0,Ford,Focus,1997
4,92918M178RUS,1,5700.0,Ford,Focus,2007


In [25]:
fines.count()

CarNumber    1025
Refund       1025
Fines        1025
Make         1025
Model        1006
Year         1025
dtype: int64

In [0]:
url = 'https://api.census.gov/data/2010/surname?get=NAME,COUNT&RANK=1:100'
chars = '""[],0123456789'
page = requests.get(url).text
surnames = [line.strip(chars).capitalize() for line in page.split('\n')]

In [0]:
owners = pd.DataFrame(
    {'CarNumber': fines['CarNumber'][:825],
     'NAME': [np.random.choice(surnames) for i in range(825)]}
     )

In [28]:
owners.head()

Unnamed: 0,CarNumber,NAME
0,Y163O8161RUS,Patel
1,E432XX77RUS,Wright
2,7184TT36RUS,Rodriguez
3,X582HE161RUS,Young
4,92918M178RUS,Cox


In [0]:
digits = list('0123456789')
letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

In [0]:
def generate_car_number():
    res = np.random.choice(digits + letters)
    for i in range(3):
        res += np.random.choice(digits)
    for i in range(2):
        res += np.random.choice(digits + letters)
    for i in range(3):
        res += np.random.choice(digits)
    res += 'RUS'
    return res

In [0]:
data = {'CarNumber': [generate_car_number() for i in range(5)]}
data.update({col: [np.random.choice(fines[col]) for i in range(5)]
             for col in ['Refund', 'Fines', 'Make', 'Model', 'Year']})

fines = pd.concat([fines, pd.DataFrame(data)])

In [32]:
len(fines)

1030

In [0]:
owners = owners.drop(owners.tail(20).index)

In [0]:
data = {'CarNumber': [generate_car_number() for i in range(5)],
        'NAME': [np.random.choice(surnames) for i in range(5)]}

owners = pd.concat([owners, pd.DataFrame(data)])

In [35]:
len(owners)

810

In [36]:
pd.pivot_table(fines, values='Fines', index=['Make', 'Model'], columns=['Year'],
               aggfunc=np.sum)

Unnamed: 0_level_0,Year,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Make,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
Ford,Focus,125552.88,227338.22,128352.88,181676.44,133300.0,202500.0,229738.22,251276.44,220214.66,145838.22,91314.66,361138.22,91076.44,164100.0,78914.66,97938.22,176438.22,119614.66,141400.0,75700.0,148838.22,114914.66,151614.66,145100.0,94638.22,113538.22,206200.0,105800.0,88738.22,359214.66,187038.22,217538.22,63538.22,125738.22,162538.22,188538.22,347114.66,137876.44,79700.0,171076.44
Ford,Mondeo,,19800.0,46200.0,1100.0,6700.0,,,,,1100.0,,,,,8600.0,6700.0,34400.0,,,,,,,34400.0,,,,,,10600.0,,,,2200.0,,,26000.0,8600.0,,
Skoda,Octavia,10238.22,,500.0,14200.0,,,,110438.22,2000.0,12000.0,13100.0,,63300.0,100.0,28200.0,21238.22,,3000.0,3800.0,2000.0,2000.0,145000.0,47576.44,300.0,,5100.0,,27138.22,,6300.0,3900.0,,500.0,11000.0,8538.22,37800.0,4000.0,25400.0,11600.0,19600.0
Toyota,Camry,,19800.0,,8238.22,,5200.0,,600.0,26800.0,,,,,,,,6500.0,,2400.0,,8000.0,13000.0,,15400.0,1000.0,,,,,,1500.0,,,12000.0,1000.0,10600.0,,,,8238.22
Toyota,Corolla,8000.0,34300.0,,300.0,,,,7800.0,8900.0,,7600.0,,,3200.0,12238.22,,,,12700.0,9600.0,,,,2000.0,24000.0,6800.0,,,3400.0,30300.0,16000.0,,,2000.0,,,4400.0,900.0,,
Volkswagen,Focus,,,,43600.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Volkswagen,Golf,,10600.0,10100.0,,,10600.0,9000.0,4600.0,,30900.0,,,,9300.0,,200.0,24000.0,,,300.0,,,,,,188800.0,5000.0,,2300.0,32000.0,,5300.0,8538.22,,300.0,24300.0,,,200.0,18400.0
Volkswagen,Jetta,,,,,,,,,,1000.0,6900.0,,,,,,1200.0,46000.0,,,,,,,,,,,,,40600.0,500.0,,,,4000.0,,1600.0,9000.0,
Volkswagen,Passat,,,8600.0,,300.0,600.0,9900.0,,,,23800.0,,500.0,3000.0,10238.22,,,4400.0,16138.22,,,,,9500.0,11400.0,3600.0,,,,22400.0,3900.0,3200.0,29700.0,,,17500.0,,13238.22,8100.0,9000.0
Volkswagen,Touareg,,,,6300.0,,,,,,,,,,,,,,8238.22,,,,,,,,,,5800.0,,,5800.0,,1300.0,500.0,,,,,,


In [0]:
fines.to_csv('data/fines.csv', index=False)
owners.to_csv('data/owners.csv', index=False)

In [0]:
uploaded = drive.CreateFile({'title': 'fines.csv'})
uploaded.SetContentFile('data/fines.csv')
uploaded.Upload()