In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from pprint import pprint
from lifelines import KaplanMeierFitter
from lifelines import CoxPHFitter

In [4]:
### Build pharmacy dataset to run Survival Analysis on insolvency

In [21]:
usecols = ['CompanyName', 'CompanyNumber', 'Sics', 'RegAddress.Country',
           'CompanyStatus', 'CountryOfOrigin', 'IncorporationDate',
            'Mortgages.NumMortCharges', 'Mortgages.NumMortOutstanding',
            'Mortgages.NumMortPartSatisfied', 'Mortgages.NumMortSatisfied']

corig = pd.read_csv('ch_companies_clean_1.csv', usecols=usecols, header=0, index_col=None)

print(corig.shape)

corig.head()

(4400981, 11)


Unnamed: 0,CompanyName,CompanyNumber,Sics,RegAddress.Country,CompanyStatus,CountryOfOrigin,IncorporationDate,Mortgages.NumMortCharges,Mortgages.NumMortOutstanding,Mortgages.NumMortPartSatisfied,Mortgages.NumMortSatisfied
0,! LTD,08209948,99999 - Dormant Company,,Active,United Kingdom,11/09/2012,0,0,0,0
1,!? LTD,11399177,47710 - Retail sale of clothing in specialised...,UNITED KINGDOM,Active,United Kingdom,05/06/2018,0,0,0,0
2,!BIG IMPACT GRAPHICS LIMITED,11743365,"18129 - Printing n.e.c., 59112 - Video product...",UNITED KINGDOM,Active,United Kingdom,28/12/2018,0,0,0,0
3,!NNOV8 LIMITED,11006939,62090 - Other information technology service a...,ENGLAND,Active,United Kingdom,11/10/2017,0,0,0,0
4,!NSPIRED INVESTMENTS LTD,SC606050,68209 - Other letting and operating of own or ...,SCOTLAND,Active,United Kingdom,22/08/2018,1,1,0,0


In [25]:
# 47730 for pharmacies
cedit = corig.copy()

In [26]:
cedit = cedit[cedit.Sics.str.contains('47730') == True]

print(cedit.shape)

cedit.head(2)

(4490, 11)


Unnamed: 0,CompanyName,CompanyNumber,Sics,RegAddress.Country,CompanyStatus,CountryOfOrigin,IncorporationDate,Mortgages.NumMortCharges,Mortgages.NumMortOutstanding,Mortgages.NumMortPartSatisfied,Mortgages.NumMortSatisfied
150,"""RED BAND"" CHEMICAL COMPANY, LIMITED",SC016876,47730 - Dispensing chemist in specialised stores,,Active,United Kingdom,25/06/1932,12,1,0,11
8761,123 LEEDS LIMITED,01873281,47730 - Dispensing chemist in specialised stores,,Active,United Kingdom,19/12/1984,62,4,0,58


In [27]:
# decided to keep non-England pharmacies for now
cedit['RegAddress.Country'].value_counts()

ENGLAND             1070
UNITED KINGDOM       753
SCOTLAND              82
WALES                 37
NORTHERN IRELAND      30
Name: RegAddress.Country, dtype: int64

In [28]:
# decided to keep non-England pharmacies for now
cedit.CountryOfOrigin.value_counts()

United Kingdom    4490
Name: CountryOfOrigin, dtype: int64

In [36]:
# not representative due to lack of dissolved companies
print(cedit.CompanyStatus.value_counts())
print(f'Non-active rate: {round((4490 - 4398) / 4490, 4)}')

Active                             4398
Active - Proposal to Strike off      54
Liquidation                          30
In Administration                     6
Voluntary Arrangement                 2
Name: CompanyStatus, dtype: int64
Non-active rate: 0.0205


In [37]:
# recharge project doable due to distribution
cedit['Mortgages.NumMortOutstanding'].value_counts()[:10]

0    2364
1     679
2     650
3     362
4     165
5      83
6      55
7      29
8      21
9      18
Name: Mortgages.NumMortOutstanding, dtype: int64

In [14]:
test = pd.read_csv('BasicCompanyDataAsOneFile-2019-10-01.csv')

print(test.shape)

test.head()

  interactivity=interactivity, compiler=compiler, result=result)


(4433045, 55)


Unnamed: 0,CompanyName,CompanyNumber,RegAddress.CareOf,RegAddress.POBox,RegAddress.AddressLine1,RegAddress.AddressLine2,RegAddress.PostTown,RegAddress.County,RegAddress.Country,RegAddress.PostCode,...,PreviousName_7.CONDATE,PreviousName_7.CompanyName,PreviousName_8.CONDATE,PreviousName_8.CompanyName,PreviousName_9.CONDATE,PreviousName_9.CompanyName,PreviousName_10.CONDATE,PreviousName_10.CompanyName,ConfStmtNextDueDate,ConfStmtLastMadeUpDate
0,! LTD,8209948,,,METROHOUSE 57 PEPPER ROAD,HUNSLET,LEEDS,YORKSHIRE,,LS10 2RU,...,,,,,,,,,25/09/2020,11/09/2019
1,!? LTD,11399177,,,THE STUDIO HATHERLOW HOUSE,HATHERLOW,ROMILEY,,UNITED KINGDOM,SK6 3DY,...,,,,,,,,,19/06/2020,05/06/2019
2,!BIG IMPACT GRAPHICS LIMITED,11743365,,,372 OLD STREET,335 ROSDEN HOUSE,LONDON,,UNITED KINGDOM,EC1V 9LT,...,,,,,,,,,10/01/2020,
3,!NKED LTD,12234705,,,29 CORRY DRIVE,,LONDON,,UNITED KINGDOM,SW9 8QS,...,,,,,,,,,13/10/2020,
4,!NNOV8 LIMITED,11006939,,,OLD BARN FARM OLD BARN FARM,HARTFIELD ROAD,EDENBRIDGE,KENT,ENGLAND,TN8 5NF,...,,,,,,,,,24/10/2019,10/10/2018


In [15]:
test.columns

Index(['CompanyName', ' CompanyNumber', 'RegAddress.CareOf',
       'RegAddress.POBox', 'RegAddress.AddressLine1',
       ' RegAddress.AddressLine2', 'RegAddress.PostTown', 'RegAddress.County',
       'RegAddress.Country', 'RegAddress.PostCode', 'CompanyCategory',
       'CompanyStatus', 'CountryOfOrigin', 'DissolutionDate',
       'IncorporationDate', 'Accounts.AccountRefDay',
       'Accounts.AccountRefMonth', 'Accounts.NextDueDate',
       'Accounts.LastMadeUpDate', 'Accounts.AccountCategory',
       'Returns.NextDueDate', 'Returns.LastMadeUpDate',
       'Mortgages.NumMortCharges', 'Mortgages.NumMortOutstanding',
       'Mortgages.NumMortPartSatisfied', 'Mortgages.NumMortSatisfied',
       'SICCode.SicText_1', 'SICCode.SicText_2', 'SICCode.SicText_3',
       'SICCode.SicText_4', 'LimitedPartnerships.NumGenPartners',
       'LimitedPartnerships.NumLimPartners', 'URI', 'PreviousName_1.CONDATE',
       ' PreviousName_1.CompanyName', ' PreviousName_2.CONDATE',
       ' PreviousName_2.C

In [16]:
test.CompanyStatus.value_counts()

Active                                              4186478
Active - Proposal to Strike off                      149291
Liquidation                                           90428
In Administration                                      3732
Live but Receiver Manager on at least one charge       1264
Voluntary Arrangement                                  1018
In Administration/Administrative Receiver               317
RECEIVERSHIP                                            199
ADMINISTRATION ORDER                                    145
ADMINISTRATIVE RECEIVER                                 123
In Administration/Receiver Manager                       29
RECEIVER MANAGER / ADMINISTRATIVE RECEIVER               17
VOLUNTARY ARRANGEMENT / RECEIVER MANAGER                  3
VOLUNTARY ARRANGEMENT / ADMINISTRATIVE RECEIVER           1
Name: CompanyStatus, dtype: int64

In [20]:
test[' CompanyNumber'].max()

'ZC000204'

In [38]:
test2 = pd.read_csv('ch_companies_clean_1.csv', header=0)

print(test2.columns)

Index(['Unnamed: 0', 'CompanyName', 'CompanyNumber', 'Sics',
       'RegAddress.AddressLine1', 'RegAddress.AddressLine2',
       'RegAddress.PostTown', 'RegAddress.County', 'RegAddress.Country',
       'RegAddress.PostCode', 'CompanyCategory', 'CompanyStatus',
       'CountryOfOrigin', 'IncorporationDate', 'Accounts.AccountRefDay',
       'Accounts.AccountRefMonth', 'Accounts.NextDueDate',
       'Accounts.LastMadeUpDate', 'Accounts.AccountCategory',
       'Returns.NextDueDate', 'Returns.LastMadeUpDate',
       'Mortgages.NumMortCharges', 'Mortgages.NumMortOutstanding',
       'Mortgages.NumMortPartSatisfied', 'Mortgages.NumMortSatisfied',
       'LimitedPartnerships.NumGenPartners',
       'LimitedPartnerships.NumLimPartners', 'URI', 'PreviousName_1.CONDATE',
       'PreviousName_1.CompanyName', 'ConfStmtNextDueDate',
       'ConfStmtLastMadeUpDate'],
      dtype='object')
