In [1]:
import pandas as pd
import calmap
import numpy as np
import os

In [2]:
from pathlib import Path
from os import environ, write

import re
from datetime import datetime, time
import datetime as dt
from collections import defaultdict

import requests

In [42]:
def load_data():
    if environ.get("USE_LOCAL_FILES"):
        paths = Path("extracted").rglob("*.csv")
        df_parts = []

        for path in sorted(paths):

            print("Reading for analysis:", path)

            with open(path, "r", encoding="utf-8") as fp:
                df_part = pd.read_csv(fp, delimiter=",", dtype=str)
                df_parts.append(df_part)

            # if df is None:
            #     df = df_part
            # else:
            #     df = df.append(df_part, ignore_index=True)

        df = pd.concat(df_parts, ignore_index=True)

    else:
        raise NotImplementedError("Remote data not yet implemented")
    
    df['date_of_birth'] = pd.to_datetime(df['date_of_birth'], errors = 'coerce')
    df['date_of_proceeding'] = pd.to_datetime(df['date_of_proceeding'], errors = 'coerce')
    df['date_of_publication'] = pd.to_datetime(df['date_of_publication'], errors = 'coerce')

    return df

In [43]:
df = load_data()

Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-01-17T08-05-41.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-01-24T08-05-43.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-01-31T03-50-40.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-02-07T03-47-56.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-02-14T03-54-29.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-02-21T04-08-08.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-02-28T04-20-16.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-03-07T07-20-48.jsonl.csv
Reading for analysis: extracted/buckets/insolven

Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-20T00-26-46.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-20T07-31-07.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-20T13-43-00.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-20T20-17-09.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-21T06-38-42.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-21T13-46-12.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-21T19-37-11.jsonl.csv
Reading for analysis: extracted/buckets/insolvenzenard/insolvenzbekanntmachungen-scraper/2021-10-22T07-02-46.jsonl.csv
Reading for analysis: extracted/buckets/insolven

In [44]:
df.drop_duplicates(subset ="description_hash", keep = "last", inplace = True)

## Filtern nach Regelinsolvenz

In [45]:
# Funktion in_kind mit dem Argument/Wert df wird definiert

def in_kind(df):
    in_unternehmen = (df[df['kind'] == 'ik'])
    return in_unternehmen

# Funktion def wird ausgeführt
data_ik = in_kind(df)

In [46]:
data_nw_in = data_ik[data_ik["federal_state"].isin(['Nordrhein-Westfalen', "nw"])]

In [47]:
def in_eröffnung(data_nw_in):
    in_eröffnet = data_nw_in[data_nw_in['type_of_proceeding'].isin(['Eröffnungen', 'Eroeffnung', 'Eroeffnung_Insolvenzverfahren'])]
    return in_eröffnet

# Funktion def wird ausgeführt
data_nw_eröffnet = in_eröffnung(data_nw_in)

In [48]:
date_range = data_nw_eröffnet[data_nw_eröffnet.date_of_publication.between('2021-10', '2021-11')]

In [55]:
date_range.tail()

Unnamed: 0,_key,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,description_hash,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint
1818639,,70a IK 507/21,Köln,1981-12-28,2021-10-19,2021-10-29,685b236d9cca0e1cfdabc8a2330095dc9caae4c84e6eb8...,Nordrhein-Westfalen,,ik,,Eröffnungen,50829,tbl_ergebnis:5:frm_detail,neu,"Aydin, Önder",,366cb679a912571c7fd6d589951926b7a6f3360dee73e7...
1818640,,70a IK 531/21,Köln,1976-02-17,2021-10-28,2021-10-29,06b67becec59f06ff68f70236390c7a73b207d7572b92f...,Nordrhein-Westfalen,,ik,,Eröffnungen,50939,tbl_ergebnis:4:frm_detail,neu,"Atalan, Seksane",,545e49665b317cf0e2fed2af05a794bbd6e2a2e22afd01...
1818642,,88 IK 1032/21,Bochum,1979-10-19,2021-10-29,2021-10-29,05994129b76c720193b58d6b5ea630060ac75b8050f829...,Nordrhein-Westfalen,,ik,,Eröffnungen,44649,tbl_ergebnis:2:frm_detail,neu,"Akova, Zelal",,f13c172cc5363e74894fd34a6c5805e1e55c783f4ccc38...
1818644,,10 IK 266/21,Detmold,1983-12-23,2021-10-28,2021-10-29,05dcd2c1932ac05cf2fae3476d7c2493319a1dad76f038...,Nordrhein-Westfalen,,ik,,Eröffnungen,32825,tbl_ergebnis:1:frm_detail,neu,"Adam, Katharina",,d3402ac6f3ff705022dc1557536c83bf6fa4953984fef4...
1818646,,70a IK 532/21,Köln,1993-11-26,2021-10-28,2021-10-29,6efeb4efbd2c576b921726b284bd0de908f00b228089bc...,Nordrhein-Westfalen,,ik,,Eröffnungen,50676,tbl_ergebnis:0:frm_detail,neu,"Acar, Jülide Hüsniye",,5c69e6b58a6c996070e66f7a409040ccb77cb4c901e7da...


In [58]:
eröffnung_zipcode_nw = date_range.groupby(['court'], as_index=False).count()
eröffnung_zipcode_nw.sample(10)

Unnamed: 0,court,_key,case_nr,date_of_birth,date_of_proceeding,date_of_publication,description_hash,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint
13,Köln,0,220,214,220,220,220,220,0,220,0,220,220,220,220,220,0,220
6,Dortmund,0,128,128,128,128,128,128,0,128,0,128,128,128,128,128,0,128
8,Düsseldorf,0,95,94,95,95,95,95,0,95,0,95,95,95,95,95,0,95
14,Mönchengladbach,0,72,72,72,72,72,72,0,72,0,72,72,72,72,72,0,72
16,Paderborn,0,47,46,47,47,47,47,0,47,0,47,47,47,47,47,0,47
17,Siegen,0,18,18,18,18,18,18,0,18,0,18,18,18,18,18,0,18
9,Essen,0,164,163,163,164,164,164,0,164,0,164,164,164,164,164,0,164
12,Krefeld,0,48,48,48,48,48,48,0,48,0,48,48,48,48,48,0,48
5,Detmold,0,27,27,27,27,27,27,0,27,0,27,27,27,27,27,0,27
15,Münster (Westfalen),0,90,88,88,90,90,90,0,90,0,90,90,90,90,90,0,90


In [59]:
eröffnung_zipcode_nw.to_csv('freq_eröffnet_nw_court_2021_10.csv')

In [11]:
publication_frequency_eröffent = data_nw_eröffnet.groupby('date_of_publication').date_of_publication.count().reset_index(name="count")

In [12]:
publication_frequency_eröffent.to_csv('freq_in_eröffnet_neu.csv')

In [51]:
data_nw_week_in = (
    data_nw_eröffnet[["date_of_publication", "description_hash"]]
    .groupby(pd.Grouper(key="date_of_publication", freq="W-MON", label="left"))
    .count()
)

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'

In [41]:
data_nw_week_in.to_csv('freq_ik_eröffnet_week_nrw_zipcode.csv')

#ENDE ENDE ENDE

## SCHLUSS

In [12]:
#print(dups_1)

description_hash
0000705c749cc09185879315e4a1505d6eff70e3bcb975ffb83d4c1e5673c191    1
0000ae5830807ce011028926622863a742056df66c88dfcad228cb731004354d    1
0000fe55a302fc97f436e052e684200c8c95b6f1ea398226927483682b59cdde    1
00011b663dbfd4cf2130616a471e979bd863d4496e0cccf08f533d4bfc6b0175    1
00015337b01dc0850bdae9574662a1558d100ac020fdfeaedbb8f329162a2e28    1
                                                                   ..
fffed66d137397eb6aeb9fb144bb77ebf1b1f9c5da7d6911ad75c70e51a51e06    1
ffff172435556dfc9d799db7e31ee0241a76f05bb225d17112b039000f30b57d    1
ffff1eac3183475e25de9445589d3f665939568c75463c5190255664b1f0e303    1
ffff5bc8f53774792e640e933a23255642e2ceed3f90eb4338ae4acf20b78714    1
ffffcb817d424deb28594100ff9f746c5eac77ce5006a40e46d762d5e00ade11    1
Length: 323586, dtype: int64


In [13]:
df.date_of_publication.head()

0   2019-01-02
1   2019-01-02
2   2019-01-02
3   2019-01-02
4   2019-01-02
Name: date_of_publication, dtype: datetime64[ns]

In [14]:
df.date_of_publication.tail()

477168   2021-10-23
477169   2021-10-23
477170   2021-10-23
477171   2021-10-23
477172   2021-10-23
Name: date_of_publication, dtype: datetime64[ns]

#### Aufteilung in Unternehmensinsolvenzen

In [15]:
# Funktion in_kind mit dem Argument/Wert df wird definiert

def in_kind(df):
    in_unternehmen = (df[df['kind'] == 'in'])
    return in_unternehmen

# Funktion def wird ausgeführt
data_in = in_kind(df)

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 323586 entries, 0 to 477172
Data columns (total 15 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   case_nr              323586 non-null  object        
 1   court                323586 non-null  object        
 2   date_of_birth        214900 non-null  datetime64[ns]
 3   date_of_proceeding   287511 non-null  datetime64[ns]
 4   date_of_publication  323586 non-null  datetime64[ns]
 5   description_hash     323586 non-null  object        
 6   detail_form_name     323586 non-null  object        
 7   federal_state        323586 non-null  object        
 8   format               323586 non-null  object        
 9   kind                 323265 non-null  object        
 10  name                 323586 non-null  object        
 11  register             61702 non-null   object        
 12  request_fingerprint  323586 non-null  object        
 13  type_of_procee

In [16]:
# convert series to df
#data_in_2 = pd.DataFrame(data_in)

In [17]:
#new_data = data_in.replace(['Nordrhein-Westfalen'], 'nw')

In [17]:
data_nw_in = data_in[(data_in.federal_state == 'Nordrhein-Westfalen')]

In [32]:
data_nw_in.tail(30)

Unnamed: 0,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,description_hash,detail_form_name,federal_state,format,kind,name,register,request_fingerprint,type_of_proceeding,zipcode
475898,43 IN 520/21,Bielefeld,1985-01-10,2021-10-22,2021-10-22,a2040ac76d7b46b10e716716e3fe9dd3010ee53b2e8270...,tbl_ergebnis:0:frm_detail,Nordrhein-Westfalen,neu,in,"Alatas, Gönül",,d138a8ac947f728ac58d56b7c50bf9e53117bf679ba7a1...,Sonstiges,33829
475904,109 IN 53/21,Hagen,1972-03-17,2021-10-19,2021-10-22,33a59cbe7e72129ecc9b6cfe04a554eed178f0400418c4...,tbl_ergebnis:71:frm_detail,Nordrhein-Westfalen,neu,in,"Urumis, Dimitrios",,1e62c725c0d491f29dca0aaad3c576524ebe945ced6f98...,Eröffnungen,58636
475907,70k IN 173/21,Köln,1985-03-19,2021-10-20,2021-10-22,b50281ce44e34562477e3a156bb6cb70fbd2f95682da86...,tbl_ergebnis:68:frm_detail,Nordrhein-Westfalen,neu,in,"Taheri, Aurang",,b2fe5f74f605d3c54d385ff7606db77be87fdc176482ec...,Eröffnungen,50677
475908,70k IN 193/21,Köln,1982-06-21,2021-10-20,2021-10-22,18fdb63f9c936bf83d6f565fe1f3529274c98b6d3709aa...,tbl_ergebnis:67:frm_detail,Nordrhein-Westfalen,neu,in,"Strathe, Elmar",,3626a1fc3ffb521ac787542052046cf53d40d902514b03...,Eröffnungen,50823
475925,2 IN 191/21,Paderborn,1995-04-19,2021-10-18,2021-10-22,2a97693c2d2785c049aed8a88339a63f246d8d0706c13f...,tbl_ergebnis:50:frm_detail,Nordrhein-Westfalen,neu,in,"Özer, Özercan",,3d032548371d6998bbf5d1230c5dad30ef89930252feef...,Eröffnungen,33106
475926,145 IN 449/21,Wuppertal,NaT,2021-10-19,2021-10-22,38f5e793306e1104d737e96454c4ea174b8d71eaaa4b53...,tbl_ergebnis:49:frm_detail,Nordrhein-Westfalen,neu,in,Oelbermann Metallbau UG (haftungsbeschränkt),"Wuppertal, HRB 25394",a9af81e3090c5353214d288932398ecb6c83a924043984...,Eröffnungen,25394
475929,145 IN 465/21,Wuppertal,1985-04-05,2021-10-19,2021-10-22,eb26755d26966bc9d5cc815b76cb07258162d91d2dbf5f...,tbl_ergebnis:45:frm_detail,Nordrhein-Westfalen,neu,in,"Lustig, Armin Stephan",,d923e8a002ad44488b2b1fa8659a14b0b1f657ba8a270f...,Eröffnungen,42281
475932,109 IN 90/21,Hagen,1965-06-26,2021-10-20,2021-10-22,4e9b3d297527cb9a8db2bee7a81e620a83cab25d010cd2...,tbl_ergebnis:42:frm_detail,Nordrhein-Westfalen,neu,in,"Levien, Tim",,f7452bdc9d2392bcd9eb10378edbd52469842821dbb11b...,Eröffnungen,58553
475942,34 IN 11/21,Mönchengladbach,1982-08-12,2021-10-20,2021-10-22,2bef006127494f302a2b6f8d9aa630532d4cc945ff2ecd...,tbl_ergebnis:1:frm_detail,Nordrhein-Westfalen,neu,in,"Oance, Camelia",,e168afe1b351ea8d92af7446e522aade8654af4f6eeb0a...,Abweisungen_mangels_Masse,41812
475949,70d IN 4/21,Köln,NaT,2021-10-18,2021-10-22,154eaf5f9934ed767fcdb0ad87c0c78685bd0c5f6d3538...,tbl_ergebnis:27:frm_detail,Nordrhein-Westfalen,neu,in,IMPS GmbH,"Köln, HRB 78249",76a123078bb33c9c9d1633c025f8e86f007b43480ffbf0...,Eröffnungen,78249


#### Anzahl der Vorgänge bei Unternehmensinsolvenzen

In [19]:
#data_nw_in.type_of_proceeding.unique()
data_nw_in['type_of_proceeding'].value_counts()

Eröffnungen                                              13193
Sonstiges                                                11884
Entscheidungen_im_Restschuldbefreiungsverfahren          11461
Entscheidungen_im_Verfahren                              10989
Sicherungsmaßnahmen                                       3827
Verteilungsverzeichnisse_(§_188_InsO)_d._Verw./Treuh.     2339
Abweisungen_mangels_Masse                                  768
Überwachte_Insolvenzpläne                                   17
Entscheidungen_nach_Aufhebung_des_Verfahrens                13
Name: type_of_proceeding, dtype: int64

In [24]:
data_nw_in.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 54491 entries, 71 to 477167
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   case_nr              54491 non-null  object        
 1   court                54491 non-null  object        
 2   date_of_birth        24720 non-null  datetime64[ns]
 3   date_of_proceeding   54287 non-null  datetime64[ns]
 4   date_of_publication  54491 non-null  datetime64[ns]
 5   description_hash     54491 non-null  object        
 6   detail_form_name     54491 non-null  object        
 7   federal_state        54491 non-null  object        
 8   format               54491 non-null  object        
 9   kind                 54491 non-null  object        
 10  name                 54491 non-null  object        
 11  register             16263 non-null  object        
 12  request_fingerprint  54491 non-null  object        
 13  type_of_proceeding   54491 no

In [25]:
publication_frequency = data_nw_in.groupby('date_of_publication').date_of_publication.count().reset_index(name="count")

In [26]:
publication_frequency.tail(50)

Unnamed: 0,date_of_publication,count
670,2021-08-19,109
671,2021-08-20,98
672,2021-08-23,122
673,2021-08-24,127
674,2021-08-25,141
675,2021-08-26,123
676,2021-08-27,111
677,2021-08-30,115
678,2021-08-31,104
679,2021-09-01,116


In [27]:
publication_frequency.to_csv('freq_in_nw_all_cases.csv')

#### Unternehmensinsolvenz Eröffnungen Zeitraum [...] in NRW

In [28]:
def in_eröffnung(data_nw_in):
    in_eröffnet = (data_nw_in[data_nw_in['type_of_proceeding'] == 'Eröffnungen'])
    return in_eröffnet

# Funktion def wird ausgeführt
data_nw_eröffnet = in_eröffnung(data_nw_in)

In [29]:
data_nw_eröffnet.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13193 entries, 98 to 475975
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   case_nr              13193 non-null  object        
 1   court                13193 non-null  object        
 2   date_of_birth        7918 non-null   datetime64[ns]
 3   date_of_proceeding   13115 non-null  datetime64[ns]
 4   date_of_publication  13193 non-null  datetime64[ns]
 5   description_hash     13193 non-null  object        
 6   detail_form_name     13193 non-null  object        
 7   federal_state        13193 non-null  object        
 8   format               13193 non-null  object        
 9   kind                 13193 non-null  object        
 10  name                 13193 non-null  object        
 11  register             4848 non-null   object        
 12  request_fingerprint  13193 non-null  object        
 13  type_of_proceeding   13193 no

In [30]:
publication_frequency_eröffent = data_nw_eröffnet.groupby('date_of_publication').date_of_publication.count().reset_index(name="count")

In [31]:
publication_frequency_eröffent.to_csv('freq_in_nw_eröffnet.csv')

#### Unternehmensinsolvenzen Sicherungsmaßnahmen in NRW

In [None]:
def in_eröffnung(data_nw_in):
    in_sicherungsmaßnahmen = (data_nw_in[data_nw_in['type_of_proceeding'] == 'Eröffnungen'])
    return in_eröffnet

# Funktion def wird ausgeführt
data_nw_eröffnet = in_sicherungsmaßnahmen(data_nw_in)

#### date_of_publication

In [81]:
freq_in_nw_eröffnung = data_nw_eröffnet.groupby(['date_of_publication'], as_index=False).count() 
freq_in_nw_eröffnung.sample(5)

Unnamed: 0,date_of_publication,case_nr,court,date_of_birth,date_of_proceeding,detail_form_name,federal_state,format,kind,name,register,request_fingerprint,type_of_proceeding,zipcode
21,2021-09-07,22,22,13,21,22,22,22,22,22,7,22,22,22
18,2021-09-02,64,64,40,60,64,64,64,64,64,22,64,64,64
6,2021-08-17,42,42,32,42,42,42,42,42,42,8,42,42,42
7,2021-08-18,38,38,22,38,38,38,38,38,38,12,38,38,38
20,2021-09-06,17,17,9,17,17,17,17,17,17,8,17,17,17


In [82]:
freq_in_nw_eröffnung.to_csv('freq_in_nw_eröffnung.csv')

#### Group_by date_of_birth Eröffnung

In [83]:
# convert str to date
freq_in_nw_eröffnung_birth = data_nw_eröffnet.copy()
freq_in_nw_eröffnung_birth.date_of_birth = freq_in_nw_eröffnung_birth.date_of_birth.dt.year

#### Count Eröffnungen cases by date_of_birth 

In [84]:
freq_in_nw_eröffnung_birth = freq_in_nw_eröffnung_birth.groupby(['date_of_birth'], as_index=False).count()
freq_in_nw_eröffnung_birth.sample(10)

Unnamed: 0,date_of_birth,case_nr,court,date_of_proceeding,date_of_publication,detail_form_name,federal_state,format,kind,name,register,request_fingerprint,type_of_proceeding,zipcode
7,1941.0,2,2,2,2,2,2,2,2,2,0,2,2,2
10,1945.0,4,4,4,4,4,4,4,4,4,0,4,4,4
6,1939.0,2,2,2,2,2,2,2,2,2,0,2,2,2
8,1942.0,2,2,2,2,2,2,2,2,2,0,2,2,2
53,1991.0,12,12,12,12,12,12,12,12,12,0,12,12,12
39,1977.0,12,12,12,12,12,12,12,12,12,0,12,12,12
56,1994.0,10,10,10,10,10,10,10,10,10,0,10,10,10
54,1992.0,6,6,6,6,6,6,6,6,6,0,6,6,6
9,1944.0,2,2,2,2,2,2,2,2,2,0,2,2,2
25,1963.0,6,6,6,6,6,6,6,6,6,0,6,6,6


In [142]:
#freq_in_nw_eröffnung_birth['date_of_birth'] = freq_in_nw_eröffnung_birth['date_of_birth'].replace('\.0', '', regex=True)
#freq_in_nw_eröffnung_birth.sample(5)

Unnamed: 0,date_of_birth,_key,case_nr,court,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint
45,1971.0,3,108,108,108,108,108,108,108,108,108,108,0,0,0,0,0
32,1958.0,1,48,48,48,48,48,48,48,48,48,48,0,0,0,0,0
21,1947.0,0,4,4,4,4,4,4,4,4,4,4,0,0,0,0,0
38,1964.0,7,71,71,71,71,71,71,71,71,71,71,0,0,0,0,0
75,2011.0,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0


In [89]:
# Berechnen wie viele NaN 
missing_birth_values = 569 - 790
print(missing_birth_values)
# Dublikate in date_of_birth u.date_of_publication entfernen --> extractor

-221


In [46]:
freq_in_nw_eröffnung_birth.to_csv('freq_in_nw_eröffnung_birth.csv')

#### Eröffnung group_by zipcode 

In [143]:
freq_in_nw_eröffnung_zipcode = data_nw_eröffnet.groupby(['zipcode'], as_index=False).count()
freq_in_nw_eröffnung_zipcode.sample(10)

Unnamed: 0,zipcode,_key,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,detail_form_name,format,name,register,request_fingerprint
462,40668,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
372,32756,0,6,6,4,6,6,6,6,6,6,6,0,0,0,0,0
857,50858,1,4,4,4,4,4,4,4,4,4,4,0,0,0,0,0
176,17812,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
387,33142,1,9,9,9,9,9,9,9,9,9,9,0,0,0,0,0
863,50939,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
980,53840,0,12,12,10,12,12,12,12,12,12,12,0,0,0,0,0
1121,73106,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
65,12680,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
306,29790,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0


In [144]:
freq_in_nw_eröffnung_zipcode.to_csv('freq_in_nw_eröffnung_zipcode.csv')

#### Restschulderlassung von 01.01. - 10.09. in NRW

In [47]:
def in_restschuld(data_nw_in):
    in_restschuld_frei = (data_nw_in[data_nw_in['type_of_proceeding'] == 'Restschuldbefreiung'])
    return in_restschuld_frei

# Funktion def wird ausgeführt
restschuld_in = in_restschuld(data_nw_in)

In [48]:
restschuld_in.describe()

Unnamed: 0,_key,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint
count,1327,15786,15786,4285,15757,15786,15786,15786,15786,15786,15786,15786.0,0.0,0.0,0.0,0.0,0.0
unique,1327,5488,19,1870,166,135,1,8401,1,5549,1,989.0,0.0,0.0,0.0,0.0,0.0
top,491759/1/10/37449,0073_IN00013_15,agkoeln,1964-05-25 00:00:00,2021-04-20 00:00:00,2021-06-10 00:00:00,nw,gerichte/nw/agdortmd/20/0255_IN00049_20/2021_0...,in,"Kaplan, Baris, Waltrop, 145 IN 673/15",Restschuldbefreiung,50374.0,,,,,
freq,1,12,2407,12,196,212,15786,2,15786,10,15786,100.0,,,,,
first,,,,1932-04-26 00:00:00,2020-12-21 00:00:00,2021-01-04 00:00:00,,,,,,,,,,,
last,,,,1999-06-26 00:00:00,2021-08-06 00:00:00,2021-08-09 00:00:00,,,,,,,,,,,


In [49]:
freq_in_restschuld = restschuld_in.groupby(['date_of_publication'], as_index=False).count() 
print(freq_in_restschuld)

    date_of_publication  _key  case_nr  court  date_of_birth  \
0            2021-01-04    51       51     51             15   
1            2021-01-05    92       92     92             25   
2            2021-01-06   105      105    105             41   
3            2021-01-07    70       70     70             18   
4            2021-01-08    57       57     57             17   
..                  ...   ...      ...    ...            ...   
130          2021-08-03     0       40     40             11   
131          2021-08-04     0       47     47             13   
132          2021-08-05     0       40     40              7   
133          2021-08-06     0       29     29              6   
134          2021-08-09     0        2      2              1   

     date_of_proceeding  federal_state  file_name  kind  title  \
0                    51             51         51    51     51   
1                    92             92         92    92     92   
2                   105          

In [145]:
freq_in_nw_restschuld_zipcode = restschuld_in.groupby(['zipcode'], as_index=False).count()
freq_in_nw_restschuld_zipcode.sample(10)

Unnamed: 0,zipcode,_key,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,detail_form_name,format,name,register,request_fingerprint
734,52353,5,46,46,22,46,46,46,46,46,46,46,0,0,0,0,0
126,33378,2,20,20,2,20,20,20,20,20,20,20,0,0,0,0,0
949,65239,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
540,47798,5,38,38,12,38,38,38,38,38,38,38,0,0,0,0,0
661,50933,0,8,8,0,8,8,8,8,8,8,8,0,0,0,0,0
501,47119,0,10,10,4,10,10,10,10,10,10,10,0,0,0,0,0
924,59505,0,4,4,0,4,4,4,4,4,4,4,0,0,0,0,0
197,40489,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
81,32427,2,6,6,2,6,6,6,6,6,6,6,0,0,0,0,0
196,40479,0,12,12,4,12,12,12,12,12,12,12,0,0,0,0,0


In [50]:
freq_in_restschuld.to_csv('freq_in_restschuld.csv')

In [147]:
freq_in_nw_restschuld_zipcode.to_csv('freq_in_restschuld_zipcode.csv')

####  Vorher Geburtstage Fixen !! Restschuld by birth

In [52]:
freq_in_restschuld_birth = restschuld_in.groupby(['date_of_birth'], as_index=False).count() 
freq_in_restschuld_birth.sample(5)

Unnamed: 0,date_of_birth,_key,case_nr,court,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint
394,1962-04-13,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0
323,1960-09-22,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
504,1964-03-29,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
1161,1975-11-09,0,4,4,4,4,4,4,4,4,4,4,0,0,0,0,0
1527,1982-09-17,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0


In [53]:
freq_in_restschuld_birth.to_csv('freq_in_restschuld_birth.csv')

#### Abweisung Mangelsmasse in NRW

In [148]:
def in_abweisung(data_nw_in):
    in_abweisung_masse = (data_nw_in[data_nw_in['type_of_proceeding'] == 'Abweisungen_mangels_Masse'])
    return in_abweisung_masse

# Funktion def wird ausgeführt
abweisung_in = in_abweisung(data_nw_in)

In [149]:
abweisung_in.describe()

Unnamed: 0,_key,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint
count,146,1588,1588,635,1561,1588,1588,1588,1588,1588,1588,1582.0,0.0,0.0,0.0,0.0,0.0
unique,146,798,19,291,130,117,1,818,1,798,1,592.0,0.0,0.0,0.0,0.0,0.0
top,491759/1/11/17266,0085_IN00035_18,agbochum,1971-06-10 00:00:00,2021-06-08 00:00:00,2021-02-17 00:00:00,nw,gerichte/nw/agessen/20/0166_IN00072_20/2021_05...,in,"Ember Watches GmbH, Remscheid, 145 IN 70/20, R...",Abweisungen_mangels_Masse,44866.0,,,,,
freq,1,4,189,6,30,28,1588,2,1588,4,1588,16.0,,,,,
first,,,,1920-09-09 00:00:00,2020-02-20 00:00:00,2021-01-04 00:00:00,,,,,,,,,,,
last,,,,2000-02-09 00:00:00,2021-06-18 00:00:00,2021-06-18 00:00:00,,,,,,,,,,,


In [150]:
freq_abweisung_in = abweisung_in.groupby(['date_of_publication'], as_index=False).count() 
freq_abweisung_in.sample(5)

Unnamed: 0,date_of_publication,_key,case_nr,court,date_of_birth,date_of_proceeding,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint
67,2021-04-07,0,14,14,6,14,14,14,14,14,14,14,0,0,0,0,0
7,2021-01-13,22,22,22,10,22,22,22,22,22,22,22,0,0,0,0,0
57,2021-03-23,0,16,16,10,16,16,16,16,16,16,16,0,0,0,0,0
100,2021-05-26,0,10,10,4,10,10,10,10,10,10,10,0,0,0,0,0
74,2021-04-16,0,12,12,8,12,12,12,12,12,12,12,0,0,0,0,0


In [151]:
freq_abweisung_in.to_csv('freq_abweisung_in_publ.csv')

In [152]:
freq_in_nw_abweisung_zipcode = abweisung_in.groupby(['zipcode'], as_index=False).count()
freq_in_nw_abweisung_zipcode.sample(10)

Unnamed: 0,zipcode,_key,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,detail_form_name,format,name,register,request_fingerprint
336,44581,1,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
502,58511,2,4,4,2,4,4,4,4,4,4,4,0,0,0,0,0
389,46244,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
423,48366,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
220,32609,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
199,30488,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
275,41199,0,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0
72,15604,1,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
218,32591,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0
1,10240,0,2,2,0,2,2,2,2,2,2,2,0,0,0,0,0


In [153]:
freq_in_nw_abweisung_zipcode.to_csv('freq_abweisung_in_zipcode.csv')

In [138]:
# freq_in_nw_2['date_of_proceeding'] = pd.to_datetime(freq_in_nw_2['date_of_proceeding'], errors = 'coerce')

In [30]:
data_in_nw.sort_values(by="date_of_publication", ascending=True).head(50)

Unnamed: 0,_key,case_nr,court,date_of_birth,date_of_proceeding,date_of_publication,federal_state,file_name,kind,title,type_of_proceeding,zipcode,detail_form_name,format,name,register,request_fingerprint,"(date_of_birth, date_of_proceeding, date_of_publication)"
36169,491759/1/10/36169,0079_IN00010_19,agmuenst,NaT,2020-12-30,2021-01-04,nw,gerichte/nw/agmuenst/19/0079_IN00010_19/2021_0...,in,"Dirk & Sabine Lingenberg OHG, Lüdinghausen, 79...",Bestimmung_Termine,59348,,,,,,NaT
37573,491759/1/10/37573,0252_IN00078_14,agdortmd,1951-03-04,2020-12-29,2021-01-04,nw,gerichte/nw/agdortmd/14/0252_IN00078_14/2021_0...,in,"Moser, Harald, Selm, 252 IN 78/14",Sonstiges,59379,,,,,,1951-03-04
37595,491759/1/10/37595,0073_IN00073_19,agmuenst,NaT,2020-12-29,2021-01-04,nw,gerichte/nw/agmuenst/19/0073_IN00073_19/2021_0...,in,"Musatowa, Nadine, Gronau, 73 IN 73/19",Entscheidungen_im_Verfahren_mit_Termine,48599,,,,,,NaT
37596,491759/1/10/37596,0073_IN00073_19,agmuenst,1984-01-07,2020-12-29,2021-01-04,nw,gerichte/nw/agmuenst/19/0073_IN00073_19/2021_0...,in,"Musatowa, Nadine, Gronau, 73 IN 73/19",Entscheidungen_im_Verfahren_Aufhebung_Einstellung,48599,,,,,,1984-01-07
37597,491759/1/10/37597,0073_IN00073_19,agmuenst,NaT,2021-01-04,2021-01-04,nw,gerichte/nw/agmuenst/19/0073_IN00073_19/2021_0...,in,"Musatowa, Nadine, Gronau, 73 IN 73/19",Entscheidungen_im_Verfahren,48149,,,,,,NaT
37599,491759/1/10/37599,0502_IN00139_18,agddorf,1983-09-30,2020-12-30,2021-01-04,nw,gerichte/nw/agddorf/18/0502_IN00139_18/2021_01...,in,"Mustafic, Robert, Duisburg, 502 IN 139/18",Bestimmung_Termine,47198,,,,,,1983-09-30
37614,491759/1/10/37614,0072_IN00071_14,agmuenst,1954-04-13,2020-12-30,2021-01-04,nw,gerichte/nw/agmuenst/14/0072_IN00071_14/2021_0...,in,"Mühlnikel, Annette, Telgte, 72 IN 71/14",Restschuldbefreiung,48291,,,,,,1954-04-13
37623,491759/1/10/37623,0109_IN00034_16,aghagen,1955-03-31,2020-12-30,2021-01-04,nw,gerichte/nw/aghagen/16/0109_IN00034_16/2021_01...,in,"Müller, Gertrud, Schwelm, 109 IN 34/16",Bestimmung_Termine,58332,,,,,,1955-03-31
37624,491759/1/10/37624,070g_IN00064_20,agkoeln,NaT,2021-01-04,2021-01-04,nw,gerichte/nw/agkoeln/20/070g_IN00064_20/2021_01...,in,"Müller, Jeanette, Köln, 70g IN 64/20",Entscheidungen_im_Verfahren_mit_Termine,51103,,,,,,NaT
37682,491759/1/10/37682,0094_IN00002_14,agkrefd,1967-08-30,2021-01-04,2021-01-04,nw,gerichte/nw/agkrefd/14/0094_IN00002_14/2021_01...,in,"Neweling, Ralf, Duisburg, 94 IN 2/14",Entscheidungen_im_Verfahren_Aufhebung_Einstellung,47055,,,,,,1967-08-30


#### Groupby_zipcode