In [1]:
import pandas as pd
from dotenv import dotenv_values, load_dotenv
import requests
import json

In [18]:
pd.__version__

'2.1.1'

## German Census Database (Genesis) – Create Table of Contents

for full api documentation (in German), see  
https://ergebnisse2011.zensus2022.de/datenbank/misc/ZENSUS-Webservices_Einfuehrung.pdf

code inspiration thanks to  
https://github.com/sjockers/genesis-api-example

In [160]:
# set preferred language (de|en)
langPref = "en"
# expand text output
pd.set_option('display.max_colwidth', None)

In [161]:
# Load credentials from .env file
load_dotenv()  
USERNAME, PASSWORD = dotenv_values().values()

# Set base path for API calls
BASE_URL = 'https://ergebnisse2011.zensus2022.de/api/rest/2020/'

### Check login credentials

In [162]:
hello = requests.get(BASE_URL + 'helloworld/logincheck', params={
    'username': USERNAME,
    'password': PASSWORD,
    'language': langPref
})
hello.json()["Status"]

'You have been logged in and out successfully!'

### Catalogue Service

In [163]:
variables = requests.get(BASE_URL + 'catalogue/variables', params={
    'username': USERNAME,
    'password': PASSWORD,
    'language': langPref,
    'pagelength': 200
})
variableCodes = variables.json()["List"]

In [164]:
len(variableCodes)

134

In [165]:
variableCodes[-3:]

[{'Code': 'WHGFL3',
  'Content': 'Floor area of the dwelling (20m² intervals)',
  'Type': 'Subject',
  'Values': '10',
  'Information': 'true'},
 {'Code': 'WHGNZ1',
  'Content': 'Type of use of the dwelling',
  'Type': 'Subject',
  'Values': '5',
  'Information': 'true'},
 {'Code': 'WHGRM2',
  'Content': 'Rooms',
  'Type': 'Subject',
  'Values': '7',
  'Information': 'true'}]

In [167]:
tables = requests.get(BASE_URL + 'catalogue/tables', params={
    'username': USERNAME,
    'password': PASSWORD,
    'language': langPref,
    'pagelength': 1500
})
tableCodes = tables.json()["List"]

In [168]:
len(tableCodes)

754

In [169]:
tableCodes[-3:]

[{'Code': '6000F-2004',
  'Content': 'Families: Size of family nucleus/Type of family nucleus\n(by family) - Equipment in dwelling/Floor area of the\ndwelling (20m² intervals)/Rooms',
  'Time': '2011-05-09 to 2011-05-09'},
 {'Code': '6000F-3001',
  'Content': 'Families: Size of private household - Senior citizen status\n- Type of private household (by family)',
  'Time': '2011-05-09 to 2011-05-09'},
 {'Code': '6000F-3002',
  'Content': 'Families: Size of family nucleus - Type of family nucleus\n(by family) - Equipment in dwelling/Floor area of the\ndwelling (20m² intervals)/Rooms',
  'Time': '2011-05-09 to 2011-05-09'}]

### Metadata Service

In [170]:
varCodes=[]

In [171]:
for item in variableCodes:
    varCodes.append(item.get("Code"))

In [172]:
varCodes[0:5]

['ALTER1', 'ALTGR1', 'ALTGR2', 'ALTGR3', 'ALTGR4']

In [173]:
def metaVar(var):

    metadata = requests.get(BASE_URL + 'metadata/variable', params={
        'username': USERNAME,
        'password': PASSWORD,
        'language': langPref,
        'name': var
    })
    
    return(metadata.json()["Object"])

In [178]:
metadataVariable = pd.DataFrame()

In [179]:
for code in varCodes:

    myObject = metaVar(code)
    
    try:
        myInformation = myObject["Information"].replace("wiki","").replace("\n"," ")
    except:
        myInformation = ""
        
    metadataVariable = pd.concat([metadataVariable, pd.DataFrame({"Code": myObject["Code"], \
                                                          "Content": myObject["Content"], \
                                                          "Information": myInformation, \
                                                          "Updated": myObject["Updated"]},\
                                                          index=[0])])

In [180]:
metadataVariable

Unnamed: 0,Code,Content,Information,Updated
0,ALTER1,Age (yearly stages),The age of the person refers to the completed year of life on 9 May 2011. Broken down into individual years of age.,2022-05-09 15:37:41h
0,ALTGR1,Age (five years age groups),The age of the person refers to the completed year of life on 9 May 2011. This variable indicates a person's age group for reference date 9 May 2011. The ages are cited in five years age groups.,2020-10-28 10:42:15h
0,ALTGR2,Age (ten years age groups),The age of the person refers to the completed year of life on 9 May 2011. This variable indicates a person's age group for reference date 9 May 2011. The ages are cited in ten years age groups.,2020-10-28 10:42:19h
0,ALTGR3,Age (age classes of relevance to infrastructure),The age of the person refers to the completed year of life on 9 May 2011. The breakdown into age classes of relevance to infrastructure can serve as a basis for the planning of infrastructure and welfare services of all kinds and thus to establish tailored concepts for action and measures for different age classes.,2020-10-28 10:42:20h
0,ALTGR4,Age (age classes of relevance to market research),"The age of the person is the completed year of life on the reference date, 9 May 2011. The age groups of relevance to market research offer a breakdown based on the requirements and guidelines with specific population definitions. The differentiated view of the under-18s is of special significance for market research studies.",2020-10-28 10:42:21h
...,...,...,...,...
0,WHGEG1,Ownership of the dwelling,"This variable reflects who owns an owner-occupied dwelling in a building divided up pursuant to the Act on Owner- Occupied Dwellings. These can be private citizens or legal persons. The recorded figures for ""Ownership of dwelling"" represent a part of the dwellings of the considered regional unit. The result table only shows commonhold dwellings.",2021-01-13 14:09:23h
0,WHGFL2,Floor area of the dwelling (10m² intervals),"Floor area of the entire dwelling in m². The dwelling includes rooms outside the dwelling unit (e.g. attics) and cellars and compartments which have been developed for habitation. The total floor area is calculated using the following percentages for different parts of the dwelling: - 100%: the floor area of rooms/parts of rooms with a ceiling height of at least 2 metres; - 50%: the floor area of rooms/parts of rooms with a ceiling height of at least 1 metre but less than 2 metres; unheated conservatories, swimming pools and similar fully enclosed rooms; - generally 25% but at most 50%: the areas of balconies, verandas, roof gardens, patios. The floor area data are broken down into 10 m² intervals.",2022-11-03 09:25:08h
0,WHGFL3,Floor area of the dwelling (20m² intervals),"Floor area of the entire dwelling in m². The dwelling includes rooms outside the dwelling unit (e.g. attics) and cellars and compartments which have been developed for habitation. The total floor area is calculated using the following percentages for different parts of the dwelling: - 100%: the floor area of rooms/parts of rooms with a ceiling height of at least 2 metres; - 50%: the floor area of rooms/parts of rooms with a ceiling height of at least 1 metre but less than 2 metres; unheated conservatories, swimming pools and similar fully enclosed rooms; - generally 25% but at most 50%: the areas of balconies, verandas, roof gardens, patios. The floor area data are broken down into 20 m² intervals.",2020-11-27 14:39:47h
0,WHGNZ1,Type of use of the dwelling,The type of use of dwelling describes whether and by whom (tenant or owner) the dwelling is being used on the reference date.,2022-11-03 09:24:29h


In [181]:
tabCodes=[]

In [182]:
for item in tableCodes:
    tabCodes.append(item.get("Code"))

In [183]:
tabCodes[-5:]

['6000F-2002', '6000F-2003', '6000F-2004', '6000F-3001', '6000F-3002']

In [184]:
def metaTab(tab):

    metadata = requests.get(BASE_URL + 'metadata/table', params={
        'username': USERNAME,
        'password': PASSWORD,
        'language': langPref,
        'name': tab
    })
    
    return(metadata.json()["Object"])

In [185]:
metadataTable = pd.DataFrame()

In [186]:
for code in tabCodes:

    myObject = metaTab(code)
    
    try:
        myCol2= myObject["Structure"]["Columns"][1]["Code"]
    except:
        myCol2= ""
    try:
        myCol3= myObject["Structure"]["Columns"][2]["Code"]
    except:
        myCol3= ""
    
    try:
        myRow2= myObject["Structure"]["Rows"][0]["Structure"][0]["Code"]
    except:
        myRow2= ""     
    try:
        myRow3= myObject["Structure"]["Rows"][0]["Structure"][0]["Structure"][0]["Code"]
    except:
        myRow3= ""
        
    metadataTable = pd.concat([metadataTable, pd.DataFrame({"Code": myObject["Code"], \
                                                          "Content": myObject["Content"], \
                                                          "Col1": myObject["Structure"]["Columns"][0]["Code"], \
                                                          "Col2": myCol2, \
                                                          "Col3": myCol3, \
                                                          "Row1": myObject["Structure"]["Rows"][0]["Code"], \
                                                          "Row2": myRow2, \
                                                          "Row3": myRow3, \
                                                          "Updated": myObject["Updated"]},\
                                                          index=[0])])

In [187]:
metadataTable

Unnamed: 0,Code,Content,Col1,Col2,Col3,Row1,Row2,Row3,Updated
0,1000A-0001,Persons: Official population and area (municipalities),PRS018,FLC001,PRS017,GEOGM1,,,2022-01-20 08:17:38h
0,1000A-1001,Persons: Age (ten years age groups),GEODL1,GEOBL1,,ALTGR2,,,2022-03-21 13:56:14h
0,1000A-1002,Persons: Age (eleven classes of years),GEODL1,GEOBL1,,ALTKL2,,,2022-03-21 13:56:14h
0,1000A-1003,Persons: Age (five classes of years),GEODL1,GEOBL1,,ALTKL1,,,2022-03-21 13:56:14h
0,1000A-1004,Persons: Age (five years age groups),GEODL1,GEOBL1,,ALTGR1,,,2022-03-21 13:56:14h
...,...,...,...,...,...,...,...,...,...
0,6000F-2002,Families: Size of private household - Senior citizen\nstatus/Type of private household (by family),GEODL1,,,HSHGR2,HSHSE1,,2021-01-13 16:05:49h
0,6000F-2003,Families: Type of private household (by family) - Senior\ncitizen status of a private household,GEODL1,,,HSHTP1,HSHSE1,,2020-12-16 08:20:34h
0,6000F-2004,Families: Size of family nucleus/Type of family nucleus\n(by family) - Equipment in dwelling/Floor area of the\ndwelling (20m² intervals)/Rooms,GEODL1,,,FAMGR2,WHGAS1,,2021-08-31 11:26:56h
0,6000F-3001,Families: Size of private household - Senior citizen status\n- Type of private household (by family),GEODL1,,,HSHGR2,HSHSE1,HSHTP1,2020-12-16 08:21:09h


In [188]:
with pd.ExcelWriter("census2011_toc_"+langPref+".xlsx") as writer:  

    metadataVariable.to_excel(writer, sheet_name='variables', index=False)
    metadataTable.to_excel(writer, sheet_name='tables', index=False)