In [None]:
import datetime
import os
import re
from datetime import timedelta

import feedparser
import requests

**Genesis API credentials**  
Register for a free account if you haven't already:  
https://www-genesis.destatis.de/genesis/online?Menu=Registrierung

In [None]:
# Insert your credentials here before you proceed, or - preferably - provide them
# through the environment variables GENESIS_USERNAME / GENESIS_PASSWORD so that
# they never end up in a saved or shared copy of this notebook.
# The "xxx" placeholders are only used when the environment variables are not set.
# Genesis Online, Regionaldatenbank and Zensusdatenbank each require their own registration and username.
KENNUNG = os.environ.get("GENESIS_USERNAME", "xxx")
PASSWORT = os.environ.get("GENESIS_PASSWORD", "xxx")

In [None]:
# API login test - check the credentials first, before running the remaining cells.
# The credentials are handed to requests via `params` so that they get URL-encoded:
# a username or password containing characters such as "&", "#", "+", "%" or a
# space would otherwise corrupt the query string.
checkUrl = "https://www-genesis.destatis.de/genesisWS/rest/2020/helloworld/logincheck"

response = requests.get(
    checkUrl,
    params={"username": KENNUNG, "password": PASSWORT},
    timeout=120,
)
# last expression of the cell: display the decoded JSON answer of the login check
response.json()

In [None]:
# Fetch and parse the Genesis RSS news feed (requested with language=de);
# its entries announce the latest updates.
feed = feedparser.parse(
    "https://www-genesis.destatis.de/genesis/online/news?language=de"
)

In [None]:
# reference timestamp (naive, local time) against which the feed entries are compared
today = datetime.datetime.today()

In [None]:
# the day before `today`, rendered as dd.mm.yyyy
yesterday_ddmmyyyy = format(today - timedelta(days=1), "%d.%m.%Y")

In [None]:
# create and populate list of updated statistics from newsfeed
updatedStatistics = []

# feedparser reports `published_parsed` in UTC, whereas `today` is naive local
# time. Both sides are converted to timezone-aware UTC values so the 24-hour
# window is not skewed by the local UTC offset.
# (astimezone() interprets a naive datetime as system local time.)
now_utc = today.astimezone(datetime.timezone.utc)
one_day = timedelta(days=1)

for entry in feed.entries:

    # skip entries that carry no usable publication date instead of crashing
    published_parsed = entry.get("published_parsed")
    if published_parsed is None:
        continue

    # use the full timestamp (down to the second), not only year..hour
    published = datetime.datetime(*published_parsed[:6], tzinfo=datetime.timezone.utc)

    # filter feed to new items of the last 24hrs
    if now_utc - published >= one_day:
        continue

    # the statistic code is the first 5-digit number in the title;
    # titles without such a number are skipped rather than raising IndexError
    codeMatch = re.search(r"\d{5}", entry.title)
    if codeMatch is None:
        continue
    myCode = codeMatch.group()

    # several feed items may refer to the same statistic: keep each code once
    # so that later cells do not request and download the same tables repeatedly
    if myCode not in updatedStatistics:
        updatedStatistics.append(myCode)

    print(myCode, entry.published, entry.title[17:85])

In [None]:
# display how many statistic codes were collected from the feed
len(updatedStatistics)

In [None]:
# create and populate list of tables that belong to each updated statistic
updatedTables = []

# catalogue endpoint; the query parameters are supplied per request below
catUrl = "https://www-genesis.destatis.de/genesisWS/rest/2020/catalogue/tables2statistic"

for statistic in updatedStatistics:

    # `params` makes requests URL-encode the values, so credentials containing
    # characters such as "&", "#" or "+" do not corrupt the query string
    catResponse = requests.get(
        catUrl,
        params={"username": KENNUNG, "password": PASSWORT, "name": statistic},
        timeout=120,
    )

    # report failed requests and carry on with the next statistic
    if catResponse.status_code != 200:
        print(statistic, " catalogue request failed, HTTP status", catResponse.status_code)
        continue

    # guard against a missing or null "List" (e.g. no tables for this statistic)
    for code in catResponse.json().get("List") or []:

        # keep every table code only once
        if code["Code"] not in updatedTables:
            updatedTables.append(code["Code"])

print(updatedTables)

In [None]:
# display how many table codes were collected for the updated statistics
len(updatedTables)

In [None]:
# Download XLSX-files for each updated Table Nr
tabUrl = "https://www-genesis.destatis.de/genesisWS/rest/2020/data/tablefile"
# Get Table Title for each updated Table Nr
namUrl = "https://www-genesis.destatis.de/genesisWS/rest/2020/catalogue/tables"
# Where to download
destination = "newsfeed-download/"

# create the target directory up front; open() below fails if it does not exist
os.makedirs(destination, exist_ok=True)

# credentials shared by both endpoints; they are passed via `params` so that
# requests URL-encodes them (characters such as "&", "#" or "+" stay intact)
credentials = {"username": KENNUNG, "password": PASSWORT}

for name in updatedTables:

    # only download tables that have been updated since yesterday
    response = requests.get(
        tabUrl,
        params={**credentials, "format": "xlsx", "name": name, "stand": yesterday_ddmmyyyy},
        timeout=120,
    )

    # report failed downloads instead of skipping them silently
    if response.status_code != 200:
        print(name, " download failed, HTTP status", response.status_code)
        continue

    # a statistic may have been updated with values for June but tables with yearly data have not
    if b"Keine aktualisierten Daten vorhanden" in response.content:
        print(name, " Keine aktualisierten Daten")
        continue

    # enhance filename with human readable title
    metaresp = requests.get(namUrl, params={**credentials, "selection": name}, timeout=120)
    try:
        filetitle = metaresp.json()["List"][0]["Content"] or ""
    except (ValueError, KeyError, IndexError, TypeError):
        # no usable title in the answer: still save the file, just without a title
        print(name, " title lookup failed, saving without title")
        filetitle = ""

    # replace characters that are not suitable for filenames
    # (path separators and the characters Windows forbids become "_", line breaks become " ")
    filetitle = re.sub(r'[\\/:*?"<>|]', "_", filetitle)
    filetitle = re.sub(r"\r?\n|\r", " ", filetitle)

    # filename (statistics id + title) and directory
    filename = name + ("_" + filetitle if filetitle else "") + ".xlsx"
    with open(os.path.join(destination, filename), 'wb') as f:
        f.write(response.content)