In [8]:
import urllib3
import certifi
import json
import pandas as pd


"""#######################################

USE PRH's BUSINESS INFORMATION API TO FETCH COMPANY DATA
https://avoindata.prh.fi/ytj_en.html

1. The application will fetch data for the companies that are defined
in yritykset.csv -file.

2. Flatten the json data into a dataframe

3. Append the flattened data into a PRH_data dataframe

4. Convert the PRH_data dataframe into a csv-file and save it to the workspace folder

##########################################"""


# Import yritykset.csv that contains name of companies
yritykset = pd.DataFrame(pd.read_csv("yritykset.csv", sep="\;", decimal=","))


# Define dataframe for the flattened data
PRH_data = pd.DataFrame(columns=["dataType",
                                        "businessId",
                                        "name",
                                        "companyForm",
                                        "companyForm2",
                                        "registrationDate",
                                        "endDate",
                                        "businessLines",
                                        "street",
                                        "postCode",
                                        "city",
                                        "country",
                                        "contactDetails",
                                        "contactDetailsType"])


# Create a loop that goes throug the company names one by one
for i in range(len(yritykset)):
        name = yritykset.loc[i, "Nimi"].replace(" ", "%20")
        name = name.replace("&", "%26")

        url = "https://avoindata.prh.fi/bis/v1?totalResults=false&name="+name

        print(url)


        # Fetch data using PRH API
        http = urllib3.PoolManager(
                cert_reqs="CERT_REQUIRED",
                ca_certs=certifi.where())

        r = http.request("GET", url)
        r.status

        data = json.loads(r.data.decode("utf-8"))


        # Use json_normalize() to flatten Json structures into Pandas DataFrames (tabular form)
        # If there are keyerrors (no data), empty dataframe will be provided
        while True:
                try:
                        df_osoitteet = pd.json_normalize(data=data["results"], record_path=["addresses"], meta=["businessId","name","registrationDate","companyForm"], record_prefix="_", errors="ignore")
                except(KeyError):
                        df_osoitteet = pd.DataFrame([])
                break

        while True:
                try:
                        df_yhteystiedot = pd.json_normalize(data=data["results"], record_path=["contactDetails"], meta=["businessId","name","registrationDate","companyForm"], record_prefix="_", errors="ignore")
                except(KeyError):
                        df_yhteystiedot = pd.DataFrame([])
                break

        while True:
                try:
                        df_yritysmuoto = pd.json_normalize(data=data["results"], record_path=["companyForms"], meta=["businessId","name","registrationDate","companyForm"], record_prefix="_", errors="ignore")
                except(KeyError):
                        df_yritysmuoto = pd.DataFrame([])
                break

        while True:
                try:
                        df_liiketoiminta = pd.json_normalize(data=data["results"], record_path=["businessLines"], meta=["businessId","name","registrationDate","companyForm"], record_prefix="_", errors="ignore")
                except(KeyError):
                        df_liiketoiminta = pd.DataFrame([])
                break

        # Create loops that goes through each dataframe and append data into the PRH_data dataframe
        for i in range(len(df_yritysmuoto)):
                PRH_data = PRH_data.append({"dataType":"companyForms" ,
                                                "businessId":df_yritysmuoto.loc[i,"businessId"],
                                                "name":df_yritysmuoto.loc[i,"name"],
                                                "companyForm":df_yritysmuoto.loc[i,"_name"],
                                                "companyForm2":df_yritysmuoto.loc[i,"_type"],
                                                "registrationDate":df_yritysmuoto.loc[i,"_registrationDate"],
                                                "endDate":df_yritysmuoto.loc[i,"_endDate"]},
                                                ignore_index=True)

        for i in range(len(df_liiketoiminta)):
                PRH_data = PRH_data.append({"dataType":"businessLines" ,
                                                "businessId":df_liiketoiminta.loc[i,"businessId"],
                                                "name":df_liiketoiminta.loc[i,"name"],
                                                "businessLines":df_liiketoiminta.loc[i,"_name"],
                                                "registrationDate":df_liiketoiminta.loc[i,"_registrationDate"],
                                                "endDate":df_liiketoiminta.loc[i,"_endDate"]},
                                                ignore_index=True)

        for i in range(len(df_osoitteet)):
                PRH_data = PRH_data.append({"dataType":"addresses" ,
                                                "businessId":df_osoitteet.loc[i,"businessId"],
                                                "name":df_osoitteet.loc[i,"name"],
                                                "registrationDate":df_osoitteet.loc[i,"_registrationDate"],
                                                "endDate":df_osoitteet.loc[i,"_endDate"],
                                                "street":df_osoitteet.loc[i,"_street"],
                                                "postCode":df_osoitteet.loc[i,"_postCode"],
                                                "city":df_osoitteet.loc[i,"_city"],
                                                "country":df_osoitteet.loc[i,"_country"]},
                                                ignore_index=True)

        for i in range(len(df_yhteystiedot)):
                PRH_data = PRH_data.append({"dataType":"contactDetails" ,
                                                "businessId":df_yhteystiedot.loc[i,"businessId"],
                                                "name":df_yhteystiedot.loc[i,"name"],
                                                "registrationDate":df_yhteystiedot.loc[i,"_registrationDate"],
                                                "endDate":df_yhteystiedot.loc[i,"_endDate"],
                                                "contactDetails":df_yhteystiedot.loc[i,"_value"],
                                                "contactDetailsType":df_yhteystiedot.loc[i,"_type"]},
                                                ignore_index=True)


print(PRH_data.head(10))


# Convert PRH_data into csv-file and save it to the workspace folder 
PRH_data.to_csv("prh_data", sep="\t", encoding="utf-8")



  return func(*args, **kwargs)


https://avoindata.prh.fi/bis/v1?totalResults=false&name=3DI%20Akustiikkatuotteet
https://avoindata.prh.fi/bis/v1?totalResults=false&name=ABB%20Oy%20/Onninen
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Ahlsell%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Ahsell/Eurolan
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Algol%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=AM%20Lukkoasema%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Änäkkälän%20teräs%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Asennus-Kaiffarit%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Asennussuunta%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=BE%20Group%20+%20OKR
https://avoindata.prh.fi/bis/v1?totalResults=false&name=BE%20Group%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Berner%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=BOS%20Cleantec%20/%20Bierkühl%20Oy
https://avoindata.