In [4]:
import json
from urllib.parse import quote

import certifi
import pandas as pd
import urllib3


"""#######################################

USE PRH's BUSINESS INFORMATION API TO FETCH COMPANY DATA
https://avoindata.prh.fi/ytj_en.html

1. Fetch data for each company listed in the yritykset.csv file.

2. Flatten the JSON response into dataframes.

3. Append the flattened data to the PRH_data dataframe.

4. Write the PRH_data dataframe to a tab-separated file (prh_data) in the working folder.

##########################################"""


# Load yritykset.csv, the list of companies to look up. The file is
# semicolon-separated and uses a decimal comma; the company names are read
# from its "Nimi" column further below.
# The separator is a plain ";" (not an escaped/regex pattern) so pandas can
# use its default C parser and honour quoted fields, and so the string
# literal contains no invalid escape sequence.
yritykset = pd.read_csv("yritykset.csv", sep=";", decimal=",")


# Empty result table; the fetch loop adds one row per flattened API record.
# Each row only fills the columns relevant to its "dataType".
PRH_data = pd.DataFrame(
    columns=[
        # Which part of the API response the row was taken from
        "dataType",
        # Company identity
        "businessId",
        "name",
        # Company form (name and type)
        "companyForm",
        "companyForm2",
        # Validity period of the record
        "registrationDate",
        "endDate",
        # Line of business
        "businessLines",
        # Address fields
        "street",
        "postCode",
        "city",
        "country",
        # Contact detail value and its type
        "contactDetails",
        "contactDetailsType",
    ]
)


# Loop through the company names one by one, query the PRH API for each name
# and collect the flattened response rows into PRH_data.

# Company-level fields copied onto every nested record when flattening.
_META_FIELDS = ["businessId", "name", "registrationDate", "companyForm"]

# One entry per kind of row written to PRH_data, in output order:
#   (value for the "dataType" column,
#    nested list to expand, or None for the company entry itself,
#    {flattened column -> PRH_data column})
# Flattened columns starting with "_" come from the nested record (see
# record_prefix in _flatten); the others are company-level fields.
_ROW_SPECS = [
    ("results", None, {
        "businessId": "businessId",
        "name": "name",
        "registrationDate": "registrationDate",
    }),
    ("companyForms", "companyForms", {
        "businessId": "businessId",
        "name": "name",
        "_name": "companyForm",
        "_type": "companyForm2",
        "_registrationDate": "registrationDate",
        "_endDate": "endDate",
    }),
    ("businessLines", "businessLines", {
        "businessId": "businessId",
        "name": "name",
        "_name": "businessLines",
        "_registrationDate": "registrationDate",
        "_endDate": "endDate",
    }),
    ("addresses", "addresses", {
        "businessId": "businessId",
        "name": "name",
        "_registrationDate": "registrationDate",
        "_endDate": "endDate",
        "_street": "street",
        "_postCode": "postCode",
        "_city": "city",
        "_country": "country",
    }),
    ("contactDetails", "contactDetails", {
        "businessId": "businessId",
        "name": "name",
        "_registrationDate": "registrationDate",
        "_endDate": "endDate",
        "_value": "contactDetails",
        "_type": "contactDetailsType",
    }),
]


def _flatten(results, record_path=None):
    """Flatten API results into a DataFrame (tabular form).

    With record_path=None each result becomes one row. Otherwise the nested
    list stored under *record_path* is expanded to one row per record, with
    the record's own columns prefixed by "_" and the company-level
    _META_FIELDS attached. Returns an empty DataFrame when json_normalize
    raises KeyError (no data under the requested key).
    """
    try:
        if record_path is None:
            return pd.json_normalize(data=results, errors="ignore")
        return pd.json_normalize(
            data=results,
            record_path=[record_path],
            meta=_META_FIELDS,
            record_prefix="_",
            errors="ignore",
        )
    except KeyError:
        return pd.DataFrame([])


def _collect_rows(results):
    """Return one PRH_data-shaped DataFrame per non-empty row kind."""
    frames = []
    for data_type, record_path, column_map in _ROW_SPECS:
        flat = _flatten(results, record_path)
        if flat.empty:
            continue
        # reindex (rather than plain column selection) turns a column that
        # is absent from the flattened data into NaN instead of a KeyError.
        rows = flat.reindex(columns=list(column_map)).rename(columns=column_map)
        rows.insert(0, "dataType", data_type)
        frames.append(rows)
    return frames


_collected = []
for company_name in yritykset["Nimi"]:
    # Percent-encode the whole name so that spaces, "&" and non-ASCII
    # letters (e.g. "ä", "ö") all produce a valid query string.
    url = (
        "https://avoindata.prh.fi/bis/v1?totalResults=false&name="
        + quote(company_name, safe="")
    )

    print(url)

    # Fetch data using PRH API
    data = pd.read_json(url)

    try:
        results = data["results"]
    except KeyError:
        # The response has no "results": nothing to add for this company.
        continue

    _collected.extend(_collect_rows(results))

# Combine everything with a single concat: DataFrame.append no longer exists
# in pandas 2.x, and appending row by row copies the whole table every time.
# Empty frames are left out of the concat, and the final reindex restores the
# full PRH_data column set and order.
_non_empty = [frame for frame in (PRH_data, *_collected) if not frame.empty]
if _non_empty:
    PRH_data = pd.concat(_non_empty, ignore_index=True).reindex(
        columns=PRH_data.columns
    )


# Show the first ten collected rows as a quick sanity check
print(PRH_data.iloc[:10])


# Save the full table to the file "prh_data" in the current working
# directory as tab-separated, UTF-8 encoded text
PRH_data.to_csv(path_or_buf="prh_data", sep="\t", encoding="utf-8")



  return func(*args, **kwargs)


https://avoindata.prh.fi/bis/v1?totalResults=false&name=Tieto
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Tietoevry
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Kone
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Kone%20Hissit%20Oy
https://avoindata.prh.fi/bis/v1?totalResults=false&name=BRP%20Finland
https://avoindata.prh.fi/bis/v1?totalResults=false&name=Norrhydro
  dataType businessId                    name companyForm companyForm2  \
0  results  3264596-2               Tietoi Oy         NaN          NaN   
1  results  3254594-5           Tietoleppä Oy         NaN          NaN   
2  results  3242305-9              Tietoon Oy         NaN          NaN   
3  results  3241402-1  Tietokuja 2 Holding Oy         NaN          NaN   
4  results  3234614-5         I & T Ahonen Oy         NaN          NaN   
5  results  3217480-6           Tietohuone Oy         NaN          NaN   
6  results  3207666-7  TietoAkseli Holding Oy         NaN          NaN   
7  resul