In [8]:
import os

import pandas as pd
import pymongo
import requests
from pymongo.errors import ServerSelectionTimeoutError, OperationFailure, PyMongoError

# MongoDB connection URI (a plain string pointing at localhost:27017, not a client object);
# import_data passes it to pymongo.MongoClient.
mongoclient = 'mongodb://localhost:27017/'
# URL of the World Bank API resource that this notebook downloads.
url= "https://datacatalogapi.worldbank.org/dexapps/fone/api/apiservice?datasetId=DS00975&resourceId=RS00905"
# Known resources (change the datasetId/resourceId pair in `url` to switch between them):
#   October 2024 data:           https://datacatalogapi.worldbank.org/dexapps/fone/api/apiservice?datasetId=DS00047&resourceId=RS00049
#   Historical data (set above): https://datacatalogapi.worldbank.org/dexapps/fone/api/apiservice?datasetId=DS00975&resourceId=RS00905

def import_data(mySampledatabase, collectionName, serverSelectionTimeoutMS=5000, requestTimeout=30):
    """Download the World Bank payload and store it in a MongoDB collection.

    The download and insert only happen when ``collectionName`` is not yet
    present in the ``mySampledatabase`` database; otherwise the function just
    reports that the collection exists. All errors are reported with ``print``
    and never propagate to the caller.

    Parameters
    ----------
    mySampledatabase : str
        Name of the MongoDB database to use.
    collectionName : str
        Name of the collection to populate.
    serverSelectionTimeoutMS : int, optional
        Milliseconds pymongo waits for a reachable server (default 5000).
    requestTimeout : float, optional
        Seconds to wait for the HTTP request to the module-level ``url``
        (default 30). Without a timeout the request could block forever.

    Returns
    -------
    None
    """
    # Bound before the try block so that `finally` never touches an unbound
    # name when the MongoClient constructor itself raises.
    myclient = None
    try:
        # Connect to the MongoDB server
        myclient = pymongo.MongoClient(mongoclient, serverSelectionTimeoutMS=serverSelectionTimeoutMS)

        # Attempt to get the server info to check the connection
        myclient.server_info()  # This will raise an error if the connection fails

        # List all databases
        existing_dbs = myclient.list_database_names()
        print("Databases:", existing_dbs)

        # Indexing the client only yields a handle; both branches use the same one.
        mydb = myclient[mySampledatabase]
        if mySampledatabase not in existing_dbs:
            print(f"{mySampledatabase} created.")
        else:
            print(f"Database {mySampledatabase} already exists. Skipping creation.")

        # Nothing to do when the collection is already there.
        if collectionName in mydb.list_collection_names():
            print(f"Collection '{collectionName}' already exists. Skipping creation.")
            return

        mycol = mydb[collectionName]
        print(f"Collection '{collectionName}' created.")

        # Fetch data from the API (bounded wait instead of hanging indefinitely)
        response = requests.get(url, timeout=requestTimeout)

        # Check if the request was successful
        if response.status_code != 200:
            print("Failed to retrieve data:", response.status_code)
            return

        data = response.json()  # Parse the JSON response

        # Insert the data into the MongoDB collection
        if isinstance(data, list):
            if not data:
                # insert_many rejects an empty list, so report it explicitly.
                print("API returned an empty list. Nothing inserted.")
                return
            x = mycol.insert_many(data)  # Use insert_many for multiple records
            print(f"Inserted document IDs: {x.inserted_ids}")
        else:
            # NOTE(review): a later cell reads response_data['data'], so the payload
            # appears to be a dict wrapping the records; insert_one then stores the
            # whole payload as a single document. Confirm that this is intended.
            x = mycol.insert_one(data)  # Use insert_one if it's a single record
            print("Inserted document ID:", x.inserted_id)

    except ServerSelectionTimeoutError:
        print("Could not connect to the server within the timeout period. Please check the server address.")
    except OperationFailure as e:
        print(f"Operation failed: {e}")
    except PyMongoError as e:
        print(f"A MongoDB error occurred: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        if myclient is not None:
            myclient.close()  # Close the connection


# Import data into MongoDB
#import_data("WorldBank", "Sep2024")


In [9]:
# TODO: connect to the Sep2024 collection in the WorldBank database

In [10]:
# Populate the "historical" collection of the "WorldBank" database from the API.
# import_data only downloads and inserts when that collection does not exist yet.
import_data("WorldBank", "historical")

Databases: ['Car_Info', 'Trump_Donors', 'Weather', 'WorldBank', 'admin', 'config', 'etl_db', 'local', 'mySampledatabase', 'sampleDG']
Database WorldBank already exists. Skipping creation.
Collection 'historical' already exists. Skipping creation.


In [11]:
#print(data)

In [12]:
# Fetch data from the API (bounded wait so the cell cannot hang indefinitely)
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of failing later on a confusing JSON/KeyError
response.raise_for_status()
response_data = response.json()  # Parse the JSON response

# Extract the records, which sit under the 'data' key of the payload
data = response_data['data']

# Convert to DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
df.head()




Unnamed: 0,end_of_period,loan_number,region,country_code,country,borrower,guarantor_country_code,guarantor,loan_type,loan_status,...,repaid_3rd_party,due_3rd_party,loans_held,first_repayment_date,last_repayment_date,agreement_signing_date,board_approval_date,effective_date_most_recent,closed_date_most_recent,last_disbursement_date
0,30-Apr-2024,IBRD00010,EUROPE AND CENTRAL ASIA,FR,France,CREDIT NATIONAL,FR,France,NPL,Fully Repaid,...,249962000.0,0,0,01-Nov-1952,01-May-1977,09-May-1947,09-May-1947,09-Jun-1947,31-Dec-1947,
1,31-Mar-2024,IBRD00010,EUROPE AND CENTRAL ASIA,FR,France,CREDIT NATIONAL,FR,France,NPL,Fully Repaid,...,249962000.0,0,0,01-Nov-1952,01-May-1977,09-May-1947,09-May-1947,09-Jun-1947,31-Dec-1947,
2,29-Feb-2024,IBRD00010,EUROPE AND CENTRAL ASIA,FR,France,CREDIT NATIONAL,FR,France,NPL,Fully Repaid,...,249962000.0,0,0,01-Nov-1952,01-May-1977,09-May-1947,09-May-1947,09-Jun-1947,31-Dec-1947,
3,31-Jan-2024,IBRD00010,EUROPE AND CENTRAL ASIA,FR,France,CREDIT NATIONAL,FR,France,NPL,Fully Repaid,...,249962000.0,0,0,01-Nov-1952,01-May-1977,09-May-1947,09-May-1947,09-Jun-1947,31-Dec-1947,
4,31-Dec-2023,IBRD00010,EUROPE AND CENTRAL ASIA,FR,France,CREDIT NATIONAL,FR,France,NPL,Fully Repaid,...,249962000.0,0,0,01-Nov-1952,01-May-1977,09-May-1947,09-May-1947,09-Jun-1947,31-Dec-1947,


In [13]:
# Preview the first rows of df again.
# NOTE(review): the saved output of this cell shows 31-Oct-2024 rows, while the preview in
# the previous cell shows 30-Apr-2024 rows, and two cells carry the label In [13]. The output
# looks stale / produced out of order; confirm with Restart Kernel -> Run All.
df.head()

Unnamed: 0,end_of_period,loan_number,region,country_code,country,borrower,guarantor_country_code,guarantor,loan_type,loan_status,...,repaid_3rd_party,due_3rd_party,loans_held,first_repayment_date,last_repayment_date,agreement_signing_date,board_approval_date,effective_date_most_recent,closed_date_most_recent,last_disbursement_date
0,31-Oct-2024,IBRD00010,EUROPE AND CENTRAL ASIA,FR,France,CREDIT NATIONAL,FR,France,NPL,Fully Repaid,...,249962000.0,0,0,01-Nov-1952,01-May-1977,09-May-1947,09-May-1947,09-Jun-1947,31-Dec-1947,
1,31-Oct-2024,IBRD00020,EUROPE AND CENTRAL ASIA,NL,Netherlands,,,,NPL,Fully Repaid,...,87672000.0,0,0,01-Apr-1952,01-Oct-1972,07-Aug-1947,07-Aug-1947,11-Sep-1947,31-Mar-1948,
2,31-Oct-2024,IBRD00021,EUROPE AND CENTRAL ASIA,NL,Netherlands,,,,NPL,Fully Repaid,...,3955788.0,0,0,01-Apr-1953,01-Apr-1954,25-May-1948,07-Aug-1947,01-Jun-1948,30-Jun-1948,
3,31-Oct-2024,IBRD00030,EUROPE AND CENTRAL ASIA,DK,Denmark,,,,NPL,Fully Repaid,...,22229000.0,0,0,01-Feb-1953,01-Aug-1972,22-Aug-1947,22-Aug-1947,17-Oct-1947,31-Mar-1949,
4,31-Oct-2024,IBRD00040,EUROPE AND CENTRAL ASIA,LU,Luxembourg,,,,NPL,Fully Repaid,...,10142000.0,0,0,15-Jul-1949,15-Jul-1972,28-Aug-1947,28-Aug-1947,24-Oct-1947,31-Mar-1949,


In [13]:
# Write the DataFrame to a CSV file.
# The output folder defaults to the original local path; set the WORLDBANK_DATA_DIR
# environment variable to redirect it when running on another machine.
data_dir = os.environ.get(
    'WORLDBANK_DATA_DIR',
    'C:/Users/prana/Documents/NCI/PythonWorkSpace/WorldBank/Data',
)
os.makedirs(data_dir, exist_ok=True)  # to_csv does not create missing folders
historical = f"{data_dir}/historical.csv"
df.to_csv(historical, index=False)

print(f"Data written to {historical}")

Data written to C:/Users/prana/Documents/NCI/PythonWorkSpace/WorldBank/Data/historical.csv


In [None]:
# 