In [96]:
import pandas as pd

import great_expectations as gx
from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

from ydata_profiling import ProfileReport
from ydata_profiling.config import Settings
from ydata_profiling.model import BaseDescription, expectation_algorithms
from ydata_profiling.model.handler import Handler
from ydata_profiling.utils.dataframe import slugify
from ydata_profiling.expectations_report import ExpectationsReport
from great_expectations.checkpoint import SimpleCheckpoint

from typing import Any, Optional

import pandas as pd
from visions import VisionsTypeset
import re
import os

In [97]:
# Everything in the working-directory path that precedes 'notebooks' is used as the project prefix.
full_path = os.getcwd()
raw_data_prefix = full_path.partition('notebooks')[0] + '/data/01_raw/'

# Raw CSV extracts, read in the same order as before.
customers = pd.read_csv(raw_data_prefix + 'Customers.csv')
loans = pd.read_csv(raw_data_prefix + 'loans/Loans_20240131.csv')
funds = pd.read_csv(raw_data_prefix + 'Funds_Hist_to_20240331.csv')
transactions = pd.read_csv(raw_data_prefix + 'Transactions_to_20240331.csv')
loans_hist = pd.read_csv(raw_data_prefix + 'Loans_Hist_to_20240331.csv')

In [None]:
# df.columns = [re.sub(r'(?<!^)(?=[A-Z])', '_', col).lower() for col in df.columns]
# df.columns = df.columns.str.replace('.', '', regex=False)

In [5]:
# loans.columns = [re.sub(r'(?<!^)(?=[A-Z])', '_', col).lower() for col in loans.columns]
# loans.columns = loans.columns.str.replace('.', '', regex=False)
# loans

In [8]:
# funds = pd.read_csv(full_path.partition('notebooks')[0] + 'data/01_raw/Funds_Hist_to_20231231.csv')
# funds.columns = [re.sub(r'(?<!^)(?=[A-Z])', '_', col).lower() for col in funds.columns]
# funds.columns = funds.columns.str.replace('.', '', regex=False)
# funds

## Split the data into a reference dataset and an analysis dataset

In [9]:
# df_ref = df[~df['customer_since'].str.startswith('2025')]
# df_ref

In [10]:
# df_ana = df[df['customer_since'].str.startswith('2025')]
# df_ana

In [36]:
# The Great Expectations project directory is the path prefix before 'notebooks' followed by 'gx'.
full_path = os.getcwd()
gx_root_dir = full_path.partition('notebooks')[0] + 'gx'
context = gx.get_context(context_root_dir=gx_root_dir)

In [None]:
# Register the pandas datasource, or reuse it if registration fails.
datasource_name = "project_data_raw"
try:
    datasource = context.sources.add_pandas(datasource_name)
except Exception:
    # `except Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit propagate.
    # If registration failed for a reason other than "already exists", the lookup
    # below raises and surfaces the problem instead of hiding it.
    print("Data Source already exists.")
    datasource = context.datasources[datasource_name]

In [None]:


# data_asset_name = "project_ana"
# try:
#     data_asset = datasource.add_dataframe_asset(name=data_asset_name, dataframe= df_ana)
# except:
#     print("The data asset alread exists. The required one will be loaded.")
#     data_asset = datasource.get_asset(data_asset_name)

# data_asset_name = "project_ref"
# try:
#     data_asset = datasource.add_dataframe_asset(name=data_asset_name, dataframe= df_ref)
# except:
#     print("The data asset alread exists. The required one will be loaded.")
    # data_asset = datasource.get_asset(data_asset_name)

In [82]:
# Register one dataframe asset per raw table (or load it when registration fails,
# e.g. because the asset already exists in the datasource).
raw_frames = {
    "customers_raw": customers,
    "loans_raw": loans,
    "loans_hist_raw": loans_hist,
    "transactions_raw": transactions,
    "funds_raw": funds,
}

# Keep every asset handle so later cells can pick the asset that matches their dataframe.
data_assets = {}
for data_asset_name, raw_frame in raw_frames.items():
    try:
        data_asset = datasource.add_dataframe_asset(name=data_asset_name, dataframe=raw_frame)
    except Exception:
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit propagate;
        # a genuinely missing asset makes get_asset raise instead of being silently ignored.
        print("The data asset alread exists. The required one will be loaded.")
        data_asset = datasource.get_asset(data_asset_name)
    data_assets[data_asset_name] = data_asset

# After the loop `data_asset_name` / `data_asset` refer to "funds_raw", exactly as before.

The data asset alread exists. The required one will be loaded.
The data asset alread exists. The required one will be loaded.
The data asset alread exists. The required one will be loaded.
The data asset alread exists. The required one will be loaded.
The data asset alread exists. The required one will be loaded.


## Build expectations

In [85]:
def _add_column_expectations(suite, expectation_type, columns, **expectation_kwargs):
    """Add one `expectation_type` expectation per column in `columns` to `suite`.

    Extra keyword arguments are placed after the "column" key in each
    expectation's kwargs.
    """
    for column in columns:
        suite.add_expectation(
            ExpectationConfiguration(
                expectation_type=expectation_type,
                kwargs={"column": column, **expectation_kwargs},
            )
        )


def build_expectation_suite(expectation_suite_name: str, feature_group: str) -> ExpectationSuite:
    """
    Build the validation expectation suite for one raw feature group.

    Args:
        expectation_suite_name (str): Name given to the returned expectation suite.
        feature_group (str): Feature group used to select the expectations. One of
            'customers_features', 'loans_features', 'funds_features',
            'transactions_features' or 'loans_hist_features'.

    Returns:
        ExpectationSuite: The suite holding all expectations for this feature group.
        For an unrecognised ``feature_group`` the suite is returned empty and a
        warning is emitted, because an empty suite validates any data successfully.
    """
    import warnings  # local import so this cell does not depend on edits to the import cell

    type_check = "expect_column_values_to_be_of_type"
    range_check = "expect_column_values_to_be_between"
    exists_check = "expect_column_to_exist"

    expectation_suite_bank = ExpectationSuite(
        expectation_suite_name=expectation_suite_name
    )

    # customer features
    if feature_group == 'customers_features':
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['NoOfDependents', 'SegmentId', 'IndustryId', 'LegalDocName1Id', 'YrNetMonthlyIn'],
            type_="float64",
        )

        # NewId must be present on every row and be an integer.
        _add_column_expectations(
            expectation_suite_bank, "expect_column_values_to_not_be_null", ['NewId']
        )
        _add_column_expectations(expectation_suite_bank, type_check, ['NewId'], type_="int64")

        # Identifier-like columns: inclusive lower bound of 1, no upper bound.
        _add_column_expectations(
            expectation_suite_bank, range_check,
            ['SegmentId', 'IndustryId', 'LegalDocName1Id', 'NewId'],
            min_value=1, strict_min=False, max_value=None,
        )

        # Income and dependents: inclusive lower bound of 0, no upper bound.
        _add_column_expectations(
            expectation_suite_bank, range_check,
            ['YrNetMonthlyIn', 'NoOfDependents'],
            min_value=0, strict_min=False, max_value=None,
        )

        # Numeric columns that must be present.
        _add_column_expectations(
            expectation_suite_bank, exists_check,
            ['YrNetMonthlyIn', 'NoOfDependents', 'SegmentId', 'IndustryId', 'LegalDocName1Id', 'NewId'],
        )

        # Remaining columns that must be present.
        _add_column_expectations(
            expectation_suite_bank, exists_check,
            ['CustomerStatus', 'EmploymentStatus', 'Gender', 'MaritalStatus', 'Placebrth', 'CustType',
             'Nationality', 'OcupationDesc', 'ResidenceCode', 'ResidenceStatus', 'ResidenceType',
             'SegGroup', 'Title', 'TownCountry', 'CustType.1', 'Habliter', 'Province', 'District',
             'LegalDocName1IdDescription', 'LegalIssDate', 'LegalIssAuth', 'AMLRiskRating'],
        )

    elif feature_group == 'loans_features':
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['CustomerNewId', 'ContractId', 'HasDefault'],
            type_="int64",
        )
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['CreditAmount', 'Outstanding', 'NumberOfInstallmentsToPay', 'Arreas'],
            type_="float64",
        )
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['SegmentDesc', 'CreditType', 'CreditEOMStartDate', 'CreditEOMEndDate', 'PaymentFrequency'],
            type_="object",
        )

    elif feature_group == 'funds_features':
        _add_column_expectations(expectation_suite_bank, type_check, ['CustomerId'], type_="int64")
        _add_column_expectations(expectation_suite_bank, type_check, ['FundsBalance'], type_="float64")
        _add_column_expectations(expectation_suite_bank, type_check, ['Date'], type_="object")

    elif feature_group == 'transactions_features':
        _add_column_expectations(expectation_suite_bank, type_check, ['TransactionId'], type_="int64")
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['CustomerIdDebitNew', 'CustomerIdCreditNew', 'Amount', 'AmountMZN'],
            type_="float64",
        )
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['Date', 'TransactionType', 'TransactionCategory', 'Currency'],
            type_="object",
        )

    elif feature_group == 'loans_hist_features':
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['CustomerNewId', 'ContractId', 'HasDefault'],
            type_="int64",
        )
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['CreditAmount', 'Outstanding', 'Arreas'],
            type_="float64",
        )
        _add_column_expectations(
            expectation_suite_bank, type_check,
            ['SnapshotDate', 'SegmentDesc', 'CreditType', 'CreditEOMStartDate', 'CreditEOMEndDate',
             'NumberOfInstallmentsToPay', 'PaymentFrequency'],
            type_="object",
        )

    else:
        # Previously a typo in the group name silently produced an empty suite.
        warnings.warn(
            f"Unknown feature_group {feature_group!r}: returning an empty expectation suite.",
            stacklevel=2,
        )

    return expectation_suite_bank

In [None]:
# validation_expectation_suite_numerical = build_expectation_suite("numerical_expectations_raw", "numerical_features")
# validation_expectation_suite_categorical = build_expectation_suite("categorical_expectations_raw", "categorical_features")

# context.add_or_update_expectation_suite(expectation_suite=validation_expectation_suite_numerical)
# context.add_or_update_expectation_suite(expectation_suite=validation_expectation_suite_categorical)

In [87]:
# Build one raw-data expectation suite per table.
validation_expectation_suite_customer = build_expectation_suite(
    "customer_expectations_raw", "customers_features"
)
validation_expectation_suite_loans = build_expectation_suite(
    "loans_expectations_raw", "loans_features"
)
validation_expectation_suite_funds = build_expectation_suite(
    "funds_expectations_raw", "funds_features"
)
validation_expectation_suite_transactions = build_expectation_suite(
    "transactions_expectations_raw", "transactions_features"
)
validation_expectation_suite_loans_hist = build_expectation_suite(
    "loans_hist_expectations_raw", "loans_hist_features"
)

# Store (or overwrite) the suites in the context, in the same order as before.
for _suite in (
    validation_expectation_suite_customer,
    validation_expectation_suite_loans,
    validation_expectation_suite_funds,
    validation_expectation_suite_transactions,
):
    context.add_or_update_expectation_suite(expectation_suite=_suite)

# Kept as the final expression so the cell still displays the loans_hist suite.
context.add_or_update_expectation_suite(expectation_suite=validation_expectation_suite_loans_hist)

{
  "expectation_suite_name": "loans_hist_expectations_raw",
  "ge_cloud_id": null,
  "expectations": [
    {
      "expectation_type": "expect_column_values_to_be_of_type",
      "kwargs": {
        "column": "CustomerNewId",
        "type_": "int64"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_be_of_type",
      "kwargs": {
        "column": "ContractId",
        "type_": "int64"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_be_of_type",
      "kwargs": {
        "column": "HasDefault",
        "type_": "int64"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_be_of_type",
      "kwargs": {
        "column": "CreditAmount",
        "type_": "float64"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_be_of_type",
      "kwargs": {
        "column": "Outstanding",
        "type_": "float64"
      },
      "meta": {}
   

## Analysis

In [88]:
def get_validation_results(checkpoint_result):
    """
    Flatten the first validation run of a checkpoint result into a DataFrame.

    Args:
        checkpoint_result: Mapping-like checkpoint result. Only the first entry of
            ``checkpoint_result["run_results"]`` is read; its ``validation_result``
            must provide a ``"results"`` sequence.

    Returns:
        pd.DataFrame: One row per expectation result with the columns listed in
        ``report_columns``. Fields missing from a result are reported as ``''``.
        The frame is empty (columns only) when there are no run results or no
        expectation results.

    Raises:
        KeyError: If ``"run_results"`` or the validation ``"results"`` are missing.
    """
    report_columns = [
        "Success", "Expectation Type", "Column", "Column Pair", "Max Value",
        "Min Value", "Element Count", "Unexpected Count", "Unexpected Percent",
        "Value Set", "Unexpected Value", "Observed Value",
    ]

    # run_results holds one entry per validation; only the first one is reported.
    validation_result_data = next(iter(checkpoint_result["run_results"].values()), None)
    if validation_result_data is None:
        return pd.DataFrame(columns=report_columns)

    results = validation_result_data.get('validation_result', {})["results"]

    # Collect plain records and build the frame once (no concat inside the loop).
    records = []
    for result in results:
        expectation_config = result.get('expectation_config', {})
        expectation_kwargs = expectation_config.get('kwargs', {})
        outcome = result.get('result', {})

        value_set = expectation_kwargs.get('value_set', '')
        observed_value = outcome.get('observed_value', '')

        # Unexpected values can only be derived when both sides are collections.
        # Without a configured value set there is nothing to compare against
        # (and `item in ''` raises TypeError for non-string items).
        if isinstance(observed_value, list) and isinstance(value_set, (list, tuple, set, frozenset)):
            unexpected_value = [item for item in observed_value if item not in value_set]
        else:
            unexpected_value = []

        records.append({
            "Success": result.get('success', ''),
            "Expectation Type": expectation_config.get('expectation_type', ''),
            "Column": expectation_kwargs.get('column', ''),
            "Column Pair": (expectation_kwargs.get('column_A', ''), expectation_kwargs.get('column_B', '')),
            "Max Value": expectation_kwargs.get('max_value', ''),
            "Min Value": expectation_kwargs.get('min_value', ''),
            "Element Count": outcome.get('element_count', ''),
            "Unexpected Count": outcome.get('unexpected_count', ''),
            "Unexpected Percent": outcome.get('unexpected_percent', ''),
            "Value Set": value_set,
            "Unexpected Value": unexpected_value,
            "Observed Value": observed_value,
        })

    return pd.DataFrame(records, columns=report_columns)

In [89]:
# Move each frame's current index into a regular column and renumber the rows.
# NOTE(review): this rebinds the raw frames in place, so re-running the cell keeps adding
# index columns — confirm whether `drop=True` was intended.
customers, loans, funds, loans_hist, transactions = (
    customers.reset_index(),
    loans.reset_index(),
    funds.reset_index(),
    loans_hist.reset_index(),
    transactions.reset_index(),
)

In [None]:
# data_asset_analysis = datasource.get_asset("project_ref")
# batch_request = data_asset_analysis.build_batch_request(df_ref)

In [30]:
# checkpoint_cat = gx.checkpoint.SimpleCheckpoint(
#     name="checkpoint_categorical",
#     data_context=context,
#     validations=[
#         {
#             "batch_request": batch_request,
#             "expectation_suite_name": "categorical_expectations_raw",
#         },
#     ],
# )
# checkpoint_result = checkpoint_cat.run()

In [90]:
# Build the batch from the asset registered for this table. The shared `data_asset`
# variable is left pointing at the "funds_raw" asset by the registration cell, so using
# it here would record the customers validation against the wrong asset.
customers_asset = datasource.get_asset("customers_raw")
batch_request = customers_asset.build_batch_request(dataframe=customers)

checkpoint_customers = gx.checkpoint.SimpleCheckpoint(
    name="checkpoint_customers_raw",
    data_context=context,
    validations=[
        {
            "batch_request": batch_request,
            "expectation_suite_name": "customer_expectations_raw",
        },
    ],
)
checkpoint_result = checkpoint_customers.run()
get_validation_results(checkpoint_result)

Calculating Metrics:   0%|          | 0/48 [00:00<?, ?it/s]

Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
0,True,expect_column_values_to_be_of_type,NoOfDependents,"(, )",,,,,,,[],float64
1,True,expect_column_values_to_be_between,NoOfDependents,"(, )",,0.0,122212.0,0.0,0.0,,[],
2,True,expect_column_to_exist,NoOfDependents,"(, )",,,,,,,[],
3,True,expect_column_values_to_be_of_type,SegmentId,"(, )",,,,,,,[],float64
4,True,expect_column_values_to_be_between,SegmentId,"(, )",,1.0,122212.0,0.0,0.0,,[],
5,True,expect_column_to_exist,SegmentId,"(, )",,,,,,,[],
6,True,expect_column_values_to_be_of_type,IndustryId,"(, )",,,,,,,[],float64
7,True,expect_column_values_to_be_between,IndustryId,"(, )",,1.0,122212.0,0.0,0.0,,[],
8,True,expect_column_to_exist,IndustryId,"(, )",,,,,,,[],
9,True,expect_column_values_to_be_of_type,LegalDocName1Id,"(, )",,,,,,,[],float64


In [91]:
# Build the batch from the asset registered for this table. The shared `data_asset`
# variable is left pointing at the "funds_raw" asset by the registration cell, so using
# it here would record the loans validation against the wrong asset.
loans_asset = datasource.get_asset("loans_raw")
batch_request = loans_asset.build_batch_request(dataframe=loans)

# Named after the table it validates (this cell previously rebound `checkpoint_customers`).
checkpoint_loans = gx.checkpoint.SimpleCheckpoint(
    name="checkpoint_loans_raw",
    data_context=context,
    validations=[
        {
            "batch_request": batch_request,
            "expectation_suite_name": "loans_expectations_raw",
        },
    ],
)
checkpoint_result = checkpoint_loans.run()
get_validation_results(checkpoint_result)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
0,True,expect_column_values_to_be_of_type,CustomerNewId,"(, )",,,,,,,[],int64
1,True,expect_column_values_to_be_of_type,ContractId,"(, )",,,,,,,[],int64
2,True,expect_column_values_to_be_of_type,HasDefault,"(, )",,,,,,,[],int64
3,True,expect_column_values_to_be_of_type,CreditAmount,"(, )",,,,,,,[],float64
4,True,expect_column_values_to_be_of_type,Outstanding,"(, )",,,,,,,[],float64
5,True,expect_column_values_to_be_of_type,NumberOfInstallmentsToPay,"(, )",,,,,,,[],float64
6,True,expect_column_values_to_be_of_type,Arreas,"(, )",,,,,,,[],float64
7,True,expect_column_values_to_be_of_type,SegmentDesc,"(, )",,,,,,,[],object_
8,True,expect_column_values_to_be_of_type,CreditType,"(, )",,,,,,,[],object_
9,True,expect_column_values_to_be_of_type,CreditEOMStartDate,"(, )",,,,,,,[],object_


In [92]:
# Look the asset up by name instead of relying on `data_asset` still being bound to
# "funds_raw" from the registration cell (hidden kernel state).
funds_asset = datasource.get_asset("funds_raw")
batch_request = funds_asset.build_batch_request(dataframe=funds)

checkpoint_funds = gx.checkpoint.SimpleCheckpoint(
    name="checkpoint_funds_raw",
    data_context=context,
    validations=[
        {
            "batch_request": batch_request,
            "expectation_suite_name": "funds_expectations_raw",
        },
    ],
)
checkpoint_result = checkpoint_funds.run()
get_validation_results(checkpoint_result)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
0,True,expect_column_values_to_be_of_type,CustomerId,"(, )",,,,,,,[],int64
1,True,expect_column_values_to_be_of_type,FundsBalance,"(, )",,,,,,,[],float64
2,True,expect_column_values_to_be_of_type,Date,"(, )",,,,,,,[],object_


In [93]:
# Build the batch from the asset registered for this table. The shared `data_asset`
# variable is left pointing at the "funds_raw" asset by the registration cell, so using
# it here would record the transactions validation against the wrong asset.
transactions_asset = datasource.get_asset("transactions_raw")
batch_request = transactions_asset.build_batch_request(dataframe=transactions)

checkpoint_transactions = gx.checkpoint.SimpleCheckpoint(
    name="checkpoint_transactions_raw",
    data_context=context,
    validations=[
        {
            "batch_request": batch_request,
            "expectation_suite_name": "transactions_expectations_raw",
        },
    ],
)
checkpoint_result = checkpoint_transactions.run()
get_validation_results(checkpoint_result)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
0,True,expect_column_values_to_be_of_type,TransactionId,"(, )",,,,,,,[],int64
1,True,expect_column_values_to_be_of_type,CustomerIdDebitNew,"(, )",,,,,,,[],float64
2,True,expect_column_values_to_be_of_type,CustomerIdCreditNew,"(, )",,,,,,,[],float64
3,True,expect_column_values_to_be_of_type,Amount,"(, )",,,,,,,[],float64
4,True,expect_column_values_to_be_of_type,AmountMZN,"(, )",,,,,,,[],float64
5,True,expect_column_values_to_be_of_type,Date,"(, )",,,,,,,[],object_
6,True,expect_column_values_to_be_of_type,TransactionType,"(, )",,,,,,,[],object_
7,True,expect_column_values_to_be_of_type,TransactionCategory,"(, )",,,,,,,[],object_
8,True,expect_column_values_to_be_of_type,Currency,"(, )",,,,,,,[],object_


In [95]:
# Build the batch from the asset registered for this table. The shared `data_asset`
# variable is left pointing at the "funds_raw" asset by the registration cell, so using
# it here would record the loans history validation against the wrong asset.
loans_hist_asset = datasource.get_asset("loans_hist_raw")
batch_request = loans_hist_asset.build_batch_request(dataframe=loans_hist)

checkpoint_loans_hist = gx.checkpoint.SimpleCheckpoint(
    name="checkpoint_loans_hist_raw",
    data_context=context,
    validations=[
        {
            "batch_request": batch_request,
            "expectation_suite_name": "loans_hist_expectations_raw",
        },
    ],
)
checkpoint_result = checkpoint_loans_hist.run()
get_validation_results(checkpoint_result)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
0,True,expect_column_values_to_be_of_type,CustomerNewId,"(, )",,,,,,,[],int64
1,True,expect_column_values_to_be_of_type,ContractId,"(, )",,,,,,,[],int64
2,True,expect_column_values_to_be_of_type,HasDefault,"(, )",,,,,,,[],int64
3,True,expect_column_values_to_be_of_type,CreditAmount,"(, )",,,,,,,[],float64
4,True,expect_column_values_to_be_of_type,Outstanding,"(, )",,,,,,,[],float64
5,True,expect_column_values_to_be_of_type,Arreas,"(, )",,,,,,,[],float64
6,True,expect_column_values_to_be_of_type,SnapshotDate,"(, )",,,,,,,[],object_
7,True,expect_column_values_to_be_of_type,SegmentDesc,"(, )",,,,,,,[],object_
8,True,expect_column_values_to_be_of_type,CreditType,"(, )",,,,,,,[],object_
9,True,expect_column_values_to_be_of_type,CreditEOMStartDate,"(, )",,,,,,,[],object_


In [94]:
# Tabulate the results of whichever checkpoint ran most recently in the kernel.
# NOTE(review): the saved output of this cell lists the transactions expectations although the
# cell sits after the loans_hist checkpoint — confirm which run is meant to be inspected.
df_validation = get_validation_results(checkpoint_result)
df_validation

Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
0,True,expect_column_values_to_be_of_type,TransactionId,"(, )",,,,,,,[],int64
1,True,expect_column_values_to_be_of_type,CustomerIdDebitNew,"(, )",,,,,,,[],float64
2,True,expect_column_values_to_be_of_type,CustomerIdCreditNew,"(, )",,,,,,,[],float64
3,True,expect_column_values_to_be_of_type,Amount,"(, )",,,,,,,[],float64
4,True,expect_column_values_to_be_of_type,AmountMZN,"(, )",,,,,,,[],float64
5,True,expect_column_values_to_be_of_type,Date,"(, )",,,,,,,[],object_
6,True,expect_column_values_to_be_of_type,TransactionType,"(, )",,,,,,,[],object_
7,True,expect_column_values_to_be_of_type,TransactionCategory,"(, )",,,,,,,[],object_
8,True,expect_column_values_to_be_of_type,Currency,"(, )",,,,,,,[],object_


In [25]:
# True when no expectation in df_validation has Success == False.
df_validation.loc[df_validation["Success"].eq(False)].empty

True

In [71]:
# Recompute the validation table from the current `checkpoint_result`.
# NOTE(review): the saved output of this cell uses snake_case column names (e.g. no_of_dependents)
# that no expectation defined in this notebook references — it looks stale; re-run to confirm.
df_validation = get_validation_results(checkpoint_result)
df_validation

Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
0,True,expect_column_values_to_be_of_type,no_of_dependents,"(, )",,,,,,,[],float64
1,True,expect_column_values_to_be_between,no_of_dependents,"(, )",,0.0,112451.0,0.0,0.0,,[],
2,True,expect_column_to_exist,no_of_dependents,"(, )",,,,,,,[],
3,True,expect_column_values_to_be_of_type,segment_id,"(, )",,,,,,,[],float64
4,True,expect_column_values_to_be_between,segment_id,"(, )",,1.0,112451.0,0.0,0.0,,[],
5,True,expect_column_to_exist,segment_id,"(, )",,,,,,,[],
6,True,expect_column_values_to_be_of_type,industry_id,"(, )",,,,,,,[],float64
7,True,expect_column_values_to_be_between,industry_id,"(, )",,1.0,112451.0,0.0,0.0,,[],
8,True,expect_column_to_exist,industry_id,"(, )",,,,,,,[],
9,True,expect_column_values_to_be_of_type,legal_doc_name1_id,"(, )",,,,,,,[],float64


In [72]:
# Show only the expectations whose Success flag is False.
df_validation.loc[df_validation["Success"].eq(False)]


Unnamed: 0,Success,Expectation Type,Column,Column Pair,Max Value,Min Value,Element Count,Unexpected Count,Unexpected Percent,Value Set,Unexpected Value,Observed Value
