# Perkenalan

Nama    : Yuzal Qushoyyi Wahyudi\
Batch   : RMT-033

# Great Expectation

In [4]:
# Create a data context

from great_expectations.data_context import FileDataContext

context = FileDataContext.create(project_root_dir='D:\yuzal\Belajar\Hacktiv8\PHASE2\TUGAS\Project-M3')

In [5]:
# Give a name to a Datasource. This name must be unique between Datasources.
datasource_name = 'P2M3_yuzal_data_clean'
datasource = context.sources.add_pandas(datasource_name)

# Give a name to a data asset
asset_name = 'M3_clean_data'
path_to_data = 'D:\yuzal\Belajar\Hacktiv8\PHASE2\TUGAS\Project-M3\dags\P2M3_yuzal_data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build batch request
batch_request = asset.build_batch_request()

In [6]:
# Creat an expectation suite
expectation_suite_name = 'expectation-M3-clean-data'
context.add_or_update_expectation_suite(expectation_suite_name)

# Create a validator using above expectation suite
validator = context.get_validator(
    batch_request = batch_request,
    expectation_suite_name = expectation_suite_name
)

# Check the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,loan_id,gender,married,dependents,education,self_employed,applicantincome,coapplicantincome,loanamount,loan_amount_term,credit_history,property_area,loan_status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y


In [7]:
# Expectation 1 : Column `self_employed` can not contain missing values

validator.expect_column_values_to_not_be_null('self_employed')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 553,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true,
  "meta": {}
}

In [9]:
# Expectation 2 : Column `education` must be unique

validator.expect_column_values_to_be_unique('education')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 553,
    "unexpected_count": 553,
    "unexpected_percent": 100.0,
    "partial_unexpected_list": [
      "Graduate",
      "Graduate",
      "Not Graduate",
      "Graduate",
      "Graduate",
      "Not Graduate",
      "Graduate",
      "Graduate",
      "Graduate",
      "Graduate",
      "Graduate",
      "Graduate",
      "Graduate",
      "Graduate",
      "Graduate",
      "Not Graduate",
      "Graduate",
      "Not Graduate",
      "Not Graduate",
      "Graduate"
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 100.0,
    "unexpected_percent_nonmissing": 100.0
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": false,
  "meta": {}
}

In [10]:
# Expectation 3 : Column `loanamount` must be less than $ 1000

validator.expect_column_values_to_be_between(
    column='loanamount', min_value=0, max_value=1000
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 553,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true,
  "meta": {}
}

In [11]:
# Expectation 4 : Column `loan_amount_term` must be exist

validator.expect_column_to_exist(column='loan_amount_term')

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "result": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true,
  "meta": {}
}

In [12]:
# Expectation 5 : Column `property_area` must contain one of the following 3 things :
# Urban
# Rural
# Semiurban

validator.expect_column_values_to_be_in_set('property_area', ['Urban','Rural','Semiurban'])

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 553,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true,
  "meta": {}
}

In [13]:
# Expectation 6 : Column `loan_amount_term` must in form of integer or float

validator.expect_column_values_to_be_in_type_list('loan_amount_term', ['integer', 'float'])

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "result": {
    "observed_value": "float64"
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true,
  "meta": {}
}

In [17]:
# Expectation 7 : Make sure every column in ordered right place

validator.expect_table_columns_to_match_ordered_list(["loan_id", "gender", "married", "dependents", "education", "self_employed", "applicantincome", "coapplicantincome", "loanamount", "loan_amount_term", "credit_history", "property_area", "loan_status"])

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "result": {
    "observed_value": [
      "loan_id",
      "gender",
      "married",
      "dependents",
      "education",
      "self_employed",
      "applicantincome",
      "coapplicantincome",
      "loanamount",
      "loan_amount_term",
      "credit_history",
      "property_area",
      "loan_status"
    ]
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true,
  "meta": {}
}

In [18]:
# Save into Expectation Suite

validator.save_expectation_suite(discard_failed_expectations=False)