# i. Introduction

Notebook ini dibuat untuk melakukan validasi data sesuai dengan standar kualitas yang dijabarkan di bawah ini.

# ii. Import Libraries

In [None]:
import pandas as pd
import great_expectations as ge
from great_expectations.checkpoint import SimpleCheckpoint

# iii. Load Data

In [25]:
# Read the cleaned dataset into a DataFrame for a quick visual check.
df = pd.read_csv(filepath_or_buffer='data_cleaned.csv')

# Preview the first rows (5 is the default for head()).
df.head(n=5)

Unnamed: 0,product,feature,brand,price,rating,color,ear_placement,form_factor,impedance,image_url,reviews
0,Baseus Eli Sport 2 Open-Ear Headphones Wireles...,"Secure Earhook Sport Design,Ultra-Comfort,4-Mi...",Baseus,19.99,4.0,White,Open Ear,Open Ear earbuds /Over the ear earbuds,16 Ohms,https://m.media-amazon.com/images/I/41iRmtTvb9...,"[{'review': 'I\'ve got to be honest, I\'m blow..."
1,Baseus Inspire XP1 Adaptive Noise Cancelling E...,"Sound by Bose, Dolby Audio, Premium Real-Time ...",Baseus,109.99,4.6,Cosmic Black,In Ear,In Ear,Not Specified,https://m.media-amazon.com/images/I/51L6GgSj6p...,"[{'review': ""Pretty good for the price. They s..."
2,Baseus Eli Sport 2 Open-Ear Headphones Wireles...,"Secure Earhook Sport Design,Ultra-Comfort,4-Mi...",Baseus,19.99,4.0,Green,Open Ear,Open Ear earbuds /Over the ear earbuds,16 Ohms,https://m.media-amazon.com/images/I/51MZtudDew...,"[{'review': 'I\'ve got to be honest, I\'m blow..."
3,Baseus Bass BP1 Pro Noise Cancelling Wireless ...,"Real-Time Adaptive Noise Cancelling, Adaptive ...",Baseus,23.99,4.3,Space Black,In Ear,In Ear,Not Specified,https://m.media-amazon.com/images/I/51qpokZ6oO...,[{'review': 'Have you ever felt the gut rumbli...
4,Baseus Bass BP1 NC Hybrid Active Noise Cancell...,"12mm Super-Bass Driver, 41H Playtime, Bluetoot...",Baseus,21.99,4.6,Black,In Ear,In Ear,16 Ohms,https://m.media-amazon.com/images/I/51RyPpxGPH...,"[{'review': 'The sound quality is clean, and t..."


In [2]:
# Obtain the Great Expectations data context.
context = ge.get_context()

# Register the pandas datasource, or update it when the name is already known.
datasource = context.sources.add_or_update_pandas(name="tws_datasource")

# Point a CSV asset at the cleaned dataset.
csv_file = "data_cleaned.csv"
asset = datasource.add_csv_asset(
    name="tws_asset",
    filepath_or_buffer=csv_file,
)

# Build the batch request for that asset.
batch_request = asset.build_batch_request()

# Create the expectation suite, or update it when the name already exists.
suite_name = "tws_suite"
suite = context.add_or_update_expectation_suite(
    expectation_suite_name=suite_name,
)

# Validator that binds the batch request to the suite; the expectation
# cells call their expect_* methods on this object.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=suite_name,
)

# iv. Checking Expectation

In [None]:
# 1. The 'product' column must not contain null values.

validator.expect_column_values_to_not_be_null(column='product')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 287,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 2. The 'rating' column must exist in the dataset.

validator.expect_column_to_exist(
    column='rating',
)

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": true,
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 3. Every 'rating' value must lie between 1 and 5.

validator.expect_column_values_to_be_between(
    column='rating',
    min_value=1,
    max_value=5,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 287,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 4. The maximum of 'rating' must be exactly 5.
# Both bounds are passed by keyword: a lone positional 5 is taken as
# min_value only, which would let a maximum above 5 pass the check.

validator.expect_column_max_to_be_between(
    column='rating',
    min_value=5,
    max_value=5,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 5.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 5. The 'price' column must have a numeric (integer or float) type.
# The names below are numpy dtype / Python type names, which is what the
# pandas backend is documented to accept. 'integer' does not appear to be
# one of them (NOTE(review): confirm against the installed GX version), so
# integer-typed prices would not have been accepted by the previous list.

validator.expect_column_values_to_be_in_type_list(
    column='price',
    type_list=['int', 'int64', 'float', 'float64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [16]:
# 'product' may only consist of letters, digits, whitespace and the
# punctuation characters - ( ) & , . / +

validator.expect_column_values_to_match_regex(
    column='product',
    regex=r'^[A-Za-z0-9\s\-\(\)&,\./\+]+$',
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 287,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [21]:
# The length of each 'product' value must be between 5 and 200 characters.
validator.expect_column_value_lengths_to_be_between(
    column='product',
    min_value=5,
    max_value=200,
)

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 287,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 'product' must not contain any of the characters @ # $ % ^ * _ = < > | ~ `
validator.expect_column_values_to_not_match_regex(
    column='product',
    regex=r'[@#$%^*_=<>|~`]',
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 287,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# v. Saving

In [23]:
# Persist the validator's expectations to the suite, keeping any that failed.
validator.save_expectation_suite(
    discard_failed_expectations=False,
)

In [None]:
# Build a checkpoint from the context and the validator.
checkpoint = SimpleCheckpoint(name="tws_checkpoint", data_context=context, validator=validator)

# Run the checkpoint and print its result.
result = checkpoint.run()
print(result)