# Milestone 3 Great Expectations
Name : Vania Alya Qonita <br>
Batch : FTDS-029-RMT

Objective : Create a testing suite to validate that the data is aligned with the requirements.

## Import Library

In [1]:
# Install the library

!pip install -q great-expectations

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25h

## Setup

In [2]:
# Create the data context, using the current directory as the project root

from great_expectations.data_context import FileDataContext

project_root = './'
context = FileDataContext.create(project_root_dir=project_root)

In [3]:
# Register a pandas Datasource. Datasource names must be unique within the
# data context, so `add_or_update_pandas` is used instead of `add_pandas`:
# re-running this cell against an already-initialised project then replaces
# the Datasource instead of failing because the name is already taken.
datasource_name = 'csv-sales'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Attach the CSV file to the Datasource as a named data asset
asset_name = 'sales-2019'
path_to_data = 'P2M3_vania_alya_data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator uses to fetch the asset's data
batch_request = asset.build_batch_request()

## Expectation Suite

In [4]:
# Name of the expectation suite that collects the checks defined below
expectation_suite_name = 'expectation-sales-dataset'

# Register the suite in the data context
context.add_or_update_expectation_suite(expectation_suite_name)

# Tie the batch request to the suite through a validator
validator = context.get_validator(
    expectation_suite_name=expectation_suite_name,
    batch_request=batch_request,
)

# Preview the first rows of the batch to confirm the validator sees the data
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,invoice_id,branch,city,customer_type,gender,product_line,unit_price,quantity,tax_5%,total,date,time,payment,cogs,gross_margin_percentage,gross_income,rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,2019-01-05,13:08:00,Ewallet,522.83,4.761905,26.1415,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,2019-03-08,10:29:00,Cash,76.4,4.761905,3.82,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,2019-03-03,13:23:00,Credit card,324.31,4.761905,16.2155,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,2019-01-27,20:33:00,Ewallet,465.76,4.761905,23.288,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2019-02-08,10:37:00,Ewallet,604.17,4.761905,30.2085,5.3


### Expectation 1 : to be unique

In [5]:
# Expectation 1 : every value in column `invoice_id` must be unique

validator.expect_column_values_to_be_unique(column='invoice_id')




Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_unique",
    "kwargs": {
      "column": "invoice_id",
      "batch_id": "csv-sales-sales-2019"
    },
    "meta": {}
  },
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Expectation 2 : to be between min_value and max_value

In [6]:
# Expectation 2 : values in column `rating` must fall within the range 0 to 10

validator.expect_column_values_to_be_between('rating', min_value=0, max_value=10)




Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_between",
    "kwargs": {
      "column": "rating",
      "min_value": 0,
      "max_value": 10,
      "batch_id": "csv-sales-sales-2019"
    },
    "meta": {}
  },
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Expectation 3 : to be in set

In [7]:
# Expectation 3 : column `customer_type` may only contain 'Normal' or 'Member'

validator.expect_column_values_to_be_in_set(
    column='customer_type',
    value_set=['Normal', 'Member'],
)

  and should_run_async(code)




Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_in_set",
    "kwargs": {
      "column": "customer_type",
      "value_set": [
        "Normal",
        "Member"
      ],
      "batch_id": "csv-sales-sales-2019"
    },
    "meta": {}
  },
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Expectation 4 : to be in type list

In [8]:
# Expectation 4 : column `gross_income` must be of an integer or float type

validator.expect_column_values_to_be_in_type_list(
    column='gross_income',
    type_list=['integer', 'float'],
)




Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_in_type_list",
    "kwargs": {
      "column": "gross_income",
      "type_list": [
        "integer",
        "float"
      ],
      "batch_id": "csv-sales-sales-2019"
    },
    "meta": {}
  },
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Expectation 5 : column value lengths to equal a fixed length

In [9]:
# Expectation 5 : every value in column `invoice_id` must be exactly 11 characters long
validator.expect_column_value_lengths_to_equal(column='invoice_id', value=11)

  and should_run_async(code)




Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_value_lengths_to_equal",
    "kwargs": {
      "column": "invoice_id",
      "value": 11,
      "batch_id": "csv-sales-sales-2019"
    },
    "meta": {}
  },
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Expectation 6 : Unique value count

In [18]:
# Expectation 6 : the number of distinct values in column `product_line`
# must be between 6 and 10
validator.expect_column_unique_value_count_to_be_between(
    column='product_line',
    min_value=6,
    max_value=10,
)

  and should_run_async(code)




Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_unique_value_count_to_be_between",
    "kwargs": {
      "column": "product_line",
      "min_value": 6,
      "max_value": 10,
      "batch_id": "csv-sales-sales-2019"
    },
    "meta": {}
  },
  "result": {
    "observed_value": 6
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Expectation 7 : Dateutil parseable

In [20]:
# Expectation 7 : every value in column `date` must be parseable by dateutil

validator.expect_column_values_to_be_dateutil_parseable(column='date')




Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_dateutil_parseable",
    "kwargs": {
      "column": "date",
      "batch_id": "csv-sales-sales-2019"
    },
    "meta": {}
  },
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Save Expectation

In [21]:
# Save the validator's expectations into the expectation suite;
# failed expectations are kept rather than discarded

discard_failed = False
validator.save_expectation_suite(discard_failed_expectations=discard_failed)