In [1]:
pip install great_expectations

Note: you may need to restart the kernel to use updated packages.


In [6]:
import great_expectations as gx

# Ask for an in-memory (ephemeral) context explicitly. A bare get_context() chooses the
# context type from the environment (for example an existing project directory or
# cloud credentials), which would make the assertion below fail on some machines.
context = gx.get_context(mode="ephemeral")

# Guard: everything that follows assumes nothing is persisted to disk by the context.
assert type(context).__name__ == "EphemeralDataContext"


In [7]:

import pandas as pd

# Load the pizza sales records from the CSV file next to the notebook.
pizza_data_df = pd.read_csv(filepath_or_buffer="pizza_sales.csv")

# Register a pandas data source on the context, then a dataframe asset on that source.
# The dataframe itself is only supplied later, when a batch is requested.
data_source = context.data_sources.add_pandas(name="pizza_sales_source")
data_asset = data_source.add_dataframe_asset(name="pizza_sales_asset")


In [8]:
batch_definition_name = "pizza_sales_batch"

# Attach a whole-dataframe batch definition to the asset under the name above.
batch_definition = data_asset.add_batch_definition_whole_dataframe(
    batch_definition_name
)

# Sanity check: the returned definition carries the name that was requested.
assert batch_definition.name == batch_definition_name

In [9]:

# Hand the loaded dataframe to the batch definition through the "dataframe" parameter,
# then fetch the resulting batch so it can be validated.
batch_parameters = dict(dataframe=pizza_data_df)
batch = batch_definition.get_batch(batch_parameters=batch_parameters)

In [10]:
expectation_suite_name = "pizza_data_suite"
suite = gx.ExpectationSuite(name=expectation_suite_name)

# The rules are declared as data and added in loops, so each kind of expectation is
# written once. Expectations are added in the same order as before: not-null checks,
# lower bounds, allowed value sets, strftime formats, then uniqueness.

# Columns that must never contain nulls.
required_columns = [
    "pizza_id",
    "order_id",
    "pizza_name_id",
    "pizza_name",
    "pizza_size",
    "order_date",
    "order_time",
]
for column_name in required_columns:
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToNotBeNull(column=column_name)
    )

# Numeric columns with a lower bound only (no max_value is set).
minimum_by_column = {
    "quantity": 1,
    "unit_price": 0,
    "total_price": 0,
}
for column_name, minimum in minimum_by_column.items():
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToBeBetween(
            column=column_name,
            min_value=minimum,
        )
    )

# Categorical columns restricted to a fixed set of values.
allowed_values_by_column = {
    "pizza_size": ["S", "M", "L", "XL", "XXL"],
    "pizza_category": ["Classic", "Veggie", "Supreme", "Chicken"],
}
for column_name, allowed_values in allowed_values_by_column.items():
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToBeInSet(
            column=column_name,
            value_set=allowed_values,
        )
    )

# String columns that must match a strftime pattern.
strftime_format_by_column = {
    "order_date": "%m/%d/%Y",
    "order_time": "%H:%M:%S",
}
for column_name, expected_format in strftime_format_by_column.items():
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToMatchStrftimeFormat(
            column=column_name,
            strftime_format=expected_format,
        )
    )

# pizza_id is expected to have no duplicate values.
suite.add_expectation(gx.expectations.ExpectColumnValuesToBeUnique(column="pizza_id"))

# Register the finished suite on the context; kept as the last expression so the
# notebook displays the stored suite.
context.suites.add(suite)


{
  "name": "pizza_data_suite",
  "id": "0f9026e5-5d2a-4f80-9485-8765cb65ea90",
  "expectations": [
    {
      "type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "pizza_id"
      },
      "meta": {},
      "id": "02cf3163-7d66-4229-af16-6b0b0f03c802"
    },
    {
      "type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "order_id"
      },
      "meta": {},
      "id": "f70d0320-f792-419a-a408-00b663abc357"
    },
    {
      "type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "pizza_name_id"
      },
      "meta": {},
      "id": "d49cee66-4dfb-485d-b0c9-800938610bed"
    },
    {
      "type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "pizza_name"
      },
      "meta": {},
      "id": "9e74f71b-1f95-4cb3-bae4-82a3b8d4f15b"
    },
    {
      "type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "pizza_size"
      },
      "meta"

In [11]:
# Run the expectations in `suite` against the batch and keep the result object.
validation_results = batch.validate(suite)
# Print the full result. In the recorded run the top-level "success" field is false,
# i.e. at least one expectation did not pass; inspect the per-expectation entries
# under "results" to see which.
print(validation_results)

Calculating Metrics:   0%|          | 0/91 [00:00<?, ?it/s]

{
  "success": false,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "batch_id": "pizza_sales_source-pizza_sales_asset",
          "column": "pizza_id"
        },
        "meta": {},
        "id": "02cf3163-7d66-4229-af16-6b0b0f03c802"
      },
      "result": {
        "element_count": 48620,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,
        "partial_unexpected_list": [],
        "partial_unexpected_counts": [],
        "partial_unexpected_index_list": []
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_unique",
        "kwargs": {
          "batch_id": "pizza_sales_source-pizza_sales_asset",
          "column": "pizza_id"
    

In [12]:
# Build the data docs for this context. In the recorded run the call returned a dict
# mapping 'local_site' to a file:// URL of an index.html inside a temporary directory.
# NOTE(review): results produced by batch.validate(...) above may not be included in
# the generated docs unless they are stored via the context — confirm.
context.build_data_docs()

{'local_site': 'file://C:\\Users\\YAHIAZ~1\\AppData\\Local\\Temp\\tmp9dgp0md9\\index.html'}