### PyTest

##### Example 1

In [84]:
class Fruit:
    """Minimal example class: a fruit that only remembers its name."""

    def __init__(self, name: str) -> None:
        """Store ``name`` (e.g. ``"banana"``) unchanged on the instance."""
        self.name = name

Write a test that verifies the initialization of the `Fruit` class. Once the test has completed, delete the instance.

In [85]:
class TestFruit:
    def setup_method(self):
        self.fruit = Fruit(name="banana")
    
    def test_init(self):
        assert self.fruit.name == "banana"
    
    def teardown_method(self):
        del self.fruit

### Great Expectations

In [3]:
import json
import pandas as pd
from urllib.request import urlopen

In [4]:
# Load the labeled projects and their tags from the Made-With-ML GitHub repository.
# Both CSV files are fetched over HTTP, so this cell needs network access.
projects = pd.read_csv("https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/projects.csv")
tags = pd.read_csv("https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/tags.csv")

##### Example 1

In [5]:
# Join each project with its tag via the shared `id` column.
data = projects.merge(tags, on="id")

In [6]:
type(data)

pandas.core.frame.DataFrame

Create a `great_expectations` `PandasDataset` (a pandas DataFrame that also supports expectations) from `data`.

In [20]:
import great_expectations as ge

In [21]:
# Wrap the merged frame in a PandasDataset (its type is confirmed in the next cell).
df = ge.dataset.PandasDataset(data)

In [22]:
type(df)

great_expectations.dataset.pandas_dataset.PandasDataset

##### Example 2

In [7]:
type(data)

pandas.core.frame.DataFrame

In [8]:
data.head(1)

Unnamed: 0,id,created_on,title,description,tag
0,6,2020-02-20 06:43:18,Comparison between YOLO and RCNN on real world...,Bringing theory to experiment is cool. We can ...,computer-vision


Verify that the values in the `id` column of the data are unique using `great_expectations`.

**Hint**: `values_to_be_unique`

In [9]:
import great_expectations as ge

In [10]:
# Wrap `data` in a PandasDataset; the expectation in the next cell is run on this object.
df = ge.dataset.PandasDataset(data)

In [11]:
# Expect every value in the `id` column to be unique; the verdict is reported under "success".
result = df.expect_column_values_to_be_unique(column="id")

In [12]:
result

{
  "result": {
    "element_count": 955,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true,
  "meta": {}
}

##### Example 3

In [42]:
# Column names in the order the dataset is expected to have them, left to right.
ordered_columns = ["id", "created_on", "title", "description", "tag"]

In [45]:
type(df)

great_expectations.dataset.pandas_dataset.PandasDataset

In [46]:
ordered_columns

['id', 'created_on', 'title', 'description', 'tag']

Check whether the columns of the DataFrame `df` are arranged in the order given by `ordered_columns`, using `great_expectations`.

**Hint**: `columns_to_match_ordered_list`

In [47]:
# Compare df's columns against `ordered_columns`; the columns actually found are echoed as "observed_value".
df.expect_table_columns_to_match_ordered_list(column_list=ordered_columns)

{
  "success": true,
  "meta": {},
  "result": {
    "observed_value": [
      "id",
      "created_on",
      "title",
      "description",
      "tag"
    ]
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

##### Example 4

In [73]:
type(df)

great_expectations.dataset.pandas_dataset.PandasDataset

In [74]:
# Collect the expectations recorded on df (presumably by the earlier expect_* calls) into one suite.
suite = df.get_expectation_suite(
    discard_failed_expectations=False # keep expectations even if they failed when run; optional for this exercise
)

In [75]:
# Re-run every expectation in the suite against df and summarise the outcome under "statistics".
df.validate(
    expectation_suite=suite,
    only_return_failures=True # list only failing expectations in "results" (hence the empty list below); optional for this exercise
)

{
  "success": true,
  "meta": {
    "great_expectations_version": "0.16.15",
    "expectation_suite_name": "default",
    "run_id": {
      "run_time": "2023-06-07T14:30:24.829450+07:00",
      "run_name": null
    },
    "batch_kwargs": {
      "ge_batch_id": "79c445e4-0502-11ee-a63d-9a8cfd1e9b3e"
    },
    "batch_markers": {},
    "batch_parameters": {},
    "validation_time": "20230607T073024.829295Z",
    "expectation_suite_meta": {
      "great_expectations_version": "0.16.15"
    }
  },
  "evaluation_parameters": {},
  "statistics": {
    "evaluated_expectations": 2,
    "successful_expectations": 2,
    "unsuccessful_expectations": 0,
    "success_percent": 100.0
  },
  "results": []
}