From 4d74bc02c8e330d97c381f36e3b1d55ce72a8dd9 Mon Sep 17 00:00:00 2001 From: dnth Date: Mon, 3 Jul 2023 17:59:58 +0800 Subject: [PATCH 1/4] add notebook --- README.md | 2 +- examples/analyzing-hf-datasets.ipynb | 2631 ++++++++++++++++++++++++++ 2 files changed, 2632 insertions(+), 1 deletion(-) create mode 100644 examples/analyzing-hf-datasets.ipynb diff --git a/README.md b/README.md index 2048af34..eeac379f 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,7 @@ In short, you'll need 3 lines of code to run fastdup: ```python import fastdup -fd = fastdup.create("IMAGE_FOLDER/") +fd = fastdup.create(input_dir="IMAGE_FOLDER/") fd.run() ``` diff --git a/examples/analyzing-hf-datasets.ipynb b/examples/analyzing-hf-datasets.ipynb new file mode 100644 index 00000000..c5c33198 --- /dev/null +++ b/examples/analyzing-hf-datasets.ipynb @@ -0,0 +1,2631 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d1d92b2e", + "metadata": {}, + "source": [ + "[![image](https://raw.githubusercontent.com/visual-layer/visuallayer/main/imgs/vl_horizontal_logo.png)](https://www.visual-layer.com)" + ] + }, + { + "cell_type": "markdown", + "id": "731484b5", + "metadata": {}, + "source": [ + "# Analyzing Hugging Face Datasets\n", + "\n", + "This notebook shows how you can use fastdup to analyze any datasets from [Hugging Face Datasets](https://huggingface.co/docs/datasets/index).\n", + "\n", + "We will analyze an image classification dataset for potential:\n", + "\n", + "+ Duplicates / near duplicates.\n", + "+ Outliers.\n", + "+ Wrong labels." + ] + }, + { + "cell_type": "markdown", + "id": "34d4d2db", + "metadata": {}, + "source": [ + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7176a4bc", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -Uq fastdup datasets" + ] + }, + { + "cell_type": "markdown", + "id": "4dea523f", + "metadata": {}, + "source": [ + "Now, test the installation. 
If there's no error message, we are ready to go." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "655330c1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/bin/dpkg\n" + ] + }, + { + "data": { + "text/plain": [ + "'1.22'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import fastdup\n", + "fastdup.__version__" + ] + }, + { + "cell_type": "markdown", + "id": "40145087", + "metadata": {}, + "source": [ + "## Load Dataset\n", + "\n", + "Tiny ImageNet contains 100,000 images of 200 classes (500 for each class) downsized to 64×64 colored images. Each class has 500 training images, 50 validation images, and 50 test images." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d455b739", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading and preparing dataset imagefolder/default (download: 153.61 MiB, generated: 212.36 MiB, post-processed: Unknown size, total: 365.97 MiB) to /media/dnth/Active-Projects/dnth-fastdup/examples/images_dir/Maysee___parquet/Maysee--tiny-imagenet-35af7c46a941f08e/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c16829ff53a346a981ec212bec1d2626", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading data files: 0%| | 0/2 [00:00,\n", + " 'label': 0}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e1078a54", + "metadata": {}, + "outputs": [ + { + "data": { + "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCABAAEADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDskuERD+8HTPBzSR3aSSr5TNjOGPrWQbmRbMqsy5IxtVdvFMsbtwzBiRkjLYqPrjfw6GWLzWpiYOnZJPc7S3QTKMc59KfcgQr+8JBFZ+j3UhjlZP3ihiPmIAH0qW+uhM4jYgD2Oa7JYmNSm2eQsM6ckjo4Y91hCUJB2Agj3qIJ5jNGSpdMMMZ6f0qSzmQWMYHVFCkfQUyN991xwdnzfn/+uuWUko3Z6kdGkTxzKQxZsADOTWbqk0T6LfTp8p+zsSCMEfKetXDIqSAHl1OxuevoazvFMwi8OXrYALKFJ9iQP60k9LocrWZ5zHdfaozFJMBIPu5GMipYA8Uu/cnyDpn71U7tYoZ1yQHU8Ov3T7GlBEc+4NkHnr3rz4rqccKV5GraQrPvneeRQrHKK2B65qaSbP3ScjjGf8/59Kp21zGkbq7BQT1pHuA5/dce561tG63Oz2MpT0OqsNXEdzB9plCRkbXOfyz/AJ71fl8S6NFLv+1kMD2jY/riuOtbCW5Oe3XJNU9RdbW8S1QbpW4z6ZFNzVrM7IYVzdrnpS6rZzK0sFzHKjEElWBwSOPp0rA8a3oXw0SDkSzBM/TJ/pXnd1e/Zbh3thPthZUlmyF2lugAzlvw9R06Vt381/f6MltdPmONshlAzkZGCO/X2rSE7RszCtQcW7O5gwXJjiMZPDAEjPGRUgl3tknpWaJ4dwPnISemCDmnqLq4DLbrhB959uTj2rne5lFKO5prKZCAe5q9HKIzyOvFc5LdQacPMY/ORgeppbTVLu+kYvCqRg/KwbFU03sdcKsep21tqYhiwMdOtYl80r6pHfRAFM/Nu7HvUUd7BawN5x38HAJxj61zep+NzaxtbW9uMPyRu6UowbN6ddU5XOou9Nsr+8F3vCI+HePzNqkjpuXocVV1rxLDZutrbzRSMuFJUggepJFcTca9c6hpkLQrIsruUfZwvAH88/oadpGlC1vXm1SOQx8bUH8R7Z9qv2a6mFbELojXhtoftyh1UAfxE45roYxdhTGjKEI4G3H8qxbjOC2C7Zz0qW01Ge2xufdjoh6is2rq5wyg3qipd6aJrvEku+aJskY4xTjItsGbI2rzkUpvI5llWVW3k7jIHA5+lUcPcoCsR8sH7xP4f5+ta/CjWnDXUoXN5e6kXKsYIgQFTHJHvVu
1sbd1/wBKiR3xwfalldpWZdq5GCSox+B4FLPcyCMhnJQDaMDIX6Z5H4Uc9jZxiNtbnTrORolIKtztx3/zipjqJdwq4aJeEVu1UtisiMNvA5KnJJ605Y/LYkyE44KsuCvPenzXBU0f/9k=", + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset[0]['image']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "07daca49", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset[0]['label']" + ] + }, + { + "cell_type": "markdown", + "id": "61b315c3", + "metadata": {}, + "source": [ + "## Get labels mapping\n", + "\n", + "Tiny ImageNet follows the original ImageNet class names. Let's download the class mappings `classes.py`." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "eb3c000c", + "metadata": {}, + "outputs": [], + "source": [ + "!wget -q https://huggingface.co/datasets/Maysee/tiny-imagenet/raw/main/classes.py" + ] + }, + { + "cell_type": "markdown", + "id": "90212bfd", + "metadata": {}, + "source": [ + "Here's the top 10 lines of `classes.py`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "79569736", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "i2d = {\r\n", + " \"n00001740\": \"entity\",\r\n", + " \"n00001930\": \"physical entity\",\r\n", + " \"n00002137\": \"abstraction, abstract entity\",\r\n", + " \"n00002452\": \"thing\",\r\n", + " \"n00002684\": \"object, physical object\",\r\n", + " \"n00003553\": \"whole, unit\",\r\n", + " \"n00003993\": \"congener\",\r\n", + " \"n00004258\": \"living thing, animate thing\",\r\n", + " \"n00004475\": \"organism, being\",\r\n" + ] + } + ], + "source": [ + "!head -n 10 classes.py" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ac4fcdb4", + "metadata": {}, + "outputs": [], + "source": [ + "from classes import i2d" + ] + }, + { + "cell_type": "markdown", + "id": "edb6463d", + "metadata": {}, + "source": [ + "Now we can get the class names by providing the class id. For example" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9b000a73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'entity'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "i2d[\"n00001740\"]" + ] + }, + { + "cell_type": "markdown", + "id": "2f6c5990", + "metadata": {}, + "source": [ + "## Save Images to Disk" + ] + }, + { + "cell_type": "markdown", + "id": "69319cf7", + "metadata": {}, + "source": [ + "The images are downloaded in a parquet format. Let's save them into the local disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2913137d", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1f3b91c9e3374d3da6343675f042f8ea", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/110000 [00:000.990), which are 0.02 %\n", + "2023-07-03 17:43:32 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n", + "2023-07-03 17:43:32 [INFO] Found a total of 12656 above threshold images (d>0.900), which are 5.75 %\n", + "2023-07-03 17:43:32 [INFO] Found a total of 11001 outlier images (d<0.050), which are 5.00 %\n", + "2023-07-03 17:43:32 [INFO] Min distance found 0.597 max distance 1.000\n", + "2023-07-03 17:43:32 [INFO] Running connected components for ccthreshold 0.960000 \n", + ".0\n", + " ########################################################################################\n", + "\n", + "Dataset Analysis Summary: \n", + "\n", + " Dataset contains 110000 images\n", + " Valid images are 100.00% (110,000) of the data, invalid are 0.00% (0) of the data\n", + " Similarity: 0.03% (31) belong to 1 similarity clusters (components).\n", + " 99.97% (109,969) images do not belong to any similarity cluster.\n", + " Largest cluster has 4 (0.00%) images.\n", + " For a detailed analysis, use `.connected_components()`\n", + "(similarity threshold used is 0.9, connected component threshold used is 0.96).\n", + "\n", + " Outliers: 6.36% (6,991) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n", + " For a detailed list of outliers, use `.outliers()`.\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fd = fastdup.create(input_dir='images_dir/images')\n", + "fd.run()" + ] + }, + { + "cell_type": "markdown", + "id": "676d9175", + "metadata": {}, + "source": [ + "## Inspect Issues" + ] + }, + { + 
"cell_type": "markdown", + "id": "1017106b", + "metadata": {}, + "source": [ + "There are several methods we can use to inspect the issues found\n", + "\n", + "```python\n", + "fd.vis.duplicates_gallery() # create a visual gallery of duplicates\n", + "fd.vis.outliers_gallery() # create a visual gallery of anomalies\n", + "fd.vis.component_gallery() # create a visualization of connected components\n", + "fd.vis.stats_gallery() # create a visualization of images statistics (e.g. blur)\n", + "fd.vis.similarity_gallery() # create a gallery of similar images\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8f558b89", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 279.49it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Stored similarity visual view in work_dir/galleries/duplicates.html\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Duplicates Report\n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
\n", + " \n", + " \"logo\"\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "

Duplicates Report

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/pole/93044.jpg
To/flagpole, flagstaff/104649.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/pop bottle, soda bottle/62847.jpg
To/beer bottle/33675.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/meat loaf, meatloaf/90376.jpg
To/mashed potato/87457.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/flagpole, flagstaff/104606.jpg
To/pole/93443.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/lemon/88577.jpg
To/orange/99767.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/black stork, Ciconia nigra/100906.jpg
To/goose/85379.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/wooden spoon/82149.jpg
To/wok/81771.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/walking stick, walkingstick, stick insect/17643.jpg
To/mantis, mantid/18797.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/banana/89072.jpg
To/lemon/88830.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/beer bottle/33973.jpg
To/pop bottle, soda bottle/62815.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/coral reef/109532.jpg
To/brain coral/6750.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/orange/99657.jpg
To/banana/89263.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/mashed potato/87495.jpg
To/meat loaf, meatloaf/90258.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/sock/69225.jpg
To/iPod/51355.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/pop bottle, soda bottle/62558.jpg
To/beer bottle/33640.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance1.0
From/spider web, spider's web/70895.jpg
To/black widow, Latrodectus mactans/4204.jpg
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fd.vis.duplicates_gallery()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "de484e82", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 27503.63it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Stored outliers visual view in work_dir/galleries/outliers.html\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Outliers Report\n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
\n", + " \n", + " \"logo\"\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "

Outliers Report

Showing image outliers, one per row

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.598713
Path/slug/99254.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.639867
Path/jellyfish/6152.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.642672
Path/fountain/47232.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.649107
Path/oboe, hautboy, hautbois/58272.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.654982
Path/walking stick, walkingstick, stick insect/17626.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.660399
Path/abacus/27129.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.663107
Path/crane/43785.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.663625
Path/centipede/5240.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.664507
Path/cardigan/39235.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.665014
Path/pretzel/86745.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.666785
Path/chain/98818.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.668765
Path/goldfish, Carassius auratus/497.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.669145
Path/bannister, banister, balustrade, balusters, handrail/29014.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.67446
Path/nail/98207.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.674789
Path/nail/98021.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.676092
Path/chain/98911.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.678443
Path/abacus/27267.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.67851
Path/computer keyboard, keypad/42322.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.679734
Path/neck brace/57203.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.682477
Path/barbershop/29697.jpg
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fd.vis.outliers_gallery()" + ] + }, + { + "cell_type": "markdown", + "id": "a4eb87fa", + "metadata": {}, + "source": [ + "## Wrap Up\n", + "\n", + "That's a wrap! In this notebook we showed how you can run fastdup on a Hugging Face Dataset. You can use similar methods to run on other similar datasets on [Hugging Face Datasets](https://huggingface.co/datasets).\n", + "\n", + "Try it out and let us know what issues you find.\n", + "\n", + "\n", + "We recommend checking out -\n", + "\n", + "- [**Quick Dataset Analysis**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/quick-dataset-analysis.ipynb) - Learn how to quickly analyze a dataset for potential issues. Identify duplicates, outliers, dark/bright/blurry images, and cluster similar images with only a few lines of code.\n", + "\n", + "- [**Cleaning Image Dataset**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/cleaning-image-dataset.ipynb) - Learn how to clean a dataset from broken images, duplicates, outliers, and identify dark/bright/blurry images.\n", + "\n", + "As usual, feedback is welcome! 
Drop by our [Slack channel](https://visualdatabase.slack.com/join/shared_invite/zt-19jaydbjn-lNDEDkgvSI1QwbTXSY6dlA#/shared-invite/email) if you have questions!\n", + "Happy learning 😀" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From b89387d6d62ee002f6a8dff6dcf3d8e8eb080ad1 Mon Sep 17 00:00:00 2001 From: dnth Date: Mon, 3 Jul 2023 18:14:49 +0800 Subject: [PATCH 2/4] add nb to readme --- README.md | 39 +++++++++++++++++++++++++++ examples/analyzing-hf-datasets.ipynb | 20 +++++++++++++- gallery/hf_thumbnail.jpg | Bin 0 -> 39808 bytes 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 gallery/hf_thumbnail.jpg diff --git a/README.md b/README.md index eeac379f..a2fd2afa 100644 --- a/README.md +++ b/README.md @@ -381,6 +381,45 @@ View the API docs [here](https://visual-layer.readme.io/docs/v1-api). + + + + + + + + + Analyzing Hugging Face Datasets: In this tutorial learn how to load and analyze datasets from Hugging Face Datasets. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/analyzing-hf-datasets.ipynb b/examples/analyzing-hf-datasets.ipynb index c5c33198..5a1f01f2 100644 --- a/examples/analyzing-hf-datasets.ipynb +++ b/examples/analyzing-hf-datasets.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "d1d92b2e", "metadata": {}, @@ -9,6 +10,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "731484b5", "metadata": {}, @@ -25,6 +27,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "34d4d2db", "metadata": {}, @@ -43,6 +46,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "4dea523f", "metadata": {}, @@ -80,13 +84,18 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "40145087", "metadata": {}, "source": [ "## Load Dataset\n", "\n", - "Tiny ImageNet contains 100,000 images of 200 classes (500 for each class) downsized to 64×64 colored images. Each class has 500 training images, 50 validation images, and 50 test images." + "Let's load the Tiny ImageNet dataset from Hugging Face Datasets.\n", + "\n", + "Tiny ImageNet contains 100,000 images of 200 classes (500 for each class) downsized to 64×64 colored images. Each class has 500 training images, 50 validation images, and 50 test images.\n", + "\n", + "Check out other datasets [here](https://huggingface.co/datasets)." 
] }, { @@ -292,6 +301,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "61b315c3", "metadata": {}, @@ -312,6 +322,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "90212bfd", "metadata": {}, @@ -357,6 +368,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "edb6463d", "metadata": {}, @@ -386,6 +398,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2f6c5990", "metadata": {}, @@ -394,6 +407,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "69319cf7", "metadata": {}, @@ -440,6 +454,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6aac94ea", "metadata": {}, @@ -507,6 +522,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "676d9175", "metadata": {}, @@ -515,6 +531,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1017106b", "metadata": {}, @@ -2585,6 +2602,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a4eb87fa", "metadata": {}, diff --git a/gallery/hf_thumbnail.jpg b/gallery/hf_thumbnail.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ae6263ad5d126fa34b51f8c3ea78f85c19e98879 GIT binary patch literal 39808 zcmb4qbx<5n)a~Mu;F91FbdeAU?ykWWXMtdgFR-|TA1=XVfv`C2;)?~BAW6_baEIV- z!686izOU+4y?@`G>Z+N(RrmH(O`kq<`u5-XziR+e4Hb1200ssCfbs7F{9Ol7EBiay z0{~iD0B!&Pfd8-kv%gCKMF2i7E*>rpJ{}$(0RcYYb4sG;&z?P}A*UdrWT0hYWT2&| zXJ&iN$^43!m7bnU6v+EVKuAc4iBnu!Oi=3eTOq;!34%dDK=Az8^Or7MbzwzT>{LgFrU&F=3!p6bF0O0@gT95)Tu&^+& z@Nfw6o)O~U0I)Frc>y?Nxa2GXcoeVX^{io(f?=seRICah8_)WN-ifzf;c4l`pQ+i{ zITV%jZM`GbUkHg9*uhKsCeJvP4I>+cAwCC5S8WvVxbVdnE?OkV8u$iEG`}^-A;5pX6 zkYre709gR$6r*?K4d-zQ*b7}uoeyzi;m2&Go-U-ytAizDm|lf%Y?8D_5bfdv(#EJd zzWUIDeWc+P8S0=#$;^dR2E|DN#tEH-3WBt{JWhujUD>70s`(j*_U+IbeLL~hiAQ-e zUYDr2AWzaC^TP%xb{*%{MaWV5`4?zW^;JVd3950#$|CPN|TrKl{Za@#f05b|+&h z2Y+zo7FyUCnc4K;LXyNm1+$`?3)k`7~cH2BtdIdiNBM3cl8RxGz)3GSFdM_WQ0 zj8y)!s&NV3jBjaNtTXtUJ>a0yG0amd3TdekznN%B{KYP0Y!j5^f}6#vA`k7Bc2~t3 zu$waWZp~Mp$7d6xwZcsAmb8tMt}=(^QIO37UKhrMK 
zdV~#?3kYHjy=c*sW2&d$EA4)8ddx2WkjLYop}8uHD^paFN%5fW_!n?JbwwmlMC*Pd zE=mCKQe;@6GO!rzzY|+QfkGC4PNRo0^q^ZahV zPULDV!maW>DCeX!_SMn9dt=cR%bjEhCYdi!2Nh?IbjFzLwE+hGtB%h)7e2>vkpg@Y zvXOZV6(t>3KTm9{1Gq!Bs!zTP2%J2FT~=UWv0)ZU8BM<{hyVO647o0_GarE5#{3Z34AJB_LPLsZOIl`*40<$Ru|(^6UUC2 zQKc_N?L40F^V)A*dOZ;~I_IgnrwTXogW_mxbJhGHqqnD2mv?Ln^FPTKW#aCXeOKgv ze#tfhzN@%4Sj^?)VrRwPl^ie(Hb>89;ti@!7>%qSCWZMM875hBj{I)JZFMpnwwC{_ z_9|B>LN`JQgkP2@QluL}NV!0H9wnkIM?^WP;WWwfsTc#$jXCK_-N7~FTj5|@m=P|l(&q2{rj1LBCu0i7DEUzjtmv+n=YrAq&v;gax&pJTDO zG&3Xl7pFL$6l_ILi$__|7T(zU2T?DN9r4Yr<_LtyX59Pg_cBIcn=?<$1KzE1R^0^|qJtL*^4&Gqk`z z`xY4t(|S{|$@bq?$-(p|tefiE?=4r~9nyuBDe|;!dl;&W=gMkwx^xO|D@t=8a5{FY z4ku3r0RQoA{sp`=bQMya{R;?>3#rvP8nu%u{GQ4SmdXoa=Q5hpq@mw%F`pe+E&dB| zR6R1^=C}FAsB5bU?{JDVpkC50KdQoGpbddjvuidik&&kC zs_lxc)N!knv%B{z_%X2uSess`ab5i`^D$azS(=={u;a%*TMpCnB!T6beB>UlSF$_q zq)$=^@<`WOHK-o~)8Q|gIExeXd-_HnWc;UJqSDJ(<}`lfm9(;Sm<5S_$g5k|h*dCQ z2W|u^dX%fY*=iCU;zse+H_|B#i@ERO9$a!NwDAV0YY!W``9~(zN+5wj<~hJ1&Oi`4 z{kGQ}+9NKJ))f{;F!ed{7h!cr%5lQc9>>grtMRl@B5oa;U)bt|&*$O0{&fNael7r` zxys{UkJeqe2w11B00QtP_PAXT;q?4t4kBRZ$w7QG1JD{F;I1s7_iECcP;ClY{=m5+ zv*?~4%WdED;09w?-l}qv?Eh`Pb%jFcqVr zd1Tzyo~gm=6+Cm|hP?1OaVz4A|F;~qr;pAxbD478o%Dr-w5r-T_OUlpexlXm|7P9O zB;M3GFR7Sq+I##fLV?x~*EV~=*uqI<5FbXW`h}hJEhG(3W@v!UP#F0#!bPPM zO^j>le43uN_*Q5~01KD8+OpV*5K#^9Vx@L{wr8V4%9b(MiA9UDN)p(+>^GfIjPhKg zNXX@uN*J6mLrg!;z%A)v0wpYtyI^S0bRn^4-k-$aS7!t*La9C2$&e3;V$PY?L(b5A z5a=T48BBLj438Cw6YPqDE`Bn0)(tU2i!11B6Gqu2scpKjwmjiye6^^d^(f*@ zeCFx4nxD7WXA8d8r==!FZp)j#d^to@?16|VoA_LM9MK@o^!tU8ys+J|iBS1bWaNy` zqim1+!}U{Ll`f*ToAZ|JG&i}LJ6oF9o*%@bs`ERsX04z?M`49r=ZE_t;XBxat&YcE zKw1Xr24(uW+5JogVsburcMaN|U_3{)R546(n$ar!N7*It$M!#O>K_)<|332w*s>E0a{0-` zkUC?>*Mjp6v6j#4zBqlWwF;YHr=ikTy^vpe75MKegeP&1;`X(Mr zpkJ=!Ajb-mR2Lo@8Fp7TvmhX3=_mf_t~50#8bOOI83h!B<_)iRhFwpyvdW5y580+O zs$=x`P17B)uU2Z3H1D#XQ|EO>U@Svn{aWfNncYJNU(u-k7AR|lVqR(Lk0fn-FFw~d zuR6(pSq-fi$+4SaNDyKD1vFP3&^jndEM*e>d8QFZqh>^cRp&>W>SU&06h4_o&Ns|V zk++0f;#i*8|9Pbib&N-&(Q3swvwNkfj6(rdV}sF8emTIv74jIX3;BI` 
z-`|42|9CQg_p0uZw%X;zUjSowwTn-JY*5REvgnJ7xY2#pW|t+QTdg4bT>F;`wXRc3 z!lQ^V+J#!1{Dm6VX}}oIlF-R$kbMzJ)@Gj#=ode$V^KA7_od|C?CSy>$yq(xcl{qK z2w%IZo2+8O5+8S>&{|n}Df7%!A^^@QGhSZ??*)2Ck#06Z&7LZUSnb~(A%r8eB`zHj zIFLyvYci*%yIv0Vx6`4~A1!c2W`|I=0{I)vKdw2i!&k=z;p zNHR1z`1)PC>?@UbI3_i@gC?Vx$WvUloL+QzO`LpKs=`%>%h5>jO3byonVTISTP+B$ znKhFw=U&JZ=DH0L@Mkn8@Gs!+>7`89A=64#=~Yxo*L;;MTC7w z1P}T4bIJH+>DNgPmml9SbooneX-;zsO3|3kU+!hYR_=!Go!g{&|8XrALD>Y26TEBh z*a1>@#+@%seCZF?UOQFS_Sx1Z%_C2TherA~oav`Vr~OrjamqiQZd9**p5`Zws)eBs zHr-AZ?f((5-p9zgI+MDXoVsydBmetmq&Uo7T2uvE%L)(sz}r+H3fx>V+o_YeqWA+2 zsn@b^ub3lUel;tI{6eha2cg23+s3)mTWAp{BQT5ChT^cg_^DR)=gqyK0v|Phl=|wv z{XINm3#Q62^H!fv?OYj&sZX`a$wZ!BsdmG$=nJYzyWxq+*2RX(3%|Lfs?=B{=5)#} zk!42ww=0l3bSbXgj1mM}E7z&OQro)%H3=tDl%mr%)d`Gx*VGj1k88|n?e_510_sdP zUAM7f@_O``%gR*!(v{!s%zoO%bNGmrhkSDxK#$M_Yv|hbf52>9OWZ9twi6tq+2W84dmyS#OKFlF_ z;kon=&RIiClT^^wI|cHToOm!HgRQkqc`M&C)g&O%?q+lOei(hu9TM1dfn(LK^C%SZ z`y?1*JWfXOXQK9#Oq?&jK1IP~Qsh+qh9nTe$0@diKam@y#Cbx4}{95 zo2FEHO_?gcL*kB_Ge_GtY1GvFj1tkc>ZSa&=n?4Mq4HQSF*p%LxTIJ{NJ!VYg~;a9 zlJx4Vh#@`H0269VbTahE_%tZy7IyB64XdWVZF57h{XSr`PayYFKjUXZ*0(UKZmoL9 z7t7@#4Wfi{s~f?R^?A!Yd$Gz*`?uVVl+C8DNJeJY=RrK%Bh|oXZWrvOahs^n;thO| z+(N%62{c&U);({EdTSSN$n^eW6(qr0Sm8`NfceU+Wo-w@rXR16qo)y#k2LHNL{343 znd9XDi4?J=gB65YLdc1C=;sJ6-Z*(GyIxhEl0*(Tf*I&D$hao&TF%DR*`R|l(I4Ct zZw6{kyWkfS-^ty{1s?S4rpOo@$CS}=auvy}@im>!H@8=RSo`l_ARFg}x(2KP&bNKx zwC03H?bHJ6t{Ii1q1I{l{m8x&`> z;2^!EXb@Kc<&pa^EB6IHW>xLiTZUhhkHSuIHQmW;ejoHNr-%0K1`d?Ls<(YNKi(f5 z@NnJ=zK?%zGKH?X9Xz|^dCXHy@^Z=An45F~RwEA1MnCV?Uc)=u`@lbK-I8x-$gXUp zs~=ELlX;!_;R8b-Li)=U+dKb!d;2v1>xjZsq}_Kd)@TMiY0yyC*P~{vTC`-=TqE06 z6yoY~O;9~xC{xM`jxz&CX$Fhw7;U*1E6B8DaKEL#QgMg6SUc&?bN5Ixz^|L&Tbe}Z zogCtFiJFht36y^N_eE|TQa*ygH$!s`xehWAetqD#Mf5uIxGlq)razFiCT=03p71X~ za_~~RRAb>a8@dpQmH3@-RN55)f?W}4RDrjqHdjzDncexXZ7R|m~+UXiIReMkGO7Z2>NiDkJ z&WAhBz*A~!G;c=thGP$hx%j`nNo1w-AW$5y5b?1%T4?(Hx9X?%3aR`aa}&@j_M-}& z=VG{T3?_9x%M82Aqb~Qq;pIM~pKR;8>+{X%m_xL9_;NU5z9`|BeUfHfbXvhhwmG&@ 
z=0+~(;B|ObGRE=~@dZJq0ob_1YEpszW!{)fHoyE8`ZT%oC&Qatj=fuKduCoBTRglp zp1X*4Gw#kB2GimGV)5NOVFBvuJF%j=qdx2-{{p5&LmK%HxxYj;xBmsCG+>=zM*;wP zE5=a=-aN1KxJMi|J2Ql)pj^$1eq;$yMvkmTic>CW%DaJ0+Ss)9jf=AH_IWDnVa8?4 zmZ*8e*ADIlPS`r=ppG_xoH{jenBhuY<(7sfC@nLxbupzVr6@u%NeB#6lLEnvhv4bE zn81$$*il}IXy(-JFG-@P6Js_X9_81ge)cGFvYpPIF-eu*?>6%D&R8mC-o@UKl#RZYH<4W?^U;VrrUc}HZYG1~cR zrD~cqChno)l^D)S>&Tu%w4b5YF|ER`7wF_-guD+Fo|R*Uj}^Uv$S=wAAsLl>G-H~r zcs2`Ub71K%ocX;uCoRIyVL>{h7(0nzlrwSAS9XM_c5QJo*75Ae3zDwnzoOFxkx!Cm zV12_5C29Dg0bhLP#~2raEzwbRup)Fao7K{a-W{4MFnm65DUR~{xy2yklgT#&yKU%G zq`w}+$7YnCo)1rID!+9r9iDE20##eF5)7hO)wK!4u>ypg)p0jD1Y5BfuRrI1HxMpT z)6?juS}+FeMhO55H`yN8PN`T$y?c3`(NxMplc_qt*VS2t*4dOf5V{p{T7pOe49YoG zLQuK|gieP=x)N*2s4H%E?S_j^W_Kl8u(`;~=|mFyu_-ADCK0zDMcM-4gC<<+=kqW; zD*gp*yCP^nL>1N4pCqi;Fv_Q{)EZ0b55&$P#NdCJMwr;*x`crk(2| zew0!h8z$k zSHy>s-Qu!ZU<^Y%~|CZ=l;~yNS z!)k-#bO^aWz(mr#Jc+p=ZrPejcSUeX^N)G0*0Pu6@Vt_Q)Vlm(?NGb7Wq~}4&lIx0 zzTD@sT~KX(fxSWtFMGFY7?zH2_ZQ#~JboDx;U;~Nb5CTv*T1Tq^nhw>oLnP{+#TJI z_&TBEXuql``fB5RDV+vHkx!R}sJzTPA#oB8p*MdBe5$W4LWk_|7Nqd(T5vvPNrlwD zpB28;Lhh_E?GU=jf+?~lp0p5Bp$)q5^XD$(l*>Q^i*qzcNjb47|{5c!9%ANSx)Nr zy9D2F z^|wE^)6T)%hZcJSs=ZZ?H>32O{bBu|QzlL2puCu*6s^__86TY5CcVFJ!%VK!%{cOy zJc7$6XT}z9By!0au#Rg>yf-ymG!>jo7BwX~|2sOZP2W$YoE8kd^GXzcImZ);GSPCfV>tzC62Tr*b9}RBWXI`nPi@MreIo?!$(}t}Rb$mowGDbW%`=Atk~_ ztYzl^K!ZfQ@@7uif^At)wln&}h}SRP_}({2t=|9k0qV$Axrnn}O$p0d2ZDfk*3hbF z^6ICQTO1d{`P&XNMwJ0&Ir9OXNe6L--#F|vq0UkxJeK(DVFe8(>FB{?v_6K9fLY_g z*Z)35N}#`tHgWAvyRc^0%~pQOcSSQdh~Sy-=%t%AUAYTeTUUF3GhmmyVn{`d)Ku!z z4$CPt`z|DW&cCMnWm(kx>S^@@gq3LR=9T((1&=P|qmbQd7Q`wOCHI!|cI;-8Os|;dR>J+}AD=wRbjZ!EPmJ_V~C^E{$`Z)?K;^DbD_16)^& z#gq75l!n|6R(}P5>|b(!W7Js9cZ|-kWhFq18uQ$AzCYFq`8{DeTd7|WgN0C;HQF=w z=yR6Aj9nb^!xnHhpgb1Cgg1bQrM~7pZWPQuL*zg!bP>d@jHb*94`iA%nm*0exL_VV zg|GqLKBq^yjinZ|5yvI=?`{vN%IDZ}UxtIb@SOaGsBiT-bAV!F#7FuUrDFNo*2Cr= zP3y&_Eun#`eo#>#?#Ds1q#7KgCsz3LI)b}O z&p$};qzM}4L2H$OT2L?hXF2TVuqaqL!pP3a%<|HtI% zDTD!CBO-Dcn)Cy?CEp1M@bRz?$4U=a*=H1O1Q50B({Gq>ppj?dqu>rqrb$h_cNS;z 
zk83jNn&anKvv+Y=bZ<$Mq&(8Qmvn{fZ2AknFcfRnQw&rElnE(nE4Pd()Hgy<=ANR( z;gtmHsIjqXzkfoRRxp`^c(ck~_!r-)U!?SLFCQBc7E72?jRy{G8t%#L5hkU* zH4=Ydi?2I3z^)(ILstI+l!X**H&QN+Rors}zBQ+L5GGU1OX_$Ox^#0@Hy&&(nt1B3 zZE~sUU0>?oDHWtER@of-gLV!^m3XPsa=l<#+2o>kVGc+?A7+em=OhC`fx0g@!#M3r zJcFz_L#-9^T(Msv`_ImxoF#V<8Qel~EZt?+AXMe+QKujqzev9 z`I*d7Nh)1$zNm68-M$kF{l>$*KjK3VC3iyN5-w?W0l^zVX?8x6ECH--Sm7VYarAvH zoMJ5#_3WnhVdxjOs#4v377kHVt$|~I0UzEYk-9Tu@0LbNl{Z|bmbEn$k+>5Vx)Q82 z`S<1B7FmwXuQd5k-^J?S#~w7;JEeP{vMdZ;eJ0@jWwSPfeVQUyT&AfDJI?HkuEPPPCRv7P0I zS}&<-bKsd=s@=4kN1E$4xd z7ixFgI$*!0H4)wd#;?D7xygmb?O_qMy zgpn>m7pvBXC*8&RIqGdW`&*w*4)cK22pe8s?WIqWYOd@S=7L15^xlIDKr<5F7xo)p z?X64*>^~^mHbV1?gR$clwz-;wr>`hd_iHmYI}c&GR@yOjJA&dT*d9_mdpS5)iCU%B z=4L)kUna2wJ}NXfGhppWsV0gL03VgM{YVNGmU^b2{a$Qrmz=tsOW6VD6ki`7>iCzW z2~+b^)~Jy>QdF!y*vQ1yhK59x-3nhFEWkr(?3gl14iJ^mxn^YrX>-Rse25{zw_DfY z5}EaX0o3UqjJrzuRgZbn43xmeo90ZvaXjyKL^u8dj=6i-oG<-TzV970CUS zmpQ%(|*H|5rEe7s3Y$~&p4t3vK zdw6yUji~DW(KL>X%AI$KR-Yv)siX|N#usm~DpBvvK~!XFfRv?(fMWurr3HwOx-W&` zFW`z2ypqZno0XkrHZIbJPm`i)7oK~{V!EO|+9u1C?hl$Ix1?WHNbd+tHuT-ti2l^f z6GklZL2PQu{E16dZ-~Xw_>h(_?A6;W)tRBJtabVusSH~N`ioEN+5%qYNm^{90nO7W z7Hc&cKw_t{xnJ%Cp{W{k`~QM7C}o2<@*4w?|EYVP#=g zU&7MGQ)uOQD;$o>ySppRYY=-;GOIuyY8r&Ewk%qiYURY#_Gc0V4{Z)*eoi73$N24r zVS>N;+<~tNB4eg=5T_jrBWzKd*EJf{l^)<;QJ;Y2SnGdv#K+WqNi#s`qSF5#U%Rt~ zhRTFl`sZ1E&S3CAE}O^(bLcE{am0^3T<@G}hgkl8`$s_rtvpFK^FrsB#x%*PuGb-I zKK0cvDh178U=khzU)NZUn0p{+&5^y0-zKdOR+aZO-Hn_ZZytiJ?+-Sxt2sqS1kG-Q zt5O_Ro6Vt?-42d=HsgKVD!vtAd3x$Ft_r)CA9NRw<(>@ zcrkS~;$By8<6dB*rz&>nto?gF1EZsJw`aVw)$275`4!cK2>~;!YpDFj8_@z?h8eQM zjc6s6yV}DSlkbz)_^<(AqZ?GC3%k71`w=WC zJ6Q60Rz27#s-rt#O^Z8#l~_fQE(_3~4WBJ(ujExit0{;~HwerM7-UJ>p5MJQ%xk** zkxGN(;3=4TGm9F0O3!d>lFxfZfuGDxTR?*9LA9bP*qM_GNeiGBvc;~mD~ddz6;wTu z!uB?_^PtZ&B^1*$@N+%`{G*T1058w-vu#n;S2KmS=nP42kIX3x>=PqUQ6$p+fYNX1 zq#(m#>Nn`Y@QpDJ5JdT=u&Y>GmGNc8fZ-j;U^{8P?FXB~2(oIZg!%MWbA5WpM#QFq z=qMOxtD@@rlky;w#4kH_8`^ja0-oOKfK6P3zBJDwrenL*eS?N45dFfwnxoZd zWujKo*BXZC`AEE!eYqgw8SvIC(OTX*yP~Vz{&pA9M 
z)M{syZ9k4^Y;(80qvmMSs4-v}-uCT^3oQ~_Y~LSNXw{R0K&H)!YTmbTJDMvh_3uXO zwM-KS`5RGr%+OAZz~N(w&o!ZwOFeaC3Ik*V!NK^8A_a0%BGDgsc5-r!PmM>SdiknL z-?dmOq{$B(G^lK@#F1Ami=~&U(-s&Wm0#$kk)oq080s+kXQjdWtcK1dZ5%8=bs^nf zN@G7*tOq zQ;FNLz>wvs7CR2Zj|@*7kjsWH0L6hPJ6`UrJb z9x7xSi0A;a=xjbP6!{=ZT6>Gn1hGdnQ}vsJ1JTDB6?8%V%vSWiw??(Yw7z_qI)By@ zjO43prl3{LrL5bHXjFw98+}TrPh6)T9yZ$Od+$4y8|jSJI?9YAfZ9tYA68I{Ftv+g z+}9+DaLma_Dzfpz%PGXwJ^e zZ#51DeJ@#m=4H4UhwX2mhPttxINYj2qr46X&Vy@WS5@i#9HOLhB1MJ5Yxw1H1;^9M z_Xo^k2OP){a5~zC6z}**Ha6pj2q*z8jWxo6Lqarp5-5=pk zqeJR%0Jg(jTY`Lzw0JzrOEZ^LqIc^lQfd@O8lGk`9lwXLY7F7dv7w)7WHzCgBf5Pt z&MK}Gc^@1P&9fr3TG1`k>kEX^tyMzZ&f$5VM!CTe>x^+S7sgX0-g_iw*J-$RQ?n_hNfNJ9{N9u709EtM-tyo;7dnWHb$g3 zna_PSy1}-qW^ZC=2ZP7drMNPdJG^vd*7S8vl|;r=6$5|9B`+xcST;qJO^=|)oRq{q z=Sv6O??35yFORI;=$UV+=_V7j1awuWd~Wo>v)Y>|V+}M;47t9T|U<`mYB@u5kQ~O_*dls0O~4uRxh#d zfz!W^1fU=5O*aO+QJEh;4TO*TbmiI-OL)w`+)UrAp8Z`zCLN$pVnI#3y+SDtf0!R# zXUa8jj$5zb7gdQ6tRcP=TmE!xU799|NfD+X2DY>ZzppP|7B(ms&?6W@w`hNdU^FT; zA+_M}#9m2oj%xWvME>-d3EXZ8?VK+97ogWA0qNd==Z8xJ#y53W%{()&^?zqfPz5+n zit^Mms#7ba{zkatKU`T0?ez)pbnw*p<>c*;cEESH$$DS&6c=D8ejNvXIzy_9K~OwB zdWW6)Kbl%-_MH<(Gz0iL%`~>!$EAq-REQsl7B-bqy@QM2I)P`~{II4wzQOxlq(@1b zgnVkgO#;4o*=s@|ERuuV(hQHS2Peh$t11+eguuN>lo-C0sGl2MxqK95XL@2dRh!8u zDa&IDG6qygm4@6!9o9S_zirsgQqVr4XNu3gzOgA64kb~w5m;Q}D@QUsvOLjM4iR7C zb7QcDMSNGN+!fzmOd}_++psOJjq z07Bi&02iNNgU5PXP*v0_#*OC3kh~_=y~w@8QvG3dr2_P%1R}4I#i0o0k{{2_4npR%-eng^U zSqWh^0g7FAwNK8U7p0pVTa+O~7tQFV9E(YMV3kjX&|;1r-4KmNKkQLzR(-F|`J_?wdDM;ldcsd0^dSi(RlZxS z5ZyO;$+KV>Ki`>=Gwucl4_;1(uM*Tyt_3=u`$qN&4QGg8FK`dgPm=b$bD&6Kk=0W3 z^3FQ!X#2c}Oy>DzMJd`5o-5P7^$jza`46}Lr-e(zN1)YzwG+Cb13*=KoP-aj#r05s z%nTRK`xi1@uDPXEH;Iuq{CiXhE+}tkL7d+qp&eoUldfSV`Y#~<{W#EUH~7nNQ1Y~v zK`-W;x z?{BTT2bpRrrHAHO$A7=f8ru9PD(!5l!5kcFQBOh}wZ|%i%I!SRqu;P#c9M4Fx%Zs- zkm0Dg$Nj)zTuFh>JhE3Z|Xje z@O+Ol&y0Ldw*2GE<{E^K*1@yy=Zg^YU2`?!PitOW01sjwe5~qIj8xQxN=*gIBN7c-NpZpncy zKdnAr2ngF2ROzEhY8^*7{#o*vWRu97f8g#>(3T{xByM&7sQ-R94d2`2Kh0>xuf3Yf 
z$%7is2+tw6J%Dy~nzGG7F=y-R2fz4FK*OaFPbJRl32rqmvw3GrIWl0>YnR2U14AnNi z6Z=_T2%?YGrBL5gK+Mg%`LDvG_;>l)oiEF$sf&h!keRV~0iYHVs5AAG7OOIgFc zJo#$=KpVb35#~GaGjCBU3`rm%3mZjx?(=)+8hyxQtRIDr(+HR5s@!Czm|AN!*FCvk zU=(Oo+;Zv-!?_BFTHDZApGo8@MgGgNb<w=D%2MT0I&rEm%C5yce+pY+csOJInAayzVunsX-0ir zXKV#RWV71Gzo4}He6;7TQ*-4UgIO?hxd!TUI4fFS6*hMMM)EVl$_Xv)UT^Ay2^}SP z#G_9uR$pG6NzMD3T<2?WqIpugDgPWI%#znx$C8&`AM=;cYs@%j_*jId+cAi$SApL$3P0Q zB+v-GvQS<5ledbCGj#O*XBMYbN>A>ypS(fO(gyf`am4w%vH9yeNZS?AR@X1<@nyU* z&H~dZQFP;?iM%)2xfTVw@(55kDFO(sz*wAcuYNdfxmS%YluW7O9P4=o_HjA++i6k4 za$_!pde?B*+q-yKh$fKqvjE7C_dpR zlpcpmA#NtO?4zz~bJ}rzWts>|7g5d>AxE+$^q)SGSA>L&F6s+3)E=jqymsh#0e}ctK~-jKGgw8d?Ow z^J7^P^dyD}}fIsYf2C!#!JU%W`FTpYtpdfw?@YOH@6>P?yK(J#v2h09=JUhwP*^ ze2qE{zog~+Lxs8nuu)mkQM~>^KyxN(gRXu=!e)pM!(J_{?b!nS7U{KuwO4LTBmaT` z_!CR-`9%FdL#4v^fp(d8bQZJwuW4Y6eFE&bGW)G3@e^_y>*Y3kkkPU1h4~*ImRa5H z5#=tu60tKyHyq06YZ-(yO$XP(7e)0n^}=a1a}NW6+^h_w{xd77R4rZg#b~v8BmL1W*W9F!2elS+Njgy5Xv$wq z!&ShR3HJu&{PKr2A0T^Or!A`#%-G0frR&qN8iF0AJwRN<+nj+US|A)w3mm`#X%#AZ zxGt5gG%|dV{=z!Rz{5bv*=6DV-g)cExlzOFJ!gL-yFa@&FVKXLlU{D@X%Y7L-;!%| zu~G_H2RlojfeA5=cXg0>{N6WsYAfnfyzzaBoLC6&_J51|@LzP#HF$O$jeM(V->P@H zIDvM)2P=Q?;<1-7Qvaz`g`M~do3^ISGe4TOgLZ-;w!(CggU6J`bj&vs@bkak!q{2K zl?`>^M{dR84OBRaiT?Cz;vKP;z3W954cU4jjAqwJ8q4Dnp_$;eb=2O{hwI=BZkie2 ztbENIP!+7aO+r)?7sCFksFY#&>{es>Km9IsI&CK1F|Jeh^bv|mXQiLs5RqSEjcBks zRtrR$NuHy)EJImhFWIWGd9*u^$ZMZY?B$SApWI&JSI+79=ngiN5=`zjk}2|daUqyd33>ZA<1o`#9tPSL`+*7Uk!V517LO zq~>LYs{^q*!wLwSw~kxS8k>e(i-62ydOe$=a@U|^Up~}1krSRUZu>NssJON$bjN5n zokf|zi!M}bRYmQ}P*Q@0#QE+`o{wfrPvW-ugq~zvGbq{eVnx+FOXgQQ39~IQG$HW2 z-qMNFuf_+C z;%itM6;8t7+Dk4D6E{Uj<*1A1eee1G04MoU!?KLoMw!gZ_y*Og^PjOw3r`T$L)^RF z5qtnUpDNf9@SA{@bJEkf_>a$<0C|076XGRv&Wo&Tee>8+wJ$0OQCOeJ&g&$SX{omf z{~hi5=%pZv|Km<1NJqr$Sg(BD_X`qK8KP)3<0X}xmSdz%h)}>oMH$E_L!$Qfk0QF7 zRVUl4wzY0(wz{>1W_|MlfXjl~w5*Ja?^y*PV6(2?Pf5^U>>78Sr>bfv(lH^#mFY=~ zaX_fD>mMOH=d@@V0y@cyS!Y5GI=WCQ>%h#IyCxQE^@aX}Lh$@ybsZS&aQ_ML>nMki%CI8I(~1FF&SYk&kA13en^E7fG%KR3EGg_>IJZ zcBA1?cw+j%m$XkE8lBlzEV^6uZ?PhJg% 
zS-488QoO_rH}CA-;E3zR$HM`JmB)-wX}csQ&^ssmLjrG7xy^8Yint{o!_l1J>NhP2 z=nyAF(uH6zS@Poto{EZQi<#g7k(7&3C!1R<6f+OBo59 z{ae`&Tqh z+8BkjjaGr}i8IE%O3Uc{hx;uI4fW0EeJm)x319CY=I`s%y1 zyL7r5(L=-9<22&?Za*oEleDDcS}kWuIHsVS4@0#o|J^U$@j7EeIc)l%eCRep{UJPQ zcB_bko=YRMVK9Nd`EeJILPR3u%$MpcEl8F2&BTjlt6!6gtBKi>jcbl4c;fO`TmKop zNH>}$Ax?z^XN<0La7s}+m;YS$yNa`Hcotu7&vx>xZ)z34s2eDGa}|g4tZ9r5c4#Q? ztF9|~pRU_>uFCjX?KOn6R~TUCqI_)HxPR>QtJYMeS>})HUDds^(|vD{k(Tgn*_SSC z_4+);a;QLN@u$%!zAIu$NyTExpBVenhLqFP6&?}B^>l`dHKKm#W3aUvd{@YmM&9Tr zF=y!I4EmSKdp9G^556TrONk=nYXO&Kc1N4djS2@Z?cVe$zAN)k5DQ5zE$Uz+=4ea5 z&CMTA`Xx}s;EA&$B)kX>=fxH$B5s7^P<3^9diWD&uBNteU_HMWzg~*}kv^6S^$u!v z6ZYTQiU_YJ_jvn`d$XZIB=&dZmu|^x%mMOu($ZyCOCKtKzxstQ+>lYDSm|B88K*x% zHz?UM^>GHFu-saI+BeFF;%F-@Z~r?oOrq>!!CFI-2E*Is60t<9Kv) zmim>JH4}21kWUqA04t*YCY$sj9H6H#h$*YqN9j-Sc878UADa3GOSL35H~05S-b$Xg zct8xSy?3grZDm``;fQ^!dA%0RbO`_5hbLI6 zo(nHaZ;Q&d{Lwcj$7GCejZtirNaR29b)#gj4_=}VNWnz1*D&Ep4oq3`0T_!~0ou-O(f2c8o>&*fPd#^T7W;j!9CZ%}dobA{Qql>Fp-^945RJBMLx#vQyIlt9g68<7{{w(P zf4|HL?n&hS*lKTgYE+N7(WfTRqucMY8)3AgFkQ*(JUE)9fYULr)7IN*O5A5EP#e!oQP>-n ze5^&6G2JxGU^1Al0-|sexg#d7t;{VvhcyLiPdFk>ZzG2Fs+iZVf~!IQ0B2X(Pr`g2 zO-eK7wI}PEJ%L4MB^K!Rtx$&BC2KLBD{zc$91a7h54Nf`1X#$uYA0067W|1TDsf{Z zCvgcKgFDY{T<8aOb%R~(IzLvbH8Q`3ZlkrueFCQTM^qTx-4OSl3`noEj7BReYC zNXYCx^%Ijve}=B0o$H24@97V|nADCH>1zja9QqSc`95^Ff0jDTMQ;3|RDPBKHdKdU zK2Vhvx7$(EJ*fnQoG;>ur^;II1Qc?^wi#Ka^j@?j7{U4iId=OW`L31QvTS;Eh9h%fTTd@?5;*Uq+bgEou3N6rZb(6GTPspR zwiU$qRL9`^FM(EswCA3|nu59=!dFQYjVaTg+FwYdEk-zgzoUHO1drfVxk;7Yi0Sb4WwlCBYzBvn7s2d@=VKfjkev+kabK4wTUgnz} zfN&V2OAjVDD}zaO*7h1u$Z|<32f;Ycuz1##k`s5hF0Df$45^6lA;)1$+byAn!gjc4 z8z+7#3e@)!6qShUO4G8SrrRMyu=IFVu^*BsNkHeP0I%w!qFDX^*3rp1PBNV2=Xj}2IT7WsyHk>$oRx=H<&L08 zTKgL$VM+NQ@Zt#Vp}97DZx;k*Qs!H%OrKqJly8!!YlG^Jq@A6x0oszXMouiU4fPJKznB?GqqE8Mr z&(GCPa;Q2(7Bv;QYb|i)M`f4cu;WYu8cK(l3CYI(+8DSEazNGO0d~b z+UTo0s;xJbARWuuCj=>3^;AhydBL=YIsp2+s4HU*BSkoYsfTOl z=``$zaSKdu-jv4rYAkgF2UDwxQuj#p4d;(#EiK5N{+zePj{DO2winEywH0iQPf)o_t-II}xl#}C9iDb)LxV2w@7LTOYtS_Gw>{f5x)sSI)`5z 
zlR>0J*!G8-*Y2g(!^kW0C-RV>NEdP~7h?)~_p7VIuXbx%T_CM$~bi!K!Hc#Z~>2SlcI@(P5-wxGN%Tggy4D$f1n zp1O)k><&8>+@i|4M2T&M9q6n=X$1|XxB1BjaLSw@Y^Q+Sl5yH<4uG)2l^R{jW~g@B zqQ56wEhR0h3!F6K*v6p(3e|Q(`y+9l$Oed;YAK|c@(K z@wmebBON{|#uPS;T*y0eLPo1NcKKGnh1Ho^xxCSsM4;&Y5A90yi7(NeBS zZIvlX;3>^AwM%`~tszY$?Qaig;4Hc9HDHGGB|Lr224FwCiOlG#oSc&h^~_APy@ z_9uYfl1>n`tQ;B(jXBjg5VsVi0!B&6J(Ncn^JqCzl;>dJ4RjEeT34z!Kh3A#2+wtN zN=e8H1dgs1U=OH-jGCsrS#6q{3ZhSBD2(hlH!ZW;lhyQ8*W6w*gV9i`iLbdEZA9St zz>eyaLkiV8I(?GzsJT#+=f;%UPRq&a$nEMqW4uvzlT3qUi2c(I$ZVA~+e-WxJV8B* z;#0sHCb&p)TGo{)o;dFR0KCvFTZw8qSy94~(IE8^xYjPKB;yqrjY{_}FLF@e2||<< zr&5vAByaA`RHks)2Rn$(K%X^EA!yp!b7(CJJTeYN4?B4gR$6%fy_h*5u zD;%b`-InYw{{SiLb=1s8d6&vpv_1oac6}fUQ(B5V+^8%e!{@elZ|iL)^v;r*DW~YWyFZXQeIoiWtKo_tQ>+;eBqD|3GS?+i-w}Vxa;#Tk19*a46SVh zw0X3skbqJF3CSlOz5=!_5$|^Rsm3y;9C+^)^UJMHdDKRYAxyNVR+q|74#rM?gHaU^ zWog+r4sLAQ#JLj7NkDu&LD)`kG6>HTy!O!ZHdtx7I~<@o`)b}#Nk^|Iu8OqlnbQ{0 zo!r7mpiNH0@-FvQSdgZ6pJ=G0k+hy~fyY*Ax9(oOjYmmot4xn7dQ4W_djTOz8^9pY zfU`$~dXU;%1T?aq9Cy(dBZ(|&nJlSWN^V1mC%1i8-cHTh`Z~aOADC)UQHNqeIogho zNv#t{#*Cdj%23JzLQc-_lxycEY_a3df}--JH1kR;Ly6p#b46I=zX#~Ap?N(}{l!`b z`wL%GP?c4p$u#MsKam(m)^}7!Yye&xO{d`!2LAvWua~SW@rg;4(v&)oO8$Y5ZCy)H zZ(d#xVM@PJv%77c8%A?)G>4yPC23lR!CE$k?i;t!)RMMa9vNkpn4dssiAa^BV99Gc z4{f-e*xXcWIazKM-&0Bm%2}53_D1Z4jN*;CHCA~#vXAOd)-(rlR!*L3Hk&X&H5}*6+t8Ffzd0Djf2W5Rlebq>0w@rJRe};Y&%Y0Y%vrOe52T1S# z09i-SstiksYAQ(}B`6sr(&_Py1VcLaP#)vhG97+bIH3(c6EEs0$y9p<#fGXf?M=e~R+idoYxbtN& zl}(%%msWw4rLcIU3}XX;;l$K^Z>M!8rMX3!ZL?0320Rv6ZN#!h*2;I_9238~gNoE6 zu*$-c>n-z6AxR*mK}y^~@fDjEIeB#(v*e9SOZFdWcvpGeXtzzdi#DYrkC7wm&{_`q zCe!b1T6Gxs(2ze^)a&=d_mAZndPV-H+5QbblFcJv)EaVK&gjA?OEMPEHsX%U2^hc$ z2Z;wc;`P!w

)pH2Lzc_M!qDT6Tc^p0r$)-l4ixPU z{@R(QWEE?B5|pB&?t^M1oRV|I)@=BsobordG5B&Tj90i{qx8gGO{(9w%6V-^Wez&9 z*#S%0UdRCTai}`$Pe;<)iO5l2TZ|Ua>Nd_)ove+LdiZ;4mAkx)b=k>~| zRtU_$b!dd|@0hJ7ZmcSW3qiE+HWMZvjK#b5Vw1np#`0|nphIgI)Yd)b-Vz{Ql z=`bqMYrJYVLQvl`6ds$a&dBujp5al^%0FyIcO+kotr1 z3RTuHXg&tPl+7%Kr=8qE*k5tre#2B+k4eaV)y=k)msP#SNx)g{2dMoue6UrL23-RS zPsanSch{J?b7|N;G!CQ0Qe_n-OwkucO)y*{1*TkAl7yTnsXl4(s^cLlJV70Xwkh1i zmH9&SJvqe``wn4`+od!lL$=1T2g?-;}~h zL+R-*AUNlf)?3c)zPxZXU2}1$EtlAEI>d;|=A6bdq@Jp0CqlDAKr3a0 zmzH=15t2#n%^chGjg}NdyDYgWWZ)gGkbPZMQKR2XXpYQY1JkzEwcU|{>J=qdH7Y7w zD#pRZBdL~7An2{`S-)%34j4xvR;(yvf`Y57gAt@U}0mO~Rx4M^Hj82j<_pDE8sH{ZVQ_$K0 zwqGNN90I!)k3p>d@T8|uT-2fhedR=+p|G#IzJYzBlC~dg5JnV*41zw|%k|GCkKP-0 zW}c7`^7c!Ro?a5>&TtZ=!;b^Iyj2`;VX>WVJIMT$$g$j3gV8dUM}ax2%lh$szk~7C zJ!q>ANv#(<9j4_H=>_<2rbn3a3UV+vj?vyKr&23tP+w2=pYn=475RZGP-Z=~_4Hdx z6r~>SXx6*QcA>Uh>=CW;B$Y~q83=i$r)67)Hiv3+*Xy7~Sy;JfD{38-%vXbID9UoR z1tX$P?>9K~9Eyj@%OAO2YTVn)=w+3#<3VkxEM-^#s~i*8p9+z~FAsv#%@>d-y5&0R z@Ox69sX(Zb>hB!XD345W*e$m6Z4M439B=uekFRJ6)^}5oLYqkha3f}dVP0Xi8dTUT z34MqQa3m)LI<1zX4nIm4yuBJ*pKuQp`=ON}>u> z6E0{-QhpF@Z&5v&AP-$p!C?!uq$CcK4t#3;77rP@%lFvT@qVpzkdIFd*MGae(A{}V z=;*=ywV~`)Vp`zsD%^fT$Fx?ptmb)lB{rF)6OKBeaWo?pXRNM`xoJkl!d61zyn0EbQyCT=~}!^6xwtwMpOx9Bhg9Ani7kT zxqMiq87X@il3SH2YAv$jwwzM4j?4FxWw?LHLqFiMn%OG-#1-dbOrua>^S z-BOu>neeITnWtW|iSS-b& z5zmDeT-qmR7ywcmv#$;)ypgo&Z0GCPWi^$K3fl-#D# z=j1q#oB%azmvXux%HV+YoneLooku#`VX~k)B#$>efbgj8#f;(>7Ps1Qz0RPwv`~Bo z{CCx}5<{p??uyftgV=hhnQS(_m8olKNZj0%dF^*AwLU-NK{YQdwj`qBwZNfWzS|*v z3(Qonv1eqDK{S^%vy!?^N{;dP$X5RVWmr#iAw1pw-yQaT479bjf;;iOKSe}3Urx(! 
zo`|QS=Xi1}#YrD`rxhLvc4DK|JE~uTu11u(G7iWjcO2C`n;hhmw2k@B^pQlX)B2VI zdCN`pQkonP;{N=KfbOW&wxhFjwTBQmtxaGb%1-J`v1VK`i>!owacC2M)ip1v;ucm$ z6@_kBjSuR0lO@=t#Y9&ealke}#}GlyYBBZIo=)j@<+gg%-WEQ?Q=6Hku3MXw!c=|> zgJAn1p?# zX+-eZ-4PC+bt!gke1s>!cc$!9spMfp>@G-O!6Njc4*1> z`-R2zV+UnOY#&f3+e8*UAz;&C(`|&N+;rvFl)a}H;-kME+$e;RNLDJ3B-ZJLLNAfA zrfa?-rP1A)RrF5WEyy@`sY%*ci?-ckv#&pf~GC8EW}# zJ`)@N0IIWZ{{YBM?P*WN$@)=P`WivBZS1FO$}ya8#?3v~BH)|Y`mLLjcNKFn?l4_E z0#hxs$FqH9ddV&`c+@SC@0{RocKzffKW%5$X2-hrCen}!=P)-9mOlE?b)&8R=`{g! zYb%&gjh;JUT_0!^61puU;@V*9aniH{^Ej714V{!5!FDWt^!@Da9>n*<~%DuNm1H+2QK&#Q|D3TdxQD zmect-$8By${>UPl@=-ebLYno*X zaE=AVfTQXKBhg(InlQwxmuEU%Jo0rCQuL~3nRIiUtUXl>C^9V%NZMNcPdFfE0LD9;$;PEwnm^vm2dG(?tU*Cp*Hp{~miwEehX$>-KS1a$3tcMokv8m8-TS2ts_ z93SN!>7TF)zqV-0JPa$z((1=r_x&_P+h}RLAt`MM>=*)_9+na04d$0Jub>d#nACTC z8_n3X(c;5LKEZ+X(G9CYTH@hqM%W(-eIx4NDr47Z>|`{ad40SS{xwr@y(uxAkvdWs zirC7Em9T`4`eK4y3-B@CsVjdy!c#KaZXDcZQsaZHZ^x(Zsw>X0L{|{YkF9bZ+{`$s zDs{uy0N?51Ly(t!)z}tB&AQTf-K90X!Tn1|dcJb7DAK{D?V4@1m`$zOj{X{Kg>Am+ z?^jUn7!-CvYHycFt%qT{-Z1U$!v-1NE%DdyiyzjwK7{({jRlx33w^bgn&!ahp*TYz z_KvgN=`|hOf}bm95hM(ya#Avs_JnmQUHk`m-Aip@ros@~z#V6KC$yfk-$==twnmH{ zGUR=Deox95TH8A8>*{I zx^GjaY6Sz#ynL1jJxn0)SLLfGzzSX%>Uy7I{Z36OVvjHIUb>dzhM8dj4KUhJgpLUv zo}eqMcbSr0905u6Vx!4(Y4E`*+6j|9A(t+Pl)?yZeErMDssypF~24&`BOyq zb)(U{y0uZy^TX2YvX>6Sap_C9C8eL`5TCSFJK8SaC1JaC<%bvMwt8dJ*ebGKrW#~y zUex8dAAq#;yJNMydH&3G;-j<6hMKnpq6MmJrAMUepe0N3#}5ilNk${E^slJh$8U2@ z)7o|}orle~dD$x-`uQ8x+y_wZt5T}So8|7<>acCDqdjFtu43}30&9D0u-b=M=Ws`| zuue~wb+O=Zj+C}E61qWM1;^zo%;NTBC2Wt3k`@P}aiVy0;#{I8R2ePEVOxURIT`Fd zhq%%uHijtIB%-QGRUoRFdG^?4$DVNwGTKU(;?x3C2NFr(D>SmQ>LZF(Fy{IjR==^W z-H6Vu^#1@$jf8p$-hEU*c;#9Jy=qHnjP5_JG`V5Tk!*{_q#dg%HA{W4vwFmaidncI zf*d=N@E-+z{fhP0boVZ8BZZ;2K5Ob9WhJg%U@2IB!zBZ(wIM3@5GuHOZm$gWLM7!q zQdK5e^3SGjSLoV#Q5y*nQ22M8$iY!gI0M*gE^k9#1S1%#ILo_xTT1N5Wo}wqmHjVu zbr77IxW&!MZelhtdo#TxdT8YVN9%r6&z$>tB0)ol9eV*yBVNsyA$igjB_Siu-L$FiVYR+J#A3mZV?I6ZZ)Ps(PGAuYV2DT?BA 
zwS~0iPI_Da0O_G;novlLgq@b0aZ26AWmx*5HLB}9EegYRvbPBjAvZic{BwR9g6Q3---iPik{pjp{Df>Z9B9Il^lf} zl1~L=uBh=o#p9KC1MR=){NDa-ez2(bf?mE!Qn>U7sUT5=Z3i)ku0|J#dkY|vSW!EU zN5ZK;KrFgxU-1xRnic+T1^r1Q6aV>-Bh!@sgMBc+^P}eds`Zl4K&cg5R~`|e7J_w zw6z3|(rS@+H5`V(NbNh#L3UD`zy! zhHrYN-D5~hr&PHM{2XRR)2lsL9bP7pZPQwxS|GW1Te^PEtYRZS=MoZrI=%?*$6pGg zWFM8P1J@I1utQzq>%g|ku+u&6Iluwk?4lblC;aS&?s~FPF0+t!nu-&Mk&nqo!apfg zM`Ezr^yR%Utx(&Hz`-E{de3T)5_`>h`KX}SBeuC^Zb*2P#)!z;dP2Jpp3c&62U)7O z?nS=^l!+`uitL$BHpF&Qw6`2q%1}GaP%7x<<9Kb>Er)_s3LTO=R5+m>-f`=|+-Sp7 z?{gctU&~{fR@`fAO4?>I>QayLj(nrId=b}H+3y~|@vK>Mx3o=I@TI(qR9r)72tz4K zQUFR*xB=J&cimNgMm=>Thgv+ zZ%x2aMEir@PPM1`CbmMIU!epK6^b&pM`9BU#$f>rFo1+4btNNr(OHit@ve7^rOQ$> z({Hj#gB9l&fKruZ#gCq{Pk;lev{IXlXp$X}nBfU&MC=kz1B&TJI$)8>bJdy3h^ktZ zg&wh=UjG2Lo^+D|mHbsHC+Vgfw_Zw!LlO3RiE!|z8jEpFwnS&r!en!(Q(-D4I6Jrc zL8$uyCfv3;4o{OyL%#=tyTvhfhM00m11vexK?B)K7Q44vBl^utZOID?w8>?)$U|Y) z$UBNMM(*%_nzOaIxUEsr#M^EulGu*3mykLc_(QYCq$$zX2&%Q(Zq2$D+f85`j1mZ| z7|)4KER+PuZCu;!ljbWK&w(53HfVpj8+uI5{{W{xdkj35-B4CaTu+*`?og(R)Rd6! z2lc2My(6lnt+IX!J#@~(f!vTh4KE&fLvF*e^w7z_4EgVtNpDEkA*HC6qr+(}jPBo+ z6YD<85VTu`!PP0%lrV#Z6!B5)6{_;nryF)WO+vzUTy05-B_n}J$wIvcBho81wK?SR zwh6&1CXb4;EpZ=;4;&O9V9L*Bupo`QYcCPj@|3@%;jAW1@u+I4L|g_KX_Nv~IPTrfzduGnB&; zn@ZG?7(NS$I0qRR+(^%Gq~7%lvsp2cv(2A8_V-m*k+I0t8jLW=I|@)iUN7`ocFl?bdQ)E)fVx{rb3O2?BMl1?cMYZG2xLOxCH zI(BuGT<)my*=1@R)2jEjz&HbEtoYT<@fK5%&9VXj>P&PGva#zW<@-@Q($6sGNThWh z+P_`&$wts~Iq#=#wie@V#9koV9NNSf!{)RH(LmQbQ!VW-FeoG7XCU}dEV-q5-&8W5 z+9!Qp4Wg2YVBsAT;a_2COUA~HvW2ws`!L!`bt_7?h)xe*Xd}^IC|d2Sy58bj(YVQY zBRz@E^XM5h^#ps}irUK-^-F~al$Er?+zs?fj_?nDX}%;pRf+P`e}NK&lpNO=K{vaQ zzm(_N`RyEf#%dI(R(sXj*>F_Ut?P4z#v4n30jOxkkjr7C;*T!;M)3gO!9MJPNvr|;mJvIBOkux zXMRwO5T8o6N2JjQEB3Upa<*I;KnQuIGQ50boR4C?lZX-vgs$-c>o?0Xy(p${!3Q>64;Mg})^A8&u$ya~8O0ivl~5*hdE1Z3K9n@jd3I0{%B9S@2=H z=VZ?3^fp5hfis-P!XKO~h2NAvI8;o^>O}8EcE0*h^3Uk!{)Ml;m18J;sHos?^pC!? 
z9eJr9Oj1nV&rAQMl>I zku;r(2A?@kk}#C*gq%2dsb8I%mwA}d>@$?owu)NG03I2jE>Z7tqu(rc_;Orj#|qTx zZCE8ehtpRO^!&*-HO?2AiBL&98)tqirAoIsYa`Us3S;U^T%-br$Wa^8q>x2Nn{nr7 zw#+mw<+#{Vlm_O)R8CHNP5|`wR(|SkO{jr#llk>^1)Iy311=nqB2phMskhaQwBQki z4g_!Y(y}F|%gFAZGM+^lY=cDIqARVyddD_$4`o-*bUSp1J}OpJz$sWM#?ns_LbhuZ z`^DCBZ|YP@B`9(_SXR(eW1>ktpySpl7h5p4PE=;(IO0M<+9ZR&(@c#uzalz^k}cO4 zvov9GhENn=LUYukf;>eELwPLf3$@MCQx;|tqIYbiv4pGDlS0TtiC*ViPI!=`SFCAS z;un_6xrMSD=edxQc7`*8O%(_<(-iuQ&Pl~_xqj%`BMM5AMpC4lg$|-AtoPC^352^s zAj3##HVT?RJAgaNMtBVMbyu8FLW))vlin+tn-j+Aoz9;;b3sW1`nuF_+FI#sPC3V! zC9lROq@QRWy7|U7+lo_aNlU3XS_s~i4kEsrkt1S30QZW@b)Jm^;=8l}w!^0-P5c#g zf;&4amDBZW7Fpw;4V@Q(&MHyl1POHmipPyBD^S5vH6i$SF}Z4OcVMk~ic0Xj4Y$ z&|FH?-orrUS>mr6*4jc?X)T~(DHsB|J!f05$qomsIu`u0dY{O~YNGi9J>u0S45V$a z@(NTw(cU=JteWDE>gUU-kRt?-nJ#v@8glUMrf(D+u#6+B#4DoHNIj$zij>`&MakqY=_SXKjtD!F2Y{`QsDu@^FT#|zNRyzT>_~d@ z>Tr;f%xw4h%Jpvanm?Bo1=<@4n+`mkfS{gn0FX1(c+mSGHqskyAq^m5DFYRvXk z=ckk9u^V|@>aer9KyKhh`p41adAi>b`x-j9EL|pr?q{y(>p)ptRLF+D*zc)6%5j)i z`*Hpc)e{0o1WHJBR*@q^Lsm>V^5dSQsPl>SD1Eh4BSzjB2bq2(q2HFz+AGj(aJMtf zS0hr~EPEFvL6FR61KjH%p=sg{w|XQ10PFPBq(HsCR4m9LGg_Ys zU19jgcc_}QkFK=rusN&5Ku=QFuzy<>P865Xxk+^gyeL&{n;oV}JoUOViQ+?HC)tXW zNR?M8(V*Zl_9^H>WAAw$=AiBiPo8H9*Z*Nwp?7{KoAr)`$WaDXpL{E&B4`L~Fcs7_0rA*k#& zI!?K}&^pQP8|rLG0}SV8CH4zQnyKkS~Wde0S$G+`dz+}P+RDkx|bEcTO(lUZ|h zDjP0MIJ$Q&-vez#e(Q2g(^AY-`KkOHWwIEII~zya{{Z%gP`h{=#ATkN+&!9 z`4^ALj1T;@>Sw>zD)yiKk`Mf~&6MRQZJSgSRUv48-Ur~}57P=!k#FpQ>0AE*bs$fc zcu&5M3vxf{r3sTr{{ZS5gVAnB{WV6zLQIQyTCHQ}83vTv?d#gvRg@H^N+o_#vH%2< zd;lhtB~JeU)b$#F{{UKp_E5IA)J`>Ta?QCYPZ+XA&s-P^R&u4fTz0D2gN|#JMr;rcsCeO(i)XA zJautMbmAWb$b#ps zKYdwRT9I#p<1$%WB~p{KwbzuM9;&@n!Z95gQO|?P`|6KS%xyN8A8ppwxbHCb60lCl zbs=0klxcY(1B{aX4e^&OM>G|lSUI!)F0T8iS^H~~W& z@G0Ajbjyb(BF+-?YHXg&f-_Ncwx@34Vv6jUj3NDimr~-uA!DMiXf;lf=N7$-{6;a0 zP4NE!C3@`F9p}g&{Ms;<+o{uQX|~j-yqF*$4f((ruM4bfZk-VS0D3<9@48Xd{OUKM zuIm&;9!883c^po&v#WEkXa*4UMa4UdXw_9uI}<1 zl#+t2k}=Yz8I%+-a6#Nb>#MD60k^G)yTZboI4jxj>-!A@N1O@cUMu2VJh5hPvpt_l zk~wiEiAr>(=qhbHB}G7W;0m#gD>&alJgY!cty=Dh 
z8A3LrIC}}{C+JmW=;E^GSmxSHW?9M+v=lMQj**IuhUG5hD1t&c}71(D1=fO2}sAc80wOZ(3H zIkdwJ*W?OD^^s|1%UZhk5PtozzMwd+8@prkRrl4D zU4LtEgvoixYd;2a3TyR<$F_zS8J@UzA?U?NTxjJbCMD3M^%ZWeggWet)U3lUk`IlI zHJg-KZHHSex`H&lkT^GRh!lT4BpTNaw;ShUJi1A)HxK0Kojo9xd!Ck7eiI5PKFX<)piq!ZE3~%z z680P(-StS$xxGclmf2k+t#KXNADXNmZCOU;BG}@?Z^-g@55Sh%nLe|-+gf9m67;h? zu}(aq1idGyV**)JDT(e;Xs0TfnsE}CrC8Z+VUNd z!wx!3>zUk`tsgY}v+#U4_)sIQuKOu7wJprL@^`ltv@4llb^(kZ``&4bZK3E0B05+q zR`s~yQBu53Te8*6a7;7#O8cvwo2hczCqoxbqr8dHp0~D;J=l+b#^1jI(Sd9}F01~U#%VZ{5> z>x+OL%O7R;7Ig`&`ZzyARBsbHb8a#Sw`QQ>)_yL9%4^!Na063hUp)3tuE?mjp9tymZAbTrJV0qDj%KI|oAniD%l1b}PIX=@{V65hKnG|8Ud#0d9 zxv31Q-z`p%P5@R;1zJ00j?;^6Q6ECma8j_KMHA>iPNrkIrJrS9>VzlV)f{gvCqCMC zz4V-~1w39Y_cKLW8R})vQKi9v`!dwjM1(F=N{m$KJ1N`73CCdo(T6lUDW}j9kfe>^ zf+)$_qL`%d%}zY6#^u|2KuXp*=nfRC2VwkuDXj9r7DGC|?QOa+TaX?v7Rkuz?<8ZZ zHN3v|)S#^HeS|zcSU*7Z&=$Rb+-dfmM$!OrhlEK=9|@;C(tH*N=T|=r);!v1^}i9- z)pUKf-()_{mUZS*^ERb8+J;i0*X^hdK#JPbg+&QkP85<3L9HuG$meX~8P7DL2Z7!` znxeX}mhTf1P&c`@H+3ij_ty0`+f4G=dUYIeoL?Y!k;eXjducATBeq>-J3AZCg%k5= z_^I_*3oWT7Ds3ey7$Y1-8#6eTQLR@;^z)3OTyy1ZbeK>jIV0l_HD1cd=O$IA=Ilvq zt(TiPQWeB^_m29|^yTHaQ8)M$zY=NRc%kj>Li9JNQ3dc8_W&u-SKb}N&RCZlD< zZ%W6S;(C;x<52?ZVwj6{{uh$m8V(?DLczk@J*hoZ5p?AGdkQXE-$d4UgeZhKJ$sL! z*I%Xhlrk+jyt4fnlP66jymvsZ-6XE1yJNe@B`;d0q>t)J`YVmuF7GQ1+U679(H~88 zd`MN={-%90_8}E!tn~ef+w|>@k|8ufknrjs6n3clIMv0oL_W0#Zw?6Tjr_xE1#0m; z6YVri?5MCJCM-74%MFYr2phk|_tqYxuS>~?ckkH_$J4jG_!l}uOth9+NNuLl5QHa! 
zQa1rZ6D4Gw^r^Q3qIcC4iCM|bWa;{M%QQ}DDn+%P67M&*A|(zj1zT>0#z;WpkE7FC zTd;`8L~Gbc-~xsP1KObKT`tPzJ*Fo=8bT7KJsE!bgz>>h1diR{Rg@iPVz@7x6K2PI zI4wzZHbLypG3~9{Fl3KyjB7L`_GQ}Rl<{Ku?4}gL2>c!Jen_WOQc@I#t}X+>q2!$V zDU}1q$gdxr{uGKBFs2H43&uOz6&-0A!S_@2L+`07Ss%@A!^Hmpg)p^~{NCt4FZ?K= zk(SC&!MPt4fl%It%YcQ(nz7id8k)2HRmal0jIe*Bo_{K?fKsZ*6_u67jXFC1GfUY% z!YV^DFR?$(?h{l81j#}i9=4=qU<2qIylTY{T0p^U zf%6qz@}vC(kI|Y^W)b|_SLlsooTVph*BHhskfgt4_(@->bm|_{qs|Tw(pQ_FNAp>K zp*0~lFbDaZ-$N#p?7C>;Wa)9matDVy_tVImr4z;3o`Uz^UPasw<`e$_K~D?fQ~bi= z{_bkK{Qm%{Hav@tk^Y+F=r2>E;iJ0Ux}OC*Ng9O%`Gv>*+|tITY=7mq6aB>s`u_k> z>~qWXasE>8`R@JI0_QHqV{>+iwDp0W=Ujlgx1Dto%RELIl$i+twW zPr_4p1oaP10qR?nhhB0cneyo?OOrx(?2+a$JCZo|sQj9K3|gJO1m>QVOOkGrusN^w zs}RuInYJKpcI>0&KIP z0gMs{Nc{kMXgf-14MdA85{v|Z_~$UEk`;s9nzGh&9Mqc9)n$nt$9#qfO=x)VA8cB_TfM%?Seo1N>=n&m3<}t1xvK<&sS}w!UBdF0@n$5j8rSkkM^zvap~? z+;S?~{Tb=HjZLtWl9q-_loCC)A4Nu*o4d}+it{N9kPC^%Ng|ld_@f&0gJM3G=+7JcfWM2~bu+ z$j^0WT%FIHjZ=TMKM5^NL+X>{%U;G3;0ic9`p$hdr1R7I?XCJGkGi*N;$G(tSQR`{ z;WE42hyMVkW7dA^_qgx>08GcM{ncg9%Imj!pG|Y0kO%pKzf77QBPhB&e=uKDY4;tn zyCAsAoQeyn*(W4?A;YuisoFPDKQbDlDD#sUPIeT34uRG_Q0wg#V<5#`VC*f9g@U));e~Uq!i{Vf` zDyEZjDkptVu4z9Cqd6^SauP}H>aBQw9M=d=POd3#%YJp)Pa&3DTdugEls4K*l%jY5 z2J&lM&|I1>+WS|XM#_At7+h#7-_0JSxzttIo~|c<3=uLUvie>_t8LUArASdpN$w+s zXnI?g3&p4+E$43M2yO~mz4@`+uD=C(#dUCWvB$gQ)nNvekj(2H9x9FkD3)4$t8 zxBVSuzo4q+Y>y#CejE*zkIPQ~0A6Y(n^;-33t?8-=v$b{VSNWv`6rK~HEfw>2PsiW z?lrEg=B$n^&86~PD%-TZH-D29aS`Q?akJgDIDTzVN}sAU>BaiW`;o@)D}>=G@yN*a z;{R9UV#2)N2^!|)t;Wcznj zt{W-ZM9IgvJ6HG37h%0^8J6Y)s^8BtIko4-JbgK!nOvRTEkvtN-jrMFk1Kb#dQYfU zDfAWSX6$`(exz8aERD*!pUWS8M|CbIh`EnWXlhm3{{UliX8CG;r2ha+*P46|^q*Z# zho~*nfDPQ9X=mU$grB;yO9bP}HAgfh!z>6>nF&?MH_wxbjIxjQ z={~BeLk5;X7~7lcsOm~adQ#7cr`57^iIPv`nvSxLxw5}DtbjD2pZ2d^HbZ~y9=cgl zI;BZIDE|NoK_jX|{{Wd*0nu`dJ{tc3M6bG>P1Y#lw?9I^y0|Vd^Zs_<=TnKf!~XzW zZ}Y14@?x*&`~ITQd4@h4zo1{;PNw+`e<=6p7xz*Vb%Hp~Xns+rQ+T(J{K22jHAf%# zzu(v?{MYa9DL43I`Dee~*Y{JY)Nsf1mxun{e{~@@t8n9SXY=XA-mQQ7lz&R-Y5xF= 
z{r!Wh{!91w6g4yv{O#ZOb^X-hbxa@i$3NZI{{T%l?$xjV08@?nO0T+Hk#SOOkY=SV zEd|+t4Zv~lWnrT+fFl=Q2=-}NJ1wGo(o(o)Ky#8FMXwG4CeI@}}T zpb&g^M_H*{lIgWyH9@<}Npds=2MJ#OapS!p=Z-h1_zv0)lH<4Avk_oNP;8XPha7Jy zPIm*|NhF@qO?@k*Yz%3QKN{N61hzKbaC|opoUgEV0bM*$QB=4`H!R!wu$faDhK;8d zjjUuPOWDmtX8wzV&qMM~X(>ij_<1Wx8Q;~O88tTBE;lx{y6b6L)xljus)RXA)Csle zTbza#kmJY@V-)Vnmyn`76SeHCIU=!(Mss18biHcZ#-z3hp>IB>cZn3}x>%sA4{{YiMT589Cx7pZkBuk@hts!TcO41ZaR6PgVT-1q5PH0*1x`^xf=2~w{rBUe|;@tm-9r; z;!eNZ6loXao22O1F!5Z5!B;*AQj^24)lOTKu@lMMR$YpenRJR=oLPko6ILB>gn(8uc+9u~68{Tcd zC~F*cn>$;*0FO;sX&z8!Ptum>wi$6jklNI&p4hqGTGU2D;)(O@L2BzdJRqF z>MoDErQ1E)*vFT>o?=q`<>%mncq_BEm#M8SUE%X(LAed&u=mA1hvc5I>8#PGUSdm( zx|K62g<)vU^pU`W?FNMMNxOU<&wzB@222m9iqnyI@Amn7ul|HQpz>zDJv6$*K(gUH z+R`wGJF)Or-N4p&+!7Tlpar} zX}OAdu%}_TuX>Y>$Kh&U@{1kK*pG49+aMoE`$ZKdCB`Y&AA@}l6oM1O zvl$eHIE8Ue$?Z?wS!7O7FZP6l->rU7$lc{E^iQmGPqK<_THeKY!daY_oj=J)_qBU> zA3>>UmeVNi5-0_wNCYV-igCxpbe$?81rY<=ATs&6=Y=K_8R9r zpfId0J&WJ%<58quIT|x5S`PVXC00wQeX+rpj=}cjC)rgmUT*f4^LLwchB~>#j054F z$IxqAT9U5R`f@({iuwxMQWSvWXNbnoJ|dGm5SJ*^kExt^W^TpuJiG>{?5pCr(KRT# z>Hc=F%{6Ip(Y7mw(vuvsA3v_)+K(J(u5Qr=Q95QOlWLP$>AoPN@~1Y(0`pw{W+zJYCqQI z{+!iz_ad^vBfHsNBLjf-R1b4o{{UN^{_trkx3H9zDqPT%jFM!X)G2$0-?1j)aBA5Q zyrhLVcQ%Z9%KNBd@cQ7eyJ>T3Yw8Z%Ru9ACmb??h{H@;vQj;YyHm?M@NKr+-4ydf{ z4lbPaC#&USu+&{WW`9v%n1YR>U0z*Z5);;IZY)|?%*B@`r}9RT$qtnOD^W^sq;lLq zM*+c0`v<1A8`5tc=Os2n88S-7(xjxRAw6RQtnR06u$h4kn2aDLrjQVnjsOa=Oz44) zgsD!gIVwv{sYgAVg=(#+$!_cg=QZPAd^;oHb{7%$nhw;nZraaNTkH|-+izPCx1})= zWyF_B-g0rc4UytaY4@!~9tN6~bqze&X{9V7_VyK#xB?FhoO4g*h5jBo!tL@nn zCY0Re>@C$0g6Sz6_W%z8;aYP>dVGmLseH1GSDL~zYSo=k=6;ZW)lII^tFiuNFZ0L! 
zRVZo;xBj620IYq~h3Xr=H(X!!kGisH>OYo0sh`Eg?=R41(F*?nA5lbI!W6`^lAuQK301&2664X;M`NUL0yGrKc9tgJgiA@`4Rge(0`gxJPm8gohhTgJ}hO zh~d0ejk1f4xyA~xA8ep2u#vd>4iBQW!ZVgOrj`7SJsw!8TwB}R$|5++OG?g+)NbR5 zc6!tgqJ(rOyAm#wH0`zaqPp9wQd*Lnk%Pm>ce1wVPp$Zkx~r&5)kMH>}?bZ!_Y@OvX?smMC7Sx9t zDoF>^-9*8SSF$#z+jEWhiht^!0d{{Z)6{{U4)cEzaOIWyq?fd2s1McJmFdtoNCxS{SB-GH*Rz4<_sJ{Rau-$9y_ zLSCdFFJ;Ki%w;>0+Dc2Q_@^fx6{yRat>bE|6zbgsX1V=GTMREvTQu~&43ySbN}4eE zvX)96WUaSSuC2;KhtNT-%GIc~P1jDci;k4{O_y8QQ|Xe}OX=+?Cy#G`Wo8<;Oh~m$ zcJps+y2FQRLqm#B#vg4bNNcH<2lk?`o`$oRV=V4HWS;;FXJ&J>CQg) z`ZUJ%riq^kPTbLCl@10+;T%Jyto0I;=Jpdpv9$ELYzLZLJZBhN=}8`gUrySucI`=Q z@`$TTjjU|C(!k1lQlb9s+zRpU~tx)QD=>c_kx(>jtJS+otb`JoHf&^?xl8 z>>T%}%|6PnT0(&YWOvklV_@wxqJ!PSne{Vc>Ku43W7suSoaav6k#J+GGl~gc<|yF) zNvDy@1iN&fBh)r|Fys$A2q5_Es|VC-wl&^dLISc`Ff}cz=m3c|rL6n{c2(*%F-sulBpMExrF6y5E<$nZbEke1dB*eL zcaL2|7TsB78G$vsjAZaYR^IQLtP$un2I)`I!=)@Q&q`I_!1~29q#*E7PjbfZSfKVb z!C}D7-rn?AI@FMH>O20LzK42&JvognwV#WeA5o&3B{j)0#_-t-NKy}O3QyTX^R%U+ zc2r|?4j+cW+4gYuRrcP_N=XFqBX(&#;P^=gyly{rN>7p|*9Lwr2eF^h{{V$3T!{EA zpYOlIo)0noGhdu3X?36JTm0c%f3euVn83Q9h|lT2!n^LWzoTdL-{Dd@=6|Md^M!lL z8UCHW&lNWIC)|(js?YLl{+s+N9J!9pQHG+1V}$dP9c_DCEsm}OdEJ55NU4)92#py_ zZN#|g=(dEMlB2_QQC%EVMvc2~ZSuZyW0}re>PCALlj$e0Q?S7|Z3)XdRGALrH_PSo ziAOfd5*uZq!3Z1&+D};|_14wNW|{XU%x;oViBiT=2;P)+R&$^H@aGcQDRfhe1--h7fS$s@MmxaVO9LtG{z$7IvL$ZwdHH zINjZxR&zU0KNjrajG#G`^Fa6USJ*ui@9y5|A~K?-mP(c}x4asprMYOwU|W@Ixc4$8 zrJN_q-`Xi1O{0MHsQW64*RazGNFV?^YM)YE)o|AiI0{ynU^0M4Q#6pPKX#0OLFt3)rxUGb5x&)BJ}^h$R+bCv z_lwI+Jp$UoQ_~BL7$I5g&UkOC!c%fd$MrHS5l;1fhKZ=%@@*>d&M?+Jm5OQggqIXf zGNOI8ta6Pj$n;dVI+EEw%E+{qv$#cume9mCB!TC!6i23=mQzvkEBnyg@QkD8bQ1^C zN1Q!$g}Mmyhpwy})$-g0be<$ny3{Ms%{{H~b9hl(3 z=?`6LI#%K3c6L>@k_HV2IjkjP{{W>~&(4Mq+fwvZ!1MB1At~BfSV{&GGIM}CD^3TI z-niG^%iz-ti%hQmMa0?5w(1GdGLYNBGaAog`YRrVq@T*DmtJE~S!P;g6mxQ$D9^=L z6syocHSK<<;zPIIVgz%{t#MVhkf2iHc825fn;yE(jz#tX)OJCdaq< zWFgdGrj@OOzD;IIxM{_}c_mYAjwy%Ho#O`-aT&LocojCFlepF)ZPZfj8qpUhZu5TX z#X{T|1tnmVD|d=P^wHyb;_ZwU;;it;v2o$oL@yTxfh^l1yil2S%Nq~gsi 
zQn-&&Yjk7!Z&S*(OiZ{=lsGpnv%y2KirlpRsD-GZbBw4V^pAaHo#;Pk@2rPd=_Rn@ z?zW_K{O4In^M`1myEXObvXLem%4R}4Kks<1<}9(v9Z@y6kE>cTZ8gL9Hceron7!@I zfw0BM4f))MyOr2`{dJU!B$wtr4c(a1**IG-bV&Q_QPbMyCE=t;n54#=!vypkKP2|| z8go)<=$e8!=RM|IV+_ei-k#&yp6^vzG@iyuc$lAumv3kL4hTYY@2JAVoXY!cyCGC=(syldn&Ec@(D_ouzLxs#%Z$u0K;_}btxvkfvu}g zN^TCq-Hn`eJSr63DLvJmZBftn!m8~~5{wdR!Qr@?)q5USJf1AgzC}aPmvlLhhE?F` ztCVQFyPN|F`ObUwbp;$&V|!*ibn= zy)Vsj7wP{1!kb?wxAcD}82COanXPYhXyL=2@M_v8DGw(JeH%%v6Y72;5B8_~`i3~V zSIwRrpWjvdlz-C-@#O0B#HKL*^ZY1=2DrD(P)|F}Ebt1e_O6`ba+d)IuQiq*9LtJY zdYD(x*AIu_n!Iwqxt@OEuT6QSq2Rjmbk3q@J0S=1t44m}n6TWHAscKqHkPD$z;@%W z(NQ5^Y`1(1eaVNGMtQxgEPhGfUCbR+bHiRYXs7V;#VX(eddq)#y@?SJ;(jWCP^w8< zCToPPyGdz%N@Zm}&K0o~(n!TOTQ9+nF=*A0H#FRb5Izg2+8(h}o48!q7@cc<7~-bW Vv^`smY2?yL&f~wvr-;be|Jhc`c_{z@ literal 0 HcmV?d00001 From 4517d583bcbabfdb3497b98dfdd8388a9714faf8 Mon Sep 17 00:00:00 2001 From: dnth Date: Mon, 3 Jul 2023 18:17:45 +0800 Subject: [PATCH 3/4] link to hf webpage --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2fd2afa..cbc322cd 100644 --- a/README.md +++ b/README.md @@ -389,7 +389,7 @@ View the API docs [here](https://visual-layer.readme.io/docs/v1-api). - Analyzing Hugging Face Datasets: In this tutorial learn how to load and analyze datasets from Hugging Face Datasets. + Analyzing Hugging Face Datasets: In this tutorial learn how to load and analyze datasets from Hugging Face Datasets. 
From 80dd279311ac9aeb695a7f1b600f9712c53f4622 Mon Sep 17 00:00:00 2001 From: dnth Date: Tue, 4 Jul 2023 10:57:37 +0800 Subject: [PATCH 4/4] update notebook to include profiler link --- examples/analyzing-hf-datasets.ipynb | 391 ++++++++++++--------------- 1 file changed, 180 insertions(+), 211 deletions(-) diff --git a/examples/analyzing-hf-datasets.ipynb b/examples/analyzing-hf-datasets.ipynb index 5a1f01f2..f5812f40 100644 --- a/examples/analyzing-hf-datasets.ipynb +++ b/examples/analyzing-hf-datasets.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "d1d92b2e", "metadata": {}, @@ -10,7 +9,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "731484b5", "metadata": {}, @@ -19,15 +17,14 @@ "\n", "This notebook shows how you can use fastdup to analyze any datasets from [Hugging Face Datasets](https://huggingface.co/docs/datasets/index).\n", "\n", - "We will analyze an image classification dataset for potential:\n", + "We will analyze an image classification dataset for:\n", "\n", - "+ Duplicates / near duplicates.\n", + "+ Duplicates / near-duplicates.\n", "+ Outliers.\n", "+ Wrong labels." ] }, { - "attachments": {}, "cell_type": "markdown", "id": "34d4d2db", "metadata": {}, @@ -46,7 +43,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "4dea523f", "metadata": {}, @@ -84,18 +80,17 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "40145087", "metadata": {}, "source": [ "## Load Dataset\n", "\n", - "Let's the Tiny ImageNet dataset from Hugging Face Datasets.\n", + "In this example we load the Tiny ImageNet dataset from [Hugging Face Datasets](https://huggingface.co/datasets)..\n", "\n", "Tiny ImageNet contains 100,000 images of 200 classes (500 for each class) downsized to 64×64 colored images. Each class has 500 training images, 50 validation images, and 50 test images.\n", "\n", - "Check out other datasets [here](https://huggingface.co/datasets)." 
+ "Let's load the dataset into our local directory." ] }, { @@ -108,13 +103,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Downloading and preparing dataset imagefolder/default (download: 153.61 MiB, generated: 212.36 MiB, post-processed: Unknown size, total: 365.97 MiB) to /media/dnth/Active-Projects/dnth-fastdup/examples/images_dir/Maysee___parquet/Maysee--tiny-imagenet-35af7c46a941f08e/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7...\n" + "Downloading and preparing dataset imagefolder/default (download: 153.61 MiB, generated: 212.36 MiB, post-processed: Unknown size, total: 365.97 MiB) to /media/dnth/Active-Projects/dnth-fastdup/examples/images_dir/zh-plus___parquet/Maysee--tiny-imagenet-2eb6c3acd8ebc62a/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7...\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c16829ff53a346a981ec212bec1d2626", + "model_id": "f47d1b0421544101ae7fb91b655b553a", "version_major": 2, "version_minor": 0 }, @@ -128,7 +123,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8ec0d3315dd14607bdcfd37a539268cd", + "model_id": "2fc740ed2cf045ef80361b4d6d71e13f", "version_major": 2, "version_minor": 0 }, @@ -142,7 +137,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e169218ba2f0461d832a7453fc2e0948", + "model_id": "0d454d6396b444f0b4f01d66569e0eb0", "version_major": 2, "version_minor": 0 }, @@ -156,7 +151,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "aa1010646d2c42cc826218b2e1569ff5", + "model_id": "d3b388e21e2240c7893288afb9f89bcf", "version_major": 2, "version_minor": 0 }, @@ -199,13 +194,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "Dataset parquet downloaded and prepared to 
/media/dnth/Active-Projects/dnth-fastdup/examples/images_dir/Maysee___parquet/Maysee--tiny-imagenet-35af7c46a941f08e/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7. Subsequent calls will reuse this data.\n" + "Dataset parquet downloaded and prepared to /media/dnth/Active-Projects/dnth-fastdup/examples/images_dir/zh-plus___parquet/Maysee--tiny-imagenet-2eb6c3acd8ebc62a/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7. Subsequent calls will reuse this data.\n" ] } ], "source": [ "import datasets\n", - "dataset = datasets.load_dataset(\"Maysee/tiny-imagenet\", split=\"all\", cache_dir='images_dir')" + "dataset = datasets.load_dataset(\"zh-plus/tiny-imagenet\", split=\"all\", cache_dir='images_dir')" + ] + }, + { + "cell_type": "markdown", + "id": "be18cac4", + "metadata": {}, + "source": [ + "We can inspect the `dataset` object." ] }, { @@ -258,19 +261,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "e1078a54", "metadata": {}, "outputs": [ { "data": { - "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCABAAEADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDskuERD+8HTPBzSR3aSSr5TNjOGPrWQbmRbMqsy5IxtVdvFMsbtwzBiRkjLYqPrjfw6GWLzWpiYOnZJPc7S3QTKMc59KfcgQr+8JBFZ+j3UhjlZP3ihiPmIAH0qW+uhM4jYgD2Oa7JYmNSm2eQsM6ckjo4Y91hCUJB2Agj3qIJ5jNGSpdMMMZ6f0qSzmQWMYHVFCkfQUyN991xwdnzfn/+uuWUko3Z6kdGkTxzKQxZsADOTWbqk0T6LfTp8p+zsSCMEfKetXDIqSAHl1OxuevoazvFMwi8OXrYALKFJ9iQP60k9LocrWZ5zHdfaozFJMBIPu5GMipYA8Uu/cnyDpn71U7tYoZ1yQHU8Ov3T7GlBEc+4NkHnr3rz4rqccKV5GraQrPvneeRQrHKK2B65qaSbP3ScjjGf8/59Kp21zGkbq7BQT1pHuA5/dce561tG63Oz2MpT0OqsNXEdzB9plCRkbXOfyz/AJ71fl8S6NFLv+1kMD2jY/riuOtbCW5Oe3XJNU9RdbW8S1QbpW4z6ZFNzVrM7IYVzdrnpS6rZzK0sFzHKjEElWBwSOPp0rA8a3oXw0SDkSzBM/TJ/pXnd1e/Zbh3thPthZUlmyF2lugAzlvw9R06Vt381/f6MltdPmONshlAzkZGCO/X2rSE7RszCtQcW7O5gwXJjiMZPDAEjPGRUgl3tknpWaJ4dwPnISemCDmnqLq4DLbrhB959uTj2rne5lFKO5prKZCAe5q9HKIzyOvFc5LdQacPMY/ORgeppbTVLu+kYvCqRg/KwbFU03sdcKsep21tqYhiwMdOtYl80r6pHfRAFM/Nu7HvUUd7BawN5x38HAJxj61zep+NzaxtbW9uMPyRu6UowbN6ddU5XOou9Nsr+8F3vCI+HePzNqkjpuXocVV1rxLDZutrbzRSMuFJUggepJFcTca9c6hpkLQrIsruUfZwvAH88/oadpGlC1vXm1SOQx8bUH8R7Z9qv2a6mFbELojXhtoftyh1UAfxE45roYxdhTGjKEI4G3H8qxbjOC2C7Zz0qW01Ge2xufdjoh6is2rq5wyg3qipd6aJrvEku+aJskY4xTjItsGbI2rzkUpvI5llWVW3k7jIHA5+lUcPcoCsR8sH7xP4f5+ta/CjWnDXUoXN5e6kXKsYIgQFTHJHvVu
1sbd1/wBKiR3xwfalldpWZdq5GCSox+B4FLPcyCMhnJQDaMDIX6Z5H4Uc9jZxiNtbnTrORolIKtztx3/zipjqJdwq4aJeEVu1UtisiMNvA5KnJJ605Y/LYkyE44KsuCvPenzXBU0f/9k=", - "image/png": "", + "image/png": "", "text/plain": [ "" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -281,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "07daca49", "metadata": {}, "outputs": [ @@ -291,7 +293,7 @@ "0" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -301,7 +303,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "61b315c3", "metadata": {}, @@ -313,16 +314,15 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "eb3c000c", "metadata": {}, "outputs": [], "source": [ - "!wget -q https://huggingface.co/datasets/Maysee/tiny-imagenet/raw/main/classes.py" + "!wget -q https://huggingface.co/datasets/zh-plus/tiny-imagenet/raw/main/classes.py" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "90212bfd", "metadata": {}, @@ -332,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "79569736", "metadata": {}, "outputs": [ @@ -358,27 +358,26 @@ ] }, { - "cell_type": "code", - "execution_count": 11, - "id": "ac4fcdb4", + "cell_type": "markdown", + "id": "edb6463d", "metadata": {}, - "outputs": [], "source": [ - "from classes import i2d" + "Now we can get the class names by providing the class id. For example" ] }, { - "attachments": {}, - "cell_type": "markdown", - "id": "edb6463d", + "cell_type": "code", + "execution_count": 10, + "id": "ac4fcdb4", "metadata": {}, + "outputs": [], "source": [ - "Now we can get the class names by providing the class id. 
For example" + "from classes import i2d" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "9b000a73", "metadata": {}, "outputs": [ @@ -388,7 +387,7 @@ "'entity'" ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -398,7 +397,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "2f6c5990", "metadata": {}, @@ -407,7 +405,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "69319cf7", "metadata": {}, @@ -417,14 +414,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "2913137d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1f3b91c9e3374d3da6343675f042f8ea", + "model_id": "c2a25af3fb184591a8d35273b93d2c54", "version_major": 2, "version_minor": 0 }, @@ -454,7 +451,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "6aac94ea", "metadata": {}, @@ -464,7 +460,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "id": "8e90af72", "metadata": {}, "outputs": [ @@ -474,20 +470,20 @@ "text": [ "Warning: fastdup create() without work_dir argument, output is stored in a folder named work_dir in your current working path.\n", "FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. 
Danny Bickson.\n", - "2023-07-03 17:39:16 [INFO] Going to loop over dir images_dir/images\n", - "2023-07-03 17:39:16 [INFO] Found total 110000 images to run on, 110000 train, 0 test, name list 110000, counter 110000 \n", - "2023-07-03 17:42:54 [INFO] Found total 110000 images to run onmated: 0 Minutes\n", - "Finished histogram 21.580\n", - "Finished bucket sort 21.771\n", - "2023-07-03 17:43:18 [INFO] 24691) Finished write_index() NN model\n", - "2023-07-03 17:43:18 [INFO] Stored nn model index file work_dir/nnf.index\n", - "2023-07-03 17:43:32 [INFO] Total time took 255992 ms\n", - "2023-07-03 17:43:32 [INFO] Found a total of 54 fully identical images (d>0.990), which are 0.02 %\n", - "2023-07-03 17:43:32 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n", - "2023-07-03 17:43:32 [INFO] Found a total of 12656 above threshold images (d>0.900), which are 5.75 %\n", - "2023-07-03 17:43:32 [INFO] Found a total of 11001 outlier images (d<0.050), which are 5.00 %\n", - "2023-07-03 17:43:32 [INFO] Min distance found 0.597 max distance 1.000\n", - "2023-07-03 17:43:32 [INFO] Running connected components for ccthreshold 0.960000 \n", + "2023-07-04 10:46:16 [INFO] Going to loop over dir images_dir/images\n", + "2023-07-04 10:46:17 [INFO] Found total 110000 images to run on, 110000 train, 0 test, name list 110000, counter 110000 \n", + "2023-07-04 10:50:00 [INFO] Found total 110000 images to run onmated: 0 Minutes\n", + "Finished histogram 30.955\n", + "Finished bucket sort 31.144\n", + "2023-07-04 10:50:23 [INFO] 23053) Finished write_index() NN model\n", + "2023-07-04 10:50:23 [INFO] Stored nn model index file work_dir/nnf.index\n", + "2023-07-04 10:50:35 [INFO] Total time took 258529 ms\n", + "2023-07-04 10:50:35 [INFO] Found a total of 54 fully identical images (d>0.990), which are 0.02 %\n", + "2023-07-04 10:50:35 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n", + "2023-07-04 10:50:35 [INFO] Found a total of 
12656 above threshold images (d>0.900), which are 5.75 %\n", + "2023-07-04 10:50:35 [INFO] Found a total of 11001 outlier images (d<0.050), which are 5.00 %\n", + "2023-07-04 10:50:35 [INFO] Min distance found 0.597 max distance 1.000\n", + "2023-07-04 10:50:35 [INFO] Running connected components for ccthreshold 0.960000 \n", ".0\n", " ########################################################################################\n", "\n", @@ -501,7 +497,7 @@ " For a detailed analysis, use `.connected_components()`\n", "(similarity threshold used is 0.9, connected component threshold used is 0.96).\n", "\n", - " Outliers: 6.36% (6,991) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n", + " Outliers: 6.36% (6,992) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n", " For a detailed list of outliers, use `.outliers()`.\n" ] }, @@ -511,7 +507,7 @@ "0" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -522,7 +518,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "676d9175", "metadata": {}, @@ -531,7 +526,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "1017106b", "metadata": {}, @@ -549,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "id": "8f558b89", "metadata": { "scrolled": false @@ -559,7 +553,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 279.49it/s]\n" + "100%|███████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 292.24it/s]\n" ] }, { @@ -1081,7 +1075,7 @@ "

\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1096,11 +1090,11 @@ "\n", "\n", " From\n", - " /pole/93044.jpg\n", + " /mashed potato/87457.jpg\n", "\n", "\n", " To\n", - " /flagpole, flagstaff/104649.jpg\n", + " /meat loaf, meatloaf/90376.jpg\n", "\n", " \n", " \n", @@ -1109,7 +1103,7 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1124,11 +1118,11 @@ "\n", "\n", " From\n", - " /pop bottle, soda bottle/62847.jpg\n", + " /flagpole, flagstaff/104606.jpg\n", "\n", "\n", " To\n", - " /beer bottle/33675.jpg\n", + " /pole/93443.jpg\n", "\n", " \n", " \n", @@ -1137,7 +1131,7 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1152,11 +1146,11 @@ "\n", "\n", " From\n", - " /meat loaf, meatloaf/90376.jpg\n", + " /spider web, spider's web/70895.jpg\n", "\n", "\n", " To\n", - " /mashed potato/87457.jpg\n", + " /black widow, Latrodectus mactans/4204.jpg\n", "\n", " \n", " \n", @@ -1165,7 +1159,7 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1180,11 +1174,11 @@ "\n", "\n", " From\n", - " /flagpole, flagstaff/104606.jpg\n", + " /spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish/8525.jpg\n", "\n", "\n", " To\n", - " /pole/93443.jpg\n", + " /American lobster, Northern lobster, Maine lobster, Homarus americanus/8111.jpg\n", "\n", " \n", " \n", @@ -1221,7 +1215,7 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1236,11 +1230,11 @@ "\n", "\n", " From\n", - " /black stork, Ciconia nigra/100906.jpg\n", + " /banana/89072.jpg\n", "\n", "\n", " To\n", - " /goose/85379.jpg\n", + " /lemon/88830.jpg\n", "\n", " \n", " \n", @@ -1249,7 +1243,7 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1264,11 +1258,11 @@ "\n", "\n", " From\n", - " /wooden spoon/82149.jpg\n", + " /computer keyboard, keypad/42227.jpg\n", "\n", "\n", " To\n", - " /wok/81771.jpg\n", + " /desk/44869.jpg\n", "\n", " \n", " \n", @@ -1277,7 +1271,7 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1292,39 +1286,11 @@ "\n", "\n", " From\n", - " /walking stick, walkingstick, stick insect/17643.jpg\n", - "\n", - "\n", - " To\n", - " /mantis, mantid/18797.jpg\n", - "\n", - " \n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - "\n", - " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
Info
Distance1.0
From/banana/89072.jpg/pop bottle, soda bottle/62558.jpg
To/lemon/88830.jpg/beer bottle/33640.jpg
\n", @@ -1333,7 +1299,7 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", "
\n", @@ -1348,11 +1314,11 @@ "\n", "\n", " From\n", - " /beer bottle/33973.jpg\n", + " /sock/69225.jpg\n", "\n", "\n", " To\n", - " /pop bottle, soda bottle/62815.jpg\n", + " /iPod/51355.jpg\n", "\n", " \n", " \n", @@ -1361,10 +1327,10 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -1376,11 +1342,11 @@ "\n", "\n", " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
From/coral reef/109532.jpg/coral reef/95198.jpg
To/brain coral/6750.jpg/brain coral/6866.jpg
\n", @@ -1389,10 +1355,10 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -1404,11 +1370,11 @@ "\n", "\n", " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
From/orange/99657.jpg/wooden spoon/82149.jpg
To/banana/89263.jpg/wok/81771.jpg
\n", @@ -1417,10 +1383,10 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -1432,11 +1398,11 @@ "\n", "\n", " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
From/mashed potato/87495.jpg/banana/89386.jpg
To/meat loaf, meatloaf/90258.jpg/orange/109975.jpg
\n", @@ -1445,10 +1411,10 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -1460,11 +1426,11 @@ "\n", "\n", " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
From/sock/69225.jpg/snail/7463.jpg
To/iPod/51355.jpg/slug/99073.jpg
\n", @@ -1473,10 +1439,10 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -1488,11 +1454,11 @@ "\n", "\n", " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
From/pop bottle, soda bottle/62558.jpg/fur coat/104877.jpg
To/beer bottle/33640.jpg/miniskirt, mini/56033.jpg
\n", @@ -1501,10 +1467,10 @@ "
\n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -1516,11 +1482,11 @@ "\n", "\n", " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
From/spider web, spider's web/70895.jpg/banana/89263.jpg
To/black widow, Latrodectus mactans/4204.jpg/orange/99657.jpg
\n", @@ -1548,7 +1514,7 @@ "0" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1559,7 +1525,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "id": "de484e82", "metadata": { "scrolled": false @@ -1569,7 +1535,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 27503.63it/s]\n" + "100%|█████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 21743.41it/s]\n" ] }, { @@ -2120,7 +2086,7 @@ " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -2165,34 +2131,10 @@ " \n", "
\n", "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "
Info
Distance0.649107
Path/oboe, hautboy, hautbois/58272.jpg
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -2216,7 +2158,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2240,7 +2182,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2264,7 +2206,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2285,34 +2227,10 @@ " \n", "
\n", "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "
Info
Distance0.664507
Path/cardigan/39235.jpg
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -2336,7 +2254,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2360,7 +2278,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2381,10 +2299,10 @@ " \n", "
\n", "
\n", - " \n", + " \n", "
\n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2392,11 +2310,11 @@ " \n", "\n", " \n", - " \n", + " \n", "\n", "\n", " \n", - " \n", + " \n", "\n", " \n", "
Distance0.6691450.671368
Path/bannister, banister, balustrade, balusters, handrail/29014.jpg/nail/98461.jpg
\n", @@ -2408,7 +2326,7 @@ " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -2432,7 +2350,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2456,7 +2374,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2480,7 +2398,7 @@ " \n", " \n", "
\n", - "
\n", + "
\n", "
\n", " \n", " \n", @@ -2549,10 +2467,34 @@ " \n", "
\n", "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.681324
Path/cardigan/39235.jpg
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", " \n", "
\n", "
\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -2571,6 +2513,30 @@ " \n", " \n", " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
Info
Distance0.682701
Path/sock/69128.jpg
\n", + "
\n", + "
\n", + "
\n", " \n", "
\n", " \n", @@ -2592,7 +2558,7 @@ "0" ] }, - "execution_count": 17, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -2602,7 +2568,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "a4eb87fa", "metadata": {}, @@ -2620,6 +2585,10 @@ "\n", "- [**Cleaning Image Dataset**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/cleaning-image-dataset.ipynb) - Learn how to clean a dataset from broken images, duplicates, outliers, and identify dark/bright/blurry images.\n", "\n", + "- [**Try our free cloud product VL Profiler**](https://app.visual-layer.com) - VL Profiler is our first no-code commercial product that lets you visualize and inspect your dataset in your browser.\n", + "\n", + "[![image](https://raw.githubusercontent.com/visual-layer/fastdup/main/gallery/vl_profiler_promo.svg)](https://app.visual-layer.com)\n", + "\n", "As usual, feedback is welcome! Drop by our [Slack channel](https://visualdatabase.slack.com/join/shared_invite/zt-19jaydbjn-lNDEDkgvSI1QwbTXSY6dlA#/shared-invite/email) if you have questions!\n", "Happy learning 😀" ] @@ -2641,7 +2610,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.9.16" } }, "nbformat": 4,