diff --git a/README.md b/README.md
index c4f4618..608a5d7 100644
--- a/README.md
+++ b/README.md
@@ -4,8 +4,8 @@
-
-        fastdup logo.
+
+        fastdup logo.
@@ -24,7 +24,7 @@
-[pypi-shield]: https://img.shields.io/badge/Python-3.8%20|%203.9%20|%203.10%20|%203.11|%203.12-blue?style=for-the-badge
+[pypi-shield]: https://img.shields.io/badge/Python-3.8%20|%203.9%20|%203.10%20|%203.11-blue?style=for-the-badge
 [pypi-url]: https://pypi.org/project/fastdup/
 [pypiversion-shield]: https://img.shields.io/pypi/v/fastdup?style=for-the-badge&color=lightblue
 [downloads-shield]: https://img.shields.io/pepy/dt/fastdup?style=for-the-badge&color=success
@@ -34,12 +34,12 @@
 [license-shield]: https://img.shields.io/badge/License-CC%20BY--NC--ND%204.0-purple.svg?style=for-the-badge
 [license-url]: https://github.com/visual-layer/fastdup/blob/main/LICENSE
 [os-shield]: https://img.shields.io/badge/Supported%20OS-macOS%20%7C%20Linux%20%7C%20Windows%20-yellow?style=for-the-badge
-[os-url]: https://visual-layer.readme.io/docs/installation
+[os-url]: https://visual-layer.readme.io/docs/quickstart

- An unsupervised and free tool for image and video dataset analysis founded by the authors of XGBoost, Apache TVM & Turi Create - Danny Bickson, Carlos Guestrin and Amir Alush.

+ A powerful open-source tool for analyzing image and video datasets founded by the authors of XGBoost, Apache TVM & Turi Create - Danny Bickson, Carlos Guestrin and Amir Alush.


 Documentation ·
@@ -111,7 +111,12 @@
 fd.vis.stats_gallery()    # gallery of image statistics (e.g. blur, brightn
 fd.vis.similarity_gallery()  # gallery of similar images
 ```
-![results](./gallery/gifl_fastdup_quickstart_V1_optimized.gif)
+## Check this [quickstart tutorial](https://youtu.be/Gt46ciEIxtw) for more info
+
+
 ## Features & Advantages
diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb
index c615ff5..aabf5e2 100644
--- a/examples/quickstart.ipynb
+++ b/examples/quickstart.ipynb
@@ -59,18 +59,23 @@
     "\n",
     "[![Open in Colab](https://img.shields.io/badge/Open%20in%20Colab-blue?style=for-the-badge&logo=google-colab&labelColor=gray)](https://colab.research.google.com/github/visual-layer/fastdup/blob/main/examples/quickstart.ipynb)\n",
     "[![Open in Kaggle](https://img.shields.io/badge/Open%20in%20Kaggle-blue?style=for-the-badge&logo=kaggle&labelColor=gray)](https://kaggle.com/kernels/welcome?src=https://github.com/visual-layer/fastdup/blob/main/examples/quickstart.ipynb)\n",
-    "[![Explore the Docs](https://img.shields.io/badge/Explore%20the%20Docs-blue?style=for-the-badge&labelColor=gray&logo=read-the-docs)](https://visual-layer.readme.io/docs/quickstart)\n",
+    "[![Explore the Docs](https://img.shields.io/badge/Explore%20the%20Docs-blue?style=for-the-badge&labelColor=gray&logo=read-the-docs)](https://docs.visual-layer.com/docs/getting-started-with-fastdup)\n",
     "\n",
-    "This notebook shows how to quickly analyze an image dataset for potential issues using [fastdup](https://github.com/visual-layer/fastdup). We'll take you on a high-level tour showcasing the core functions of fastdup in the shortest time.\n",
+    "Welcome to the fastdup Quickstart Guide! 🎉\n",
     "\n",
-    "By the end of this notebook, you will learn how to find out if your dataset has issues such as:\n",
+    "This notebook demonstrates how to efficiently analyze an image dataset for potential issues using [fastdup](https://github.com/visual-layer/fastdup), a powerful tool designed for image and video dataset exploration.\n",
     "\n",
-    "+ Broken images.\n",
-    "+ Duplicates/near-duplicates.\n",
-    "+ Outliers.\n",
-    "+ Dark/bright/blurry images.\n",
+    "### Objectives\n",
+    "By the end of this tutorial, you'll be able to:\n",
+    "- Detect and identify **broken images**.\n",
+    "- Spot **duplicates** or **near-duplicates** within your dataset.\n",
+    "- Discover **outliers** that may affect model performance.\n",
+    "- Find **dark, bright, or blurry images** for potential quality adjustments.\n",
     "\n",
-    "We'll also visualize clusters of visually similar images to provide a bird's-eye view and help you understand the data's structure for further analysis."
+    "### What's Included\n",
+    "In addition to identifying dataset issues, this guide will help you:\n",
+    "- Visualize **clusters of visually similar images**, enabling a high-level understanding of your dataset's structure.\n",
+    "- Learn the core functionalities of fastdup with simple, step-by-step examples."
    ]
   },
   {
@@ -89,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "8e6dd3e6-0f72-456b-9b16-2e53d5d5c099",
    "metadata": {},
    "outputs": [],
@@ -107,25 +112,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "id": "e301485f",
    "metadata": {
     "id": "e301485f",
     "tags": []
    },
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/or_barsheshet/Library/Python/3.9/lib/python/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      " from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "'2.0.17'"
+       "'2.14'"
      ]
     },
-     "execution_count": 1,
+     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+    "import os \n",
+    "os.environ['JPY_PARENT_PID'] = '1'\n",
+    "\n",
+    "# Verify fastdup installation\n",
    "import fastdup\n",
    "fastdup.__version__"
   ]
@@ -157,7 +174,16 @@
   "execution_count": null,
   "id": "d91abfc1",
   "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "zsh:1: command not found: wget\n",
+     "tar: Error opening archive: Failed to open 'images.tar.gz'\n"
+    ]
+   }
+  ],
   "source": [
   "!wget https://thor.robots.ox.ac.uk/~vgg/data/pets/images.tar.gz -O images.tar.gz\n",
   "!tar xf images.tar.gz"
  ]
@@ -179,7 +205,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": 6,
   "id": "fe4d8211-89b2-4a2f-91f4-8074d2314aef",
   "metadata": {},
   "outputs": [
@@ -206,7 +232,7 @@
   " 1. Analyze your dataset with the \u001b[0;35m.run()\u001b[0m function of the dataset object\n",
   " 2. Interactively explore your data on your local machine with the \u001b[0;35m.explore()\u001b[0m function of the dataset object\n",
   "\n",
-  "For more information, use \u001b[0;35mhelp(fastdup)\u001b[0m or check our documentation [link].\n",
+  "For more information, use \u001b[0;35mhelp(fastdup)\u001b[0m or check our documentation https://docs.visual-layer.com/docs/getting-started-with-fastdup.\n",
   "\n"
  ]
 }
@@ -229,65 +255,14 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 14,
+  "execution_count": null,
   "id": "beac4c50-3084-47fe-9b22-b14c3d3cb139",
   "metadata": {
    "tags": []
   },
-  "outputs": [
-   {
-    "name": "stderr",
-    "output_type": "stream",
-    "text": [
-     "fastdup By Visual Layer, Inc. 2024. All rights reserved.\n"
-    ]
-   },
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "Initializing data [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] 100% Estimated: 0 Minutes\r"
-    ]
-   },
-   {
-    "name": "stderr",
-    "output_type": "stream",
-    "text": [
-     "Done: 100%|██████████████████████████████████████████████| 3/3 [01:20<00:00, 26.86s/it]"
-    ]
-   },
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "\n",
-     "Analysis complete. Use the \u001b[0;35m.explore()\u001b[0m function to interactively explore your data on your local machine.\n",
-     "\n",
-     "Alternatively, you can generate HTML-based galleries.\n",
-     "For more information, use \u001b[0;35mhelp(fastdup)\u001b[0m or check our documentation [link].\n",
-     "\n"
-    ]
-   },
-   {
-    "name": "stderr",
-    "output_type": "stream",
-    "text": [
-     "\n"
-    ]
-   },
-   {
-    "data": {
-     "text/plain": [
-      "0"
-     ]
-    },
-    "execution_count": 14,
-    "metadata": {},
-    "output_type": "execute_result"
-   }
-  ],
+  "outputs": [],
  "source": [
-  "fd.run()"
+  "fd.run(overwrite=True)"
  ]
 },
 {
@@ -469,6 +444,62 @@
  "fd.invalid_instances()"
  ]
 },
+{
+ "cell_type": "markdown",
+ "id": "98a0333c",
+ "metadata": {},
+ "source": [
+  "## Interactive Exploration\n",
+  "In addition to the static visualizations presented above, fastdup also offers interactive exploration of the dataset.\n",
+  "\n",
+  "To explore the dataset and issues interactively in a browser, run:"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4f2298e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "fd.explore()"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "> 🗒 **Note** - This currently requires you to sign up (for free) to view the interactive exploration. Alternatively, you can visualize fastdup in a non-interactive way using fastdup's built-in galleries shown in the upcoming cells.\n",
+  "\n",
+  "You'll be presented with a web interface that lets you conveniently view, filter, and curate your dataset.\n",
+  "\n",
+  "\n",
+  "![image.png](https://vl-blog.s3.us-east-2.amazonaws.com/fastdup_assets/cloud_preview.gif)"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "id": "c1330de5",
+ "metadata": {},
+ "source": [
+  "## Visualize Image Clusters\n",
+  "\n",
+  "One of fastdup's coolest features is visualizing image clusters. In this section, we group similar-looking images (or even duplicates) as a cluster and visualize them in the gallery.\n",
+  "\n",
+  "To do so, run:\n",
+  "\n"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e56690a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "fd.vis.component_gallery()"
+ ]
+},
 {
  "cell_type": "markdown",
  "id": "22e04b25-0fe7-409d-8bd9-3b92c2ec8c5b",
@@ -2780,11 +2811,11 @@
  "id": "789da241-e9cd-4568-9d19-aa5c80567415",
  "metadata": {},
  "source": [
-  "## Dark, Bright and Blurry Images\n",
+  "## Blurry, Dark and Bright Images\n",
  "\n",
  "fastdup also lets you visualize images from your dataset using statistical metrics.\n",
  "\n",
-  "For example, with `metric='dark'` we can visualize the darkest images from the dataset."
+  "For example, with `metric='blur'` we can visualize the blurriest images from the dataset."
 ]
 },
 {
@@ -6306,1318 +6337,6 @@
  "fd.vis.stats_gallery(metric='blur')"
  ]
 },
-{
- "cell_type": "markdown",
- "id": "a6808750-d5d7-44bc-a6b0-aa985255407b",
- "metadata": {
-  "tags": []
- },
- "source": [
-  "## Visualize Image Clusters\n",
-  "\n",
-  "One of fastdup's coolest features is visualizing image clusters. In the previous section, we saw how to visualize similar image pairs. In this section, we group similar-looking images (or even duplicates) as a cluster and visualize them in the gallery.\n",
-  "\n",
-  "To do so, run:\n",
-  "\n"
- ]
-},
-{
- "cell_type": "markdown",
- "id": "2bcd09c1",
- "metadata": {},
- "source": [
-  "> **Note**: fastdup uses default parameter values when creating image clusters. Depending on your data and use case, the best value may vary. Read more [here](https://visual-layer.readme.io/docs/dataset-cleanup) on how to change parameter values to cluster images."
- ]
-},
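The removed note above points out that fastdup clusters images with default parameter values. A minimal, hedged sketch of tightening the grouping before regenerating the gallery - assuming `fd.run()` forwards a `ccthreshold` keyword, the knob recorded as `ccthreshold=0.96` inside the `turi_param` string of the `work_dir/config.json` added later in this PR:

```python
# Hedged tuning sketch: ccthreshold is assumed to be accepted by run().
# A higher value (closer to 1.0) keeps only tighter clusters of near-identical
# images; the default recorded in work_dir/config.json is 0.96.
fd.run(ccthreshold=0.98, overwrite=True)

# Rebuild the cluster gallery with the new grouping.
fd.vis.component_gallery()
```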
-{
- "cell_type": "code",
- "execution_count": 11,
- "id": "2cc2b317-e92e-4e40-9655-0a6b7c569dfa",
- "metadata": {
-  "tags": []
- },
- "outputs": [
-  {
-   "data": {
-    "application/vnd.jupyter.widget-view+json": {
-     "model_id": "cc097b9e33aa43febbe2b6950de2ce9e",
-     "version_major": 2,
-     "version_minor": 0
-    },
-    "text/plain": [
-     "Generating gallery: 0%| | 0/20 [00:00
-     [... the remainder of this removed output cell - the bulk of the 1,318 lines deleted by this hunk - is an embedded HTML "Components Report" gallery: a header with the fastdup logo and a pointer to fastdup.explore(), followed by roughly 20 groups of similar images, each with an Info table listing the component id, num_images (2-3), and mean_distance (about 0.96-1.0) ...]
-    ],
-    "text/plain": [
-     ""
-    ]
-   },
-   "metadata": {},
-   "output_type": "display_data"
-  },
-  {
-   "data": {
-    "text/plain": [
-     "0"
-    ]
-   },
-   "execution_count": 11,
-   "metadata": {},
-   "output_type": "execute_result"
-  }
- ],
- "source": [
-  "fd.vis.component_gallery()"
- ]
-},
\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fd.vis.component_gallery()" - ] - }, - { - "cell_type": "markdown", - "id": "98a0333c", - "metadata": {}, - "source": [ - "## Interactive Exploration\n", - "In addition to the static visualizations presented above, fastdup also offers interactive exploration of the dataset.\n", - "\n", - "To explore the dataset and issues interactively in a browser, run:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f1c8b89-cf96-4130-b09e-b257904445d1", - "metadata": {}, - "outputs": [], - "source": [ - "fd.explore()" - ] - }, - { - "cell_type": "markdown", - "id": "609b7114-9bae-46f5-be4d-0b86c920770e", - "metadata": {}, - "source": [ - "> ๐Ÿ—’ **Note** - This currently requires you to sign-up (for free) to view the interactive exploration. Alternatively, you can visualize fastdup in a non-interactive way using fastdup's built in galleries shown in the upcoming cells.\n", - "\n", - "You'll be presented with a web interface that lets you conveniently view, filter, and curate your dataset in a web interface.\n", - "\n", - "\n", - "![image.png](https://vl-blog.s3.us-east-2.amazonaws.com/fastdup_assets/cloud_preview.gif)" - ] - }, { "cell_type": "markdown", "id": "6c3135e1", @@ -7637,7 +6356,6 @@ "\n", "Next, feel free to check out other tutorials -\n", "\n", - "+ โšก [**Quickstart**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/quick-dataset-analysis.ipynb): Learn how to install fastdup, load a dataset and analyze it for potential issues such as duplicates/near-duplicates, broken images, outliers, dark/bright/blurry images, and view visually similar image clusters. If you're new, start here!\n", "+ ๐Ÿงน [**Clean Image Folder**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/cleaning-image-dataset.ipynb): Learn how to analyze and clean a folder of images from potential issues and export a list of problematic files for further action. If you have an unorganized folder of images, this is a good place to start.\n", "+ ๐Ÿ–ผ [**Analyze Image Classification Dataset**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/analyzing-image-classification-dataset.ipynb): Learn how to load a labeled image classification dataset and analyze for potential issues. If you have labeled ImageNet-style folder structure, have a go!\n", "+ ๐ŸŽ [**Analyze Object Detection Dataset**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/analyzing-object-detection-dataset.ipynb): Learn how to load bounding box annotations for object detection and analyze for potential issues. If you have a COCO-style labeled object detection dataset, give this example a try. 
\n", @@ -7682,7 +6400,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -7696,7 +6414,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.1.undefined" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/examples/work_dir/config.json b/examples/work_dir/config.json new file mode 100644 index 0000000..dacc3ce --- /dev/null +++ b/examples/work_dir/config.json @@ -0,0 +1,27 @@ +{ + "input_dir": "images", + "work_dir": "work_dir", + "test_dir": "", + "compute": "cpu", + "verbose": false, + "num_threads": -1, + "num_images": 0, + "turi_param": "nnmodel=0,ccthreshold=0.96,run_cc=1,run_sentry=1,delete_tar=0,delete_img=0,tar_only=0,run_stats=1,run_stats_only=0,run_advanced_stats=0,sync_s3_to_local=0,store_int=1,shorten_filenames=0,save_crops=0,augmentation_horiz=0.2,augmentation_vert=0.2,augmentation_additive_margin=0,num_onnx_inter_threads=0,num_onnx_intra_threads=0,is_clip14_model=0,min_input_image_height=10,min_input_image_width=10,save_thumbnails=0,find_regex=,no_sort=0,quiet=0,fastdup_ocr_lang=en,fastdup_ocr_no_crop=0,global_log_error_level=3", + "distance": "cosine", + "threshold": 0.9, + "lower_threshold": 0.05, + "model_path": "/Users/or_barsheshet/Library/Python/3.9/lib/python/site-packages/fastdup/UndisclosedFastdupModel.ort", + "version": false, + "nearest_neighbors_k": 2, + "d": 576, + "run_mode": 0, + "nn_provider": "nnf", + "min_offset": 0, + "max_offset": 0, + "nnf_mode": "HNSW32", + "nnf_param": "", + "bounding_box": "", + "batch_size": 1, + "resume": 0, + "high_accuracy": false +} \ No newline at end of file diff --git a/gallery/Logo-fastdup-by-VL.png b/gallery/Logo-fastdup-by-VL.png new file mode 100644 index 0000000..36a1dd9 Binary files /dev/null and b/gallery/Logo-fastdup-by-VL.png differ