164 changes: 164 additions & 0 deletions .github/workflows/examples-ci.yml
@@ -0,0 +1,164 @@
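# Nightly CI for the fastdup example scripts: each job installs fastdup,
# downloads a public dataset, runs one script from .github/workflows/tests/,
# and uploads the resulting fastdup_work_dir/ as a build artifact.
# SENTRY_OPT_OUT is set so the runs opt out of fastdup's usage telemetry.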
name: Test Run Examples

on:
  workflow_dispatch:
  schedule:
    - cron: '0 16 * * *' # Trigger at 4PM UTC every day

jobs:
  test-quick-dataset-analysis:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-22.04]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget "https://thor.robots.ox.ac.uk/~vgg/data/pets/images.tar.gz" -O "images.tar.gz"
          tar xf "images.tar.gz"

      - name: Run example
        run: |
          python .github/workflows/tests/quick_dataset_analysis.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_quick_dataset_analysis
          path: fastdup_work_dir/

  test-cleaning-image-dataset:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
          tar -xf food-101.tar.gz

      - name: Run example
        run: |
          python .github/workflows/tests/cleaning_image_dataset.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_cleaning_image_dataset
          path: fastdup_work_dir/

  test-labeled-image-classification:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
          tar -xf imagenette2-160.tgz

      - name: Run example
        run: |
          python .github/workflows/tests/labeled_image_classification.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_labeled_image_classification
          path: fastdup_work_dir/

  test-labeled-object-detection:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup plotly gdown

      - name: Download dataset
        run: |
          gdown --fuzzy https://drive.google.com/file/d/1iSXVTlkV1_DhdYpVDqsjlT4NJFQ7OkyK/view
          unzip -qq coco_minitrain_25k.zip
          cd coco_minitrain_25k/annotations && gdown --fuzzy https://drive.google.com/file/d/1i12p23cXlqp1QrXjAD_vu467r4q67Mq9/view

      - name: Run example
        run: |
          python .github/workflows/tests/labeled_object_detection.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_labeled_object_detection
          path: fastdup_work_dir/
12 changes: 12 additions & 0 deletions .github/workflows/tests/cleaning_image_dataset.py
@@ -0,0 +1,12 @@
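# Cleaning-image-dataset example: runs fastdup on 1,000 images from the
# Food-101 dataset downloaded by the workflow, then renders duplicate,
# connected-component, outlier, and dark/bright/blur statistics galleries.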
import fastdup
print(f'fastdup version: {fastdup.__version__}')

fd = fastdup.create(work_dir="fastdup_work_dir/", input_dir="food-101/images/")
fd.run(num_images=1000)

fd.vis.duplicates_gallery(num_images=5)
fd.vis.component_gallery(num_images=5)
fd.vis.outliers_gallery(num_images=5)
fd.vis.stats_gallery(metric='dark', num_images=5)
fd.vis.stats_gallery(metric='bright', num_images=5)
fd.vis.stats_gallery(metric='blur', num_images=5)
50 changes: 50 additions & 0 deletions .github/workflows/tests/labeled_image_classification.py
@@ -0,0 +1,50 @@
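# Labeled image-classification example: builds a fastdup annotations DataFrame
# for Imagenette (160 px) from noisy_imagenette.csv, maps WordNet synset ids
# to readable class names, and renders label-aware galleries after the run.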
import pandas as pd
data_dir = 'imagenette2-160/'
csv_path = 'imagenette2-160/noisy_imagenette.csv'

label_map = {
'n02979186': 'cassette_player',
'n03417042': 'garbage_truck',
'n01440764': 'tench',
'n02102040': 'English_springer',
'n03028079': 'church',
'n03888257': 'parachute',
'n03394916': 'French_horn',
'n03000684': 'chain_saw',
'n03445777': 'golf_ball',
'n03425413': 'gas_pump'
}

df_annot = pd.read_csv(csv_path)
# take relevant columns
df_annot = df_annot[['path', 'noisy_labels_0']]

# rename columns to fastdup's column names
df_annot = df_annot.rename({'noisy_labels_0': 'label', 'path': 'filename'}, axis='columns')

# append datadir
df_annot['filename'] = df_annot['filename'].apply(lambda x: data_dir + x)

# create split column
df_annot['split'] = df_annot['filename'].apply(lambda x: x.split("/")[1])

# map label ids to regular labels
df_annot['label'] = df_annot['label'].map(label_map)


import fastdup
print(f'fastdup version: {fastdup.__version__}')

work_dir = 'fastdup_work_dir'
fd = fastdup.create(work_dir=work_dir, input_dir=data_dir)
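# Similarity cutoffs (assumed semantics: ccthreshold groups images into
# connected components, threshold filters the reported pairwise matches).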
fd.run(annotations=df_annot, ccthreshold=0.9, threshold=0.8)

fd.vis.duplicates_gallery(num_images=5)
fd.vis.component_gallery(num_images=5)
fd.vis.component_gallery(slice='chain_saw')
fd.vis.outliers_gallery(num_images=5)
fd.vis.similarity_gallery()

fd.vis.stats_gallery(metric='dark', num_images=5)
fd.vis.stats_gallery(metric='bright', num_images=5)
fd.vis.stats_gallery(metric='blur', num_images=5)
21 changes: 21 additions & 0 deletions .github/workflows/tests/labeled_object_detection.py
@@ -0,0 +1,21 @@
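# Labeled object-detection example: loads COCO minitrain-25k bounding-box
# annotations from CSV, prefixes each filename with the image directory, and
# runs fastdup on up to 10,000 images before rendering the galleries.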
import fastdup
print(f'fastdup version: {fastdup.__version__}')

import pandas as pd
coco_csv = 'coco_minitrain_25k/annotations/coco_minitrain2017.csv'
coco_annotations = pd.read_csv(coco_csv, header=None,
                               names=['filename', 'col_x', 'row_y',
                                      'width', 'height', 'label', 'ext'])

coco_annotations['split'] = 'train' # Only train files were loaded
coco_annotations['filename'] = coco_annotations['filename'].apply(lambda x: 'coco_minitrain_25k/images/train2017/'+x)
coco_annotations = coco_annotations.drop_duplicates()

input_dir = '.'
work_dir = 'fastdup_work_dir'

fd = fastdup.create(work_dir=work_dir, input_dir=input_dir)
fd.run(annotations=coco_annotations, overwrite=True, num_images=10000)

fd.vis.component_gallery(metric='size', max_width=900)
fd.vis.outliers_gallery()
fd.vis.component_gallery(num_images=25, slice='diff')
11 changes: 11 additions & 0 deletions .github/workflows/tests/quick_dataset_analysis.py
@@ -0,0 +1,11 @@
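# Quick, label-free analysis: runs fastdup on up to 10,000 of the Oxford-IIIT
# Pet images downloaded by the workflow and renders the main galleries.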
import fastdup
print(f'fastdup version: {fastdup.__version__}')

fd = fastdup.create(work_dir="fastdup_work_dir/", input_dir="images/")
fd.run(num_images=10000)

fd.vis.duplicates_gallery()
fd.vis.outliers_gallery()
fd.vis.stats_gallery(metric='dark')
fd.vis.component_gallery()
fd.vis.similarity_gallery()