164 changes: 164 additions & 0 deletions .github/workflows/examples-ci.yml
@@ -0,0 +1,164 @@
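# Nightly CI for the fastdup example scripts: each job installs fastdup,
# downloads a public dataset, runs one script from .github/workflows/tests/,
# and uploads the resulting fastdup_work_dir/ as a build artifact.
# SENTRY_OPT_OUT is set so the runs opt out of fastdup's usage telemetry.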
name: Test Run Examples

on:
  workflow_dispatch:
  schedule:
    - cron: '0 16 * * *' # Trigger at 4PM UTC every day

jobs:
  test-quick-dataset-analysis:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-22.04]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget "https://thor.robots.ox.ac.uk/~vgg/data/pets/images.tar.gz" -O "images.tar.gz"
          tar xf "images.tar.gz"

      - name: Run example
        run: |
          python .github/workflows/tests/quick_dataset_analysis.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_quick_dataset_analysis
          path: fastdup_work_dir/

  test-cleaning-image-dataset:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
          tar -xf food-101.tar.gz

      - name: Run example
        run: |
          python .github/workflows/tests/cleaning_image_dataset.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_cleaning_image_dataset
          path: fastdup_work_dir/

  test-labeled-image-classification:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
          tar -xf imagenette2-160.tgz

      - name: Run example
        run: |
          python .github/workflows/tests/labeled_image_classification.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_labeled_image_classification
          path: fastdup_work_dir/

  test-labeled-object-detection:
    runs-on: ${{ matrix.os }}
    env:
      SENTRY_OPT_OUT: True
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup plotly gdown

      - name: Download dataset
        run: |
          gdown --fuzzy https://drive.google.com/file/d/1iSXVTlkV1_DhdYpVDqsjlT4NJFQ7OkyK/view
          unzip -qq coco_minitrain_25k.zip
          cd coco_minitrain_25k/annotations && gdown --fuzzy https://drive.google.com/file/d/1i12p23cXlqp1QrXjAD_vu467r4q67Mq9/view

      - name: Run example
        run: |
          python .github/workflows/tests/labeled_object_detection.py

      - name: Save artifacts
        uses: actions/upload-artifact@v3
        with:
          name: fastdup_work_dir_labeled_object_detection
          path: fastdup_work_dir/
12 changes: 12 additions & 0 deletions .github/workflows/tests/cleaning_image_dataset.py
@@ -0,0 +1,12 @@
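# Cleaning-image-dataset example: runs fastdup on 1,000 images from the
# Food-101 dataset downloaded by the workflow, then renders duplicate,
# connected-component, outlier, and dark/bright/blur statistics galleries.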
import fastdup
print(f'fastdup version: {fastdup.__version__}')

fd = fastdup.create(work_dir="fastdup_work_dir/", input_dir="food-101/images/")
fd.run(num_images=1000)

fd.vis.duplicates_gallery(num_images=5)
fd.vis.component_gallery(num_images=5)
fd.vis.outliers_gallery(num_images=5)
fd.vis.stats_gallery(metric='dark', num_images=5)
fd.vis.stats_gallery(metric='bright', num_images=5)
fd.vis.stats_gallery(metric='blur', num_images=5)
50 changes: 50 additions & 0 deletions .github/workflows/tests/labeled_image_classification.py
@@ -0,0 +1,50 @@
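# Labeled image-classification example: builds a fastdup annotations DataFrame
# for Imagenette (160 px) from noisy_imagenette.csv, maps WordNet synset ids
# to readable class names, and renders label-aware galleries after the run.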
import pandas as pd
data_dir = 'imagenette2-160/'
csv_path = 'imagenette2-160/noisy_imagenette.csv'

label_map = {
'n02979186': 'cassette_player',
'n03417042': 'garbage_truck',
'n01440764': 'tench',
'n02102040': 'English_springer',
'n03028079': 'church',
'n03888257': 'parachute',
'n03394916': 'French_horn',
'n03000684': 'chain_saw',
'n03445777': 'golf_ball',
'n03425413': 'gas_pump'
}

df_annot = pd.read_csv(csv_path)
# take relevant columns
df_annot = df_annot[['path', 'noisy_labels_0']]

# rename columns to fastdup's column names
df_annot = df_annot.rename({'noisy_labels_0': 'label', 'path': 'filename'}, axis='columns')

# append datadir
df_annot['filename'] = df_annot['filename'].apply(lambda x: data_dir + x)

# create split column
df_annot['split'] = df_annot['filename'].apply(lambda x: x.split("/")[1])

# map label ids to regular labels
df_annot['label'] = df_annot['label'].map(label_map)


import fastdup
print(f'fastdup version: {fastdup.__version__}')

work_dir = 'fastdup_work_dir'
fd = fastdup.create(work_dir=work_dir, input_dir=data_dir)
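# Similarity cutoffs (assumed semantics: ccthreshold groups images into
# connected components, threshold filters the reported pairwise matches).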
fd.run(annotations=df_annot, ccthreshold=0.9, threshold=0.8)

fd.vis.duplicates_gallery(num_images=5)
fd.vis.component_gallery(num_images=5)
fd.vis.component_gallery(slice='chain_saw')
fd.vis.outliers_gallery(num_images=5)
fd.vis.similarity_gallery()

fd.vis.stats_gallery(metric='dark', num_images=5)
fd.vis.stats_gallery(metric='bright', num_images=5)
fd.vis.stats_gallery(metric='blur', num_images=5)
21 changes: 21 additions & 0 deletions .github/workflows/tests/labeled_object_detection.py
@@ -0,0 +1,21 @@
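# Labeled object-detection example: loads COCO minitrain-25k bounding-box
# annotations from CSV, prefixes each filename with the image directory, and
# runs fastdup on up to 10,000 images before rendering the galleries.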
import fastdup
print(f'fastdup version: {fastdup.__version__}')

import pandas as pd
coco_csv = 'coco_minitrain_25k/annotations/coco_minitrain2017.csv'
coco_annotations = pd.read_csv(coco_csv, header=None,
                               names=['filename', 'col_x', 'row_y',
                                      'width', 'height', 'label', 'ext'])

coco_annotations['split'] = 'train' # Only train files were loaded
coco_annotations['filename'] = coco_annotations['filename'].apply(lambda x: 'coco_minitrain_25k/images/train2017/'+x)
coco_annotations = coco_annotations.drop_duplicates()

input_dir = '.'
work_dir = 'fastdup_work_dir'

fd = fastdup.create(work_dir=work_dir, input_dir=input_dir)
fd.run(annotations=coco_annotations, overwrite=True, num_images=10000)

fd.vis.component_gallery(metric='size', max_width=900)
fd.vis.outliers_gallery()
fd.vis.component_gallery(num_images=25, slice='diff')
11 changes: 11 additions & 0 deletions .github/workflows/tests/quick_dataset_analysis.py
@@ -0,0 +1,11 @@
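# Quick, label-free analysis: runs fastdup on up to 10,000 of the Oxford-IIIT
# Pet images downloaded by the workflow and renders the main galleries.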
import fastdup
print(f'fastdup version: {fastdup.__version__}')

fd = fastdup.create(work_dir="fastdup_work_dir/", input_dir="images/")
fd.run(num_images=10000)

fd.vis.duplicates_gallery()
fd.vis.outliers_gallery()
fd.vis.stats_gallery(metric='dark')
fd.vis.component_gallery()
fd.vis.similarity_gallery()