# 1. Import Dependencies

In [1]:
!pip install opencv-python



In [2]:
# Import opencv
import cv2 

# Import uuid
import uuid

# Import Operating System
import os

# Import time
import time

# 2. Define Images to Collect

In [4]:
labels = ['strands']
number_imgs = 12

# 3. Setup Folders 

In [3]:
IMAGES_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'strands', 'train')

In [9]:
if not os.path.exists(IMAGES_PATH):
    if os.name == 'posix':
        !mkdir -p {IMAGES_PATH}
    if os.name == 'nt':
         !mkdir {IMAGES_PATH}
for label in labels:
    path = os.path.join(IMAGES_PATH, label)
    if not os.path.exists(path):
        !mkdir {path}

# 4. Rename Images

In [11]:
for label in labels:
    
    images = os.listdir(os.path.join(IMAGES_PATH, label))
    
    for img in images:
        img_name = os.path.join(IMAGES_PATH,label,label+'.'+'{}.jpg'.format(str(uuid.uuid1())))
        
        os.rename(os.path.join(IMAGES_PATH,label,img), img_name)

# 5. Image Labelling

In [12]:
!pip install --upgrade pyqt5 lxml

Collecting pyqt5
  Using cached PyQt5-5.15.10-cp37-abi3-win_amd64.whl.metadata (2.2 kB)
Collecting PyQt5-sip<13,>=12.13 (from pyqt5)
  Downloading PyQt5_sip-12.13.0-cp310-cp310-win_amd64.whl.metadata (524 bytes)
Collecting PyQt5-Qt5>=5.15.2 (from pyqt5)
  Using cached PyQt5_Qt5-5.15.2-py3-none-win_amd64.whl (50.1 MB)
Using cached PyQt5-5.15.10-cp37-abi3-win_amd64.whl (6.8 MB)
Downloading PyQt5_sip-12.13.0-cp310-cp310-win_amd64.whl (78 kB)
   ---------------------------------------- 0.0/78.5 kB ? eta -:--:--
   ----- ---------------------------------- 10.2/78.5 kB ? eta -:--:--
   ---------------------------------------- 78.5/78.5 kB 1.1 MB/s eta 0:00:00
Installing collected packages: PyQt5-Qt5, PyQt5-sip, pyqt5
Successfully installed PyQt5-Qt5-5.15.2 PyQt5-sip-12.13.0 pyqt5-5.15.10


In [7]:
LABELIMG_PATH = os.path.join('Tensorflow', 'labelimg')

In [14]:
if not os.path.exists(LABELIMG_PATH):
    !mkdir {LABELIMG_PATH}
    !git clone https://github.com/tzutalin/labelImg {LABELIMG_PATH}

In [15]:
if os.name == 'posix':
    !make qt5py3
if os.name =='nt':
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [8]:
!cd {LABELIMG_PATH} && python labelImg.py

Image:C:\Users\guilh\OneDrive\Área de Trabalho\New IA images\9_2_2024_inteligencia_artificial_34.jpg -> Annotation:C:/Users/guilh/OneDrive/Área de Trabalho/New IA images\9_2_2024_inteligencia_artificial_34.xml
Image:C:\Users\guilh\OneDrive\Área de Trabalho\New IA images\9_2_2024_inteligencia_artificial_34.jpg -> Annotation:C:/Users/guilh/OneDrive/Área de Trabalho/New IA images\9_2_2024_inteligencia_artificial_34.xml
Image:C:\Users\guilh\OneDrive\Área de Trabalho\New IA images\9_2_2024_inteligencia_artificial_42.jpg -> Annotation:C:/Users/guilh/OneDrive/Área de Trabalho/New IA images\9_2_2024_inteligencia_artificial_42.xml
Image:C:\Users\guilh\OneDrive\Área de Trabalho\New IA images\9_2_2024_inteligencia_artificial_42.jpg -> Annotation:C:/Users/guilh/OneDrive/Área de Trabalho/New IA images\9_2_2024_inteligencia_artificial_42.xml


# 6. Move them into a Training and Testing Partition

# OPTIONAL - 7. Compress them for Colab Training

In [20]:
TRAIN_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'train')
TEST_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'test')
ARCHIVE_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'archive.tar.gz')

In [21]:
!tar -czf {ARCHIVE_PATH} {TRAIN_PATH} {TEST_PATH}

In [4]:
# OPTIONAL - 8. Remove images without annotations 

images = os.listdir(IMAGES_PATH)

print(len(images))

for image in images:
    print(image)
    if image[-3:] == 'png':
        img = cv2.imread(os.path.join(IMAGES_PATH,image))
        cv2.imwrite(os.path.join(IMAGES_PATH,image.replace('.png','.jpg')),img)
        os.remove(os.path.join(IMAGES_PATH,image))
        
    image = image.replace('.png','.jpg')
    
    if image.replace('.jpg','.xml') not in images:
        print(image)
        os.remove(os.path.join(IMAGES_PATH,image))

248
14_2_2024_inteligencia artificial_10.jpg
14_2_2024_inteligencia artificial_10.jpg
14_2_2024_inteligencia artificial_100.jpg
14_2_2024_inteligencia artificial_100.xml
14_2_2024_inteligencia artificial_11.jpg
14_2_2024_inteligencia artificial_11.jpg
14_2_2024_inteligencia artificial_13.jpg
14_2_2024_inteligencia artificial_13.jpg
14_2_2024_inteligencia artificial_15.jpg
14_2_2024_inteligencia artificial_15.xml
14_2_2024_inteligencia artificial_18.jpg
14_2_2024_inteligencia artificial_18.jpg
14_2_2024_inteligencia artificial_19.jpg
14_2_2024_inteligencia artificial_19.jpg
14_2_2024_inteligencia artificial_23.jpg
14_2_2024_inteligencia artificial_23.jpg
14_2_2024_inteligencia artificial_26.jpg
14_2_2024_inteligencia artificial_26.jpg
14_2_2024_inteligencia artificial_28.jpg
14_2_2024_inteligencia artificial_28.jpg
14_2_2024_inteligencia artificial_3.jpg
14_2_2024_inteligencia artificial_3.xml
14_2_2024_inteligencia artificial_30.jpg
14_2_2024_inteligencia artificial_30.jpg
14_2_2024_in

In [8]:
images = os.listdir(IMAGES_PATH)

for image in images:
    print(image)
    
    if image[-3:] == 'jpg':
        new_name = uuid.uuid1()
        
        os.rename(os.path.join(IMAGES_PATH,image), os.path.join(IMAGES_PATH,str(new_name)+'.jpg'))
        os.rename(os.path.join(IMAGES_PATH,image.replace('.jpg','.xml')), os.path.join(IMAGES_PATH,str(new_name)+'.xml'))

14_2_2024_inteligencia artificial_100.jpg
14_2_2024_inteligencia artificial_100.xml
14_2_2024_inteligencia artificial_15.jpg
14_2_2024_inteligencia artificial_15.xml
14_2_2024_inteligencia artificial_3.jpg
14_2_2024_inteligencia artificial_3.xml
14_2_2024_inteligencia artificial_34.jpg
14_2_2024_inteligencia artificial_34.xml
14_2_2024_inteligencia artificial_4.jpg
14_2_2024_inteligencia artificial_4.xml
14_2_2024_inteligencia artificial_41.jpg
14_2_2024_inteligencia artificial_41.xml
14_2_2024_inteligencia artificial_43.jpg
14_2_2024_inteligencia artificial_43.xml
14_2_2024_inteligencia artificial_45.jpg
14_2_2024_inteligencia artificial_45.xml
14_2_2024_inteligencia artificial_48.jpg
14_2_2024_inteligencia artificial_48.xml
14_2_2024_inteligencia artificial_6.jpg
14_2_2024_inteligencia artificial_6.xml
14_2_2024_inteligencia artificial_61.jpg
14_2_2024_inteligencia artificial_61.xml
14_2_2024_inteligencia artificial_62.jpg
14_2_2024_inteligencia artificial_62.xml
14_2_2024_inteligenc

In [7]:
import re

for xml in os.listdir(IMAGES_PATH):
    if xml[-3:] == 'xml':
        with open(os.path.join(IMAGES_PATH,xml)) as f:
            txt = f.read()
            filename = xml.replace('xml', 'jpg')
            
            pattern = re.compile(r'<filename>([^<]+)<\/filename>')
            match = pattern.search(txt)
            
            old_filename = match.group(1).strip()
            
            txt = txt.replace(old_filename, filename)
        with open(os.path.join(IMAGES_PATH,xml),'w') as f:
            f.write(txt)