# Learn how to scan, access, and loop over files and directories in Python 3

In [1]:
# import sth
import os
import os.path as osp
import time
import glob
# from glob import glob
from os.path import dirname, join, basename, isfile, isdir
import cv2
from natsort import natsorted

## Basic file path elements

In [21]:
# basename() returns the last component of a path (here a relative one).
file_path = "results/result_video.mp4"
vid_name = osp.basename(file_path)
print(vid_name)

img_path = "/data/image/212.jpg"

# Same call on an absolute path: only the file name is kept.
img_name = osp.basename(img_path)
print(img_name)

# dirname() drops the last component; calling it on its own result
# climbs one more level up the tree.
print("Get the directory name")
img_dir_path = osp.dirname(img_path)
print(img_dir_path)

img_dir_path = osp.dirname(img_dir_path)
print(img_dir_path)

# Path separator used by the current operating system.
sep = osp.sep
print(sep)

result_video.mp4
212.jpg
Get the directory name
/data/image
/data
/


## Split a path into different elements

In [8]:
img_path = "/data/image/212.jpg"

# splitext() -> (path without the extension, extension including the dot)
path_list = osp.splitext(img_path)
print("Split the file extension:", path_list)

# split() -> (parent directory, file name)
path_list = osp.split(img_path)
print(path_list)

# With several dots in the name, only the last suffix is the extension.
input_fname = "/homg/zhanghm/0001.checkpoint.jpeg"

os_splited_path = osp.splitext(input_fname)
print("Absolute path splited into", os_splited_path)

os_splited_path = osp.split(input_fname)
print(os_splited_path)

# A bare file name: splitext() still works, split() yields an empty parent.
img_path = "212.jpg"
splited_path = osp.splitext(img_path)
print(splited_path)

splited_path = osp.split(img_path)
print(splited_path)

Split the file extension: ('/data/image/212', '.jpg')
('/data/image', '212.jpg')
Absolute path splited into ('/homg/zhanghm/0001.checkpoint', '.jpeg')
('/homg/zhanghm', '0001.checkpoint.jpeg')
('212', '.jpg')
('', '212.jpg')


## Check path exists

In [19]:
file_path = "./data/"

# isfile() is True only for an existing regular file, so a folder prints
# "Is not a file".
print("Is a file" if osp.isfile(file_path) else "Is not a file")

# exists() is True for any existing path, file or folder alike.
# (Swap in "./data/example.txt" to try it on a file path.)
file_or_folder_path = "./data/"
print("Is a file or folder" if osp.exists(file_or_folder_path) else "Is not a file or folder")

Is not a file
Is a file or folder


## Join paths

In [20]:
root_dir = "/data/kitti//"
file_name = "123.txt"

## join() keeps the redundant trailing slashes of root_dir, so the result
## contains a double slash
file_path = osp.join(root_dir, file_name)
print(file_path)

## Using osp.normpath to eliminate the double slash
file_path = osp.join(osp.normpath(root_dir), file_name)
print(file_path)


data_root = "/data/waymo/lidar/"
save_root = "/data/waymo_preprocessed"

file_path = "/data/waymo/lidar/train/100000/123.pcd"

## Generate relative path
## relpath() is used instead of file_path.replace(data_root, ""): replace()
## is not anchored to the start of the path, and when data_root has no
## trailing slash the remainder keeps a leading "/" which makes osp.join()
## throw save_root away.
file_relative_path = osp.relpath(file_path, data_root)

## Replace the root_dir by using another root path
result_path = osp.join(save_root, file_relative_path)
print(result_path)

print(file_relative_path)

/data/kitti//123.txt
/data/kitti/123.txt
/data/waymo_preprocessed/train/100000/123.pcd
train/100000/123.pcd


## Scan a directory and count the files in each sub-folder

In [7]:
import time
from glob import glob
import os.path as osp

def scan_dir(root_dir, file_suffix=".mp4"):
    """Count the entries ending with ``file_suffix`` in each sub-directory of ``root_dir``.

    Only the direct children of ``root_dir`` are visited, and each child
    directory is listed one level deep. The path of every entry of
    ``root_dir`` is printed as it is visited.

    Ref: https://www.geeksforgeeks.org/python-os-scandir-method/

    Args:
        root_dir: Directory whose sub-directories are inspected.
        file_suffix: Suffix an entry name must end with to be counted.

    Returns:
        A list with one count per sub-directory, in ``os.scandir`` order.
    """
    counts = []
    for child in os.scandir(root_dir):
        print(child.path)
        if not child.is_dir():
            continue
        # Only the number of matches is needed, so tally instead of collecting paths.
        matched = sum(1 for item in os.scandir(child) if item.name.endswith(file_suffix))
        counts.append(matched)

    return counts
    

# Folder whose direct sub-folders are scanned.
root_dir = "/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image"

# Time one full scan that counts the .jpg entries of every sub-folder.
start = time.time()
vid = scan_dir(root_dir=root_dir, file_suffix=".jpg")
elapsed = time.time() - start
print("time is ", elapsed)

# Total number of matching entries over all sub-folders.
total = sum(vid)
print(total)


/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image/obama_weekly_019_clip_001
/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image/obama_weekly_023_clip_003
/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image/obama_weekly_021_clip_001
/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image/obama_weekly_023_clip_002
/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image/obama_weekly_010_clip_001
/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image/obama_weekly_019_clip_003
/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/obama_weekly_static_25fps_origin_image/obama_weekly_014_clip_001
/home/haimingzhang/Research/Programming/cv-fight

### glob usage
- [glob usage](https://blog.csdn.net/u013630349/article/details/47683293)
- <https://www.codenong.com/2632205/>
- <https://www.codegrepper.com/code-examples/python/how+to+find+out+number+of+images+in+folder+python>

## List the files one folder level down and save their paths to a text file

In [18]:
def scan_files(root_dir, file_name, file_suffix=".mp4"):
    """Write the files found one folder below ``root_dir`` to ``<file_name>.txt``.

    Every path matching ``<root_dir>/*/*<file_suffix>`` is recorded on its own
    line as its last three "/"-separated components, with ``file_suffix``
    removed from the end.

    Args:
        root_dir: Folder whose direct sub-folders are searched.
        file_name: Output path without the ".txt" extension.
        file_suffix: Suffix a file must end with to be listed.
    """
    # Imported locally under its own name: other cells of this notebook run
    # `from glob import glob`, which rebinds the notebook-level name `glob`
    # to the function and would make `glob.glob(...)` fail here.
    import glob as glob_module

    files_list = glob_module.glob(f'{root_dir}/*/*{file_suffix}')

    # Cut off exactly the requested suffix; computing the end index this way
    # also stays correct for an empty suffix.
    suffix_len = len(file_suffix)
    lines_list = [
        '/'.join(line[:len(line) - suffix_len].split('/')[-3:])
        for line in files_list
    ]

    lines_str = '\n'.join(lines_list)
    # The context manager guarantees the listing is flushed and closed.
    with open(f"{file_name}.txt", "w") as save_file:
        save_file.write(lines_str)


# Dataset split to list; the listing is written to "test.txt".
root_dir = "/data/data0/zhanghm/AudioVision/Wav2LipTEDHQ/test"
scan_files(root_dir=root_dir, file_name="test")

In [4]:
# Either "/home/example.png" or "/home/example.jpg" is reported as an image.
image_path = "/home/example.jpg"

# str.endswith() accepts a tuple and matches when any of the suffixes fits.
is_image = image_path.endswith((".png", ".jpg"))
print("Is image" if is_image else "Is not image")

Is image


## Move files

In [16]:
from glob import glob
import shutil
import os.path as osp

data_dir = "/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/debug"
all_files = os.listdir(data_dir)
print(all_files)

dst_dir = "/home/haimingzhang/Research/Programming/cv-fighter/facial_preprocessed/debug/face_image"
## The destination folder must exist before moving; exist_ok=True keeps the
## cell re-runnable instead of raising FileExistsError on the second run
os.makedirs(dst_dir, exist_ok=True)

for file in all_files:
    src_path = osp.join(data_dir, file)
    ## dst_dir lives inside data_dir, so on a re-run it shows up in the
    ## listing; a folder cannot be moved into itself, so skip it
    if osp.abspath(src_path) == osp.abspath(dst_dir):
        continue
    shutil.move(src_path, osp.join(dst_dir, file))


['1.txt', '2.txt']


## Copy files

In [2]:
import shutil

# File to duplicate and the new name it gets inside ./debug.
src_file = "bad_data3.txt"
dst_file = "./debug/bad_data3_new.txt"

# copy() returns the destination path, which the notebook shows as the cell output.
shutil.copy(src=src_file, dst=dst_file)

'./debug/bad_data3_new.txt'

## Delete folders

In [None]:
import shutil

# rmtree() deletes the folder together with everything inside it.
output_root = "./debug/test"
shutil.rmtree(path=output_root)