## Data Preprocessing

To build the training dataset, we need the following:

1. Ground Truth relighted images
2. Radiance hints for (1) images
3. Provisional Images for (1) images
4. Generated captions for (1) images, using the BLIP2 model, for the brightened image.
5. A jsonl file that contains the information of the (1) and (2) images

The jsonl file should have the following format:

```json
{
	"image": "/absolute/path/to/your/file/view_0/white_pl_0/gt.png",
	"hint": [
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_diffuse.png",
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_ggx0.05.png",
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_ggx0.13.png",
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_ggx0.34.png"
	],
	"ref": [
		"/absolute/path/to/your/file/view_0/white_pl_1/gt.png",
		"/absolute/path/to/your/file/view_0/env_0/gt.png",
		"/absolute/path/to/your/file/view_0/env_1/gt.png",
		"..."
	],
	"text": "some text description generated by BLIP2"
}


```

In [None]:
import json
import os
from tqdm.notebook import tqdm

In [None]:
import json
import numpy as np
from PIL import Image
import random
import os
import glob
import argparse


# DO NOT modify the hyperparameters
# RESIZE_W is the pixel length that the longer side of the cropped object is
# scaled to in center_crop_img; RESIZE_H is only used there as a sanity check.
RESIZE_H, RESIZE_W = 100, 100
# H, W are the dimensions of the canvases that the resized object and its
# mask are pasted onto in center_crop_img.
H, W = 128, 128


# Use this function to preprocess data
def center_crop_img(tgt_img_path, mask_img_path):
	"""
	Crop an object out of an image with its foreground mask and center it on a canvas.

	The background (mask == 0) is painted white, the object is cropped to the
	bounding box of the mask, scaled so that its longer side becomes RESIZE_W
	pixels, and pasted in the middle of a W x H canvas. The mask receives the
	same crop / resize / paste treatment on a black canvas.

	Args:
			tgt_img_path (str): Path to the target image
			mask_img_path (str): Path to the mask image. Masks stored as {0, 1}
			are rescaled to {0, 255}; the image and the mask must have the same size.

	Returns:
			(img_canvas, mask_canvas): Tuple of RGB PIL images of size W x H

	Raises:
			ValueError: if the mask contains no foreground (255) pixel.
			AssertionError: if the bounding box is degenerate or the resized
			object does not fit the canvas.
	"""
	np_tgt_img = np.array(Image.open(tgt_img_path).convert("RGB"))

	# The mask is handled as a 3-channel uint8 array with values in [0, 255].
	np_mask_img = np.array(Image.open(mask_img_path).convert("RGB"))  # Foreground mask
	if np_mask_img.max() <= 1:
		# Some masks are stored as {0, 1}; bring them to {0, 255}.
		np_mask_img = np_mask_img * 255

	# Paint the background white.
	np_tgt_img[np_mask_img == 0] = 255

	# Bounding box of the foreground; the third index is the channel and is unused.
	ys, xs, _ = np.where(np_mask_img == 255)
	if ys.size == 0:
		raise ValueError(f"Mask has no foreground (255) pixels: {mask_img_path}")
	x1, x2, y1, y2 = xs.min(), xs.max(), ys.min(), ys.max()

	# NOTE(review): PIL's crop box excludes its right/lower edge, so the last
	# foreground column/row (x2, y2) is dropped. Kept as-is so that outputs
	# match data that was already generated with this function.
	crop_box = (x1, y1, x2, y2)
	crop_img = Image.fromarray(np_tgt_img).crop(crop_box)
	cropped_mask = Image.fromarray(np_mask_img).crop(crop_box)
	w = x2 - x1
	assert w > 0, f"{x2} - {x1} = {w}"
	h = y2 - y1
	assert h > 0, f"{y2} - {y1} = {h}"

	# Resize with respect to the longer side, keeping the aspect ratio.
	ratio = RESIZE_W / max(w, h)
	resized_w, resized_h = round(w * ratio), round(h * ratio)  # Avoid float error
	assert resized_h == RESIZE_H or resized_w == RESIZE_W

	resized_img = crop_img.resize((resized_w, resized_h))
	resized_object_mask = cropped_mask.resize((resized_w, resized_h))

	# PIL sizes are (width, height).
	img_canvas = Image.new("RGB", (W, H), (255, 255, 255))
	mask_canvas = Image.new("RGB", (W, H), (0, 0, 0))

	# Top-left corner that centers the resized object on the canvas.
	pos_w = abs(resized_w - W) // 2
	pos_h = abs(resized_h - H) // 2
	assert pos_w + resized_w <= W and pos_h + resized_h <= H

	img_canvas.paste(resized_img, (pos_w, pos_h))
	mask_canvas.paste(resized_object_mask, (pos_w, pos_h))

	return img_canvas, mask_canvas

Convert the object mask to a 3-channel mask.


Directory Structure:

```plaintext
data/
		images/
			obj_28_metal_bucket/
				obj_28_metal_bucket_002_CA2.png
				obj_28_metal_bucket_003_CA2.png  
				...
		masks/
			obj_28_metal_bucket/
				mask_obj_28_metal_bucket_CA2.png
			...
```

In [None]:
from multiprocessing import Pool
from functools import partial


# Make jsonl file, which contains the path to the images and masks
def make_jsonl(output_path, data_dir):
	"""
	Make a jsonl file that contains the path to the images and masks.
	The jsonl file has the following format, one object per line:

	{"image_path": "path/to/image", "mask_path": "path/to/mask"}
	{"image_path": "path/to/image", "mask_path": "path/to/mask"}

	Objects and images are visited in sorted order so the output is
	reproducible, and entries of `<data_dir>/images` that are not directories
	(e.g. `.DS_Store`) are skipped.

	Args:
			output_path (str): path to the output jsonl file.
			data_dir (str): path to the data directory; images are read from
			`<data_dir>/images/<object_name>/` and mask paths point into
			`<data_dir>/masks/<object_name>/`.
	"""
	images_dir = os.path.join(data_dir, "images")
	masks_dir = os.path.join(data_dir, "masks")

	with open(output_path, "w") as f:
		for object_name in sorted(os.listdir(images_dir)):
			object_dir = os.path.join(images_dir, object_name)
			if not os.path.isdir(object_dir):
				continue
			for image_name in sorted(os.listdir(object_dir)):
				image_path = os.path.join(object_dir, image_name)
				# Drop the fixed-width lighting id:
				# 'obj_28_metal_bucket_002_CA2.png' -> 'obj_28_metal_bucket_CA2.png'
				mask_name = image_name[:-11] + image_name[-7:]
				mask_path = os.path.join(masks_dir, object_name, mask_name)
				record = {"image_path": image_path, "mask_path": mask_path}
				f.write(json.dumps(record) + "\n")


def process_image(k, output_dir, data):
	"""
	Center-crop one (image, mask) pair and write both results under output_dir.

	The cropped image is saved to `<output_dir>/images/<object_name>/<k>.png`
	and the cropped mask to
	`<output_dir>/masks/<object_name>/<object_name>_<viewpoint_id>.png`.

	Args:
			k (str): key of the data, e.g. 'obj_28_metal_bucket_010_NA3'. The last
			three characters are taken as the viewpoint id and everything before
			the last eight characters as the object name.
			output_dir (str): output directory.
			data (dict): data dictionary. data[k] = {tgt_img_path: str, mask_path: str},
			where tgt_img_path is the path to the target image and mask_path is the
			path to the mask image.
	"""
	entry = data[k]
	cropped_img, cropped_mask = center_crop_img(
		entry["tgt_img_path"], entry["mask_path"]
	)

	# key example: 'obj_28_metal_bucket_010_NA3'
	object_name, viewpoint_id = k[:-8], k[-3:]

	image_dir = os.path.join(output_dir, "images", object_name)
	mask_dir = os.path.join(output_dir, "masks", object_name)
	for directory in (image_dir, mask_dir):
		if not os.path.exists(directory):
			os.makedirs(directory, exist_ok=True)

	output_img_path = os.path.join(image_dir, k + ".png")
	output_mask_path = os.path.join(mask_dir, object_name + "_" + viewpoint_id + ".png")

	cropped_img.save(output_img_path)
	cropped_mask.save(output_mask_path)
	print(f"Saved {output_img_path}")
	print(f"Saved {output_mask_path}")


# Create a pool of workers
def process_images(json_path, output_dir):
	"""
	Crop every image listed in a json file in parallel and index the results.

	Each entry is handled by process_image in a multiprocessing Pool. When all
	entries are done, a metadata file named 'img_mask_map.jsonl' is written
	into the output directory by make_jsonl.

	The input json file should have the following format:
	{
			"obj_1": {"tgt_img_path": "path/to/target/image", "mask_path": "path/to/mask/image"},
			"obj_2": {"tgt_img_path": "path/to/target/image", "mask_path": "path/to/mask/image"},
			...
	}
	Args:
			json_path (str): path to the json file.
			output_dir (str): output directory.
	"""
	with open(json_path) as f:
		data = json.load(f)
	os.makedirs(output_dir, exist_ok=True)

	# Bind everything except the key so the pool only has to ship keys around.
	worker = partial(process_image, data=data, output_dir=output_dir)

	with Pool() as pool:
		finished = pool.imap_unordered(worker, data.keys())
		progress = tqdm(finished, total=len(data))
		# Draining the iterator is what drives the work and the progress bar.
		for _ in progress:
			pass

	# make jsonl file for metadata
	metadata_path = os.path.join(output_dir, "img_mask_map.jsonl")
	make_jsonl(metadata_path, output_dir)

### Crop Images and Masks

In [None]:
# json_file = "data/metadata.json"
# with open(json_file) as f:
# 	data = json.load(f)
# 	print(len(data))

# ouput_dir = "data/center_cropped_test"
# os.makedirs(ouput_dir, exist_ok=True)

# process_images(json_file, "center_cropped_2_test")

In [None]:
# img_mask_json_path = "data/center_cropped/img_mask_map.jsonl"
# with open(img_mask_json_path) as f:
# 	data = f.readlines()

In [None]:
# radiance hints generation

import os
from dataclasses import dataclass
from typing import Optional
import json
import imageio
import numpy as np
import cv2
import simple_parsing


@dataclass
class Args:
	"""Options for generating the radiance hints of a single image."""

	# --- input ---
	img: str  # Path to the image, to generate hints for.
	seed: int = 3407  # Seed for the generation
	fov: Optional[float] = None  # Field of view for the mesh reconstruction, none for auto estimation from the image

	# --- foreground mask ---
	mask_path: Optional[str] = None  # Path to the mask for the image
	use_sam: bool = True  # Use SAM for background removal
	mask_threshold: float = 25.0  # Mask threshold for foreground object extraction

	# --- rendering ---
	power: float = 1200.0  # Power of the point light
	use_gpu_for_rendering: bool = True  # Use GPU for radiance hints rendering

	# --- point light position ---
	pl_x: float = 1.0  # X position of the point light
	pl_y: float = 1.0  # Y position of the point light
	pl_z: float = 1.0  # Z position of the point light

	# --- environment lighting ---
	env_map_path: Optional[str] = None  # Path to the environment map

# elem function
def generate_hint(
	img,
	seed=3407,
	fov=None,
	mask_path=None,
	use_sam=True,
	mask_threshold=25.0,
	power=1200.0,
	use_gpu_for_rendering=True,
	pl_x=1.0,
	pl_y=1.0,
	pl_z=1.0,
	output_dir="radiance_hints",
):
	"""
	Render the radiance hints of one image under a single point light.

	The image (and the mask, when given) is resized to 512x512, a mesh is
	reconstructed from it, and the hints are rendered into
	`<output_dir>/<img_id>/pl-<pl_x>-<pl_y>-<pl_z>-<power>/`, where `img_id`
	is the image file name up to its first dot.

	Args:
			img (str): path to the image to generate hints for.
			seed (int): stored on Args; not otherwise used in this function.
			fov (float | None): field of view passed to the mesh reconstruction.
			mask_path (str | None): path to a foreground mask. When missing, the
			mask is obtained from rm_bg instead.
			use_sam (bool): forwarded to rm_bg when no mask path is given.
			mask_threshold (float): forwarded to the mesh reconstruction.
			power (float): power of the point light.
			use_gpu_for_rendering (bool): forwarded to the hint renderer.
			pl_x, pl_y, pl_z (float): position of the point light.
			output_dir (str): root directory for the rendered hints.
	"""
	args = Args(
		img=img,
		seed=seed,
		fov=fov,
		mask_path=mask_path,
		use_sam=use_sam,
		mask_threshold=mask_threshold,
		power=power,
		use_gpu_for_rendering=use_gpu_for_rendering,
		pl_x=pl_x,
		pl_y=pl_y,
		pl_z=pl_z,
	)

	# Imported lazily so the DiLightNet modules are only loaded when needed.
	from DiLightNet.demo.mesh_recon import mesh_reconstruction  # depth to mesh
	from DiLightNet.demo.render_hints import render_hint_images  # mesh, env_map -> radiance hints
	from DiLightNet.demo.rm_bg import rm_bg

	target_size = (512, 512)

	# Load input image and generate/load mask
	input_image = cv2.resize(imageio.v3.imread(args.img), target_size)

	if args.mask_path:
		# The mask can simply be passed in explicitly.
		mask = imageio.v3.imread(args.mask_path)
		if mask.ndim == 3:
			# Multi-channel mask: keep only the last channel.
			mask = mask[..., -1]
		mask = cv2.resize(mask, target_size)
	else:
		_, mask = rm_bg(input_image, use_sam=args.use_sam)
	# Replicate the single-channel mask into three channels.
	mask = mask[..., None].repeat(3, axis=-1)

	# Render radiance hints
	point_lights = [(args.pl_x, args.pl_y, args.pl_z)]

	# cache middle results
	# TODO: the lighting condition should be given explicitly by the env map; this is a bit of a concern.
	img_id = os.path.basename(args.img).split(".")[0]
	lighting_id = f"pl-{args.pl_x}-{args.pl_y}-{args.pl_z}-{args.power}"
	output_folder = os.path.join(output_dir, img_id, lighting_id)
	os.makedirs(output_folder, exist_ok=True)
	# check if the radiance hints are already rendered and full

	print(f"Rendering radiance hints")
	# Mesh reconstruction and fov estimation for hints rendering
	# TODO: passing the mesh explicitly would be better. Our goal is to raise PSNR, and the only thing we must not use is the eval image pairs.
	mesh, estimated_fov = mesh_reconstruction(
		input_image, mask, False, args.fov, args.mask_threshold
	)
	print(f"Mesh reconstructed with fov: {estimated_fov}")
	render_hint_images(
		mesh,
		estimated_fov,
		point_lights,
		args.power,
		output_folder=output_folder,
		use_gpu=args.use_gpu_for_rendering,
	)
	print(f"Radiance hints rendered to {output_folder}")

# wrapper
def generate_hints(json_path: str, output_dir: str, gpus=None):
	"""
	Generate radiance hints for every (image, mask) pair, one worker process per GPU.

	1. load the jsonl file (blank lines are ignored)
	2. split the (image, mask) pairs into contiguous, balanced chunks, one per GPU
	3. save each non-empty chunk to `<output_dir>/chunk_<i>.jsonl`
	4. for each chunk, launch `generate_hint.py --json_path <chunk> --output_dir <output_dir>`
	   with CUDA_VISIBLE_DEVICES set to the matching GPU id, and wait for all of them
	5. write `<output_dir>/train_data_metadata.jsonl` describing every image

	How the input jsonl file looks like:
	```
	{"image_path": "path/to/image", "mask_path": "path/to/mask"}
	{"image_path": "path/to/image", "mask_path": "path/to/mask"}
	...
	```

	How a chunk file looks like (a single JSON array, despite the .jsonl suffix):
	```
	[
	{"image_id": "img_id", "object_id": "object_id", "image_path": "path/to/image",
	  "mask_path": "path/to/mask", "viewpoint_id": "NA6", "lighting_condition_id": "001",
	  "radiance_hints_dir": "output_dir/img_id"},
	...
	]
	```

	`train_data_metadata.jsonl` holds the same records, one JSON object per line.

	The ids are parsed from the image path, which is expected to look like
	`.../<object_id>/<name>_<lighting_condition_id>_<viewpoint_id>.<ext>`.

	NOTE(review): `generate_hint.py` is resolved relative to the current working
	directory and is not part of this file; the layout of the hints it writes
	below `output_dir` should be confirmed against that script.

	Args:
			json_path: path to the jsonl file containing the (image, mask) pairs
			output_dir: path to the output directory
			gpus: list of gpu ids to use for generating hints. e.g. ['0', '1', '2', '3'].
			Defaults to ['0'].

	Raises:
			ValueError: if `gpus` is empty.
	"""
	import subprocess
	import sys

	gpus = ["0"] if gpus is None else list(gpus)
	if not gpus:
		raise ValueError("gpus must contain at least one GPU id")

	with open(json_path) as f:
		lines = [line for line in f if line.strip()]

	os.makedirs(output_dir, exist_ok=True)

	# Split into len(gpus) contiguous chunks whose sizes differ by at most one,
	# so any number of lines works (including fewer lines than GPUs).
	base_size, remainder = divmod(len(lines), len(gpus))
	chunks = []
	start = 0
	for gpu_index in range(len(gpus)):
		size = base_size + (1 if gpu_index < remainder else 0)
		chunks.append(lines[start : start + size])
		start += size

	# save the chunks to jsonl files
	image_table = []
	jobs = []  # (gpu id, chunk file path) for every non-empty chunk
	for i, chunk in enumerate(chunks):
		if not chunk:
			continue
		chunk_json_dicts = []
		for line in chunk:
			pair = json.loads(line)
			image_path = pair["image_path"]
			file_name = os.path.basename(image_path)
			name_parts = file_name.split("_")
			image_id = file_name.split(".")[0]
			chunk_json_dicts.append(
				{
					"image_id": image_id,
					"object_id": os.path.basename(os.path.dirname(image_path)),
					"image_path": image_path,
					"mask_path": pair["mask_path"],
					"viewpoint_id": name_parts[-1].split(".")[0],
					"lighting_condition_id": name_parts[-2],
					"radiance_hints_dir": os.path.join(output_dir, image_id),
				}
			)
		chunk_path = os.path.join(output_dir, f"chunk_{i}.jsonl")
		with open(chunk_path, "w") as f:
			json.dump(chunk_json_dicts, f)
		image_table.extend(chunk_json_dicts)
		jobs.append((gpus[i], chunk_path))

	# generate hints for each chunk, in parallel
	processes = []
	for gpu_id, chunk_path in jobs:
		cmd = [
			sys.executable,  # same interpreter (and packages) as the caller
			"generate_hint.py",
			"--json_path",
			chunk_path,
			"--output_dir",
			output_dir,
		]
		env = os.environ.copy()
		env["CUDA_VISIBLE_DEVICES"] = str(gpu_id)  # pin this worker to one GPU
		processes.append((chunk_path, subprocess.Popen(cmd, env=env)))

	# wait for all processes to finish
	for chunk_path, process in processes:
		return_code = process.wait()
		if return_code != 0:
			print(f"Warning: hint generation for {chunk_path} exited with code {return_code}")

	# make jsonl file for metadata, using the image_table
	with open(os.path.join(output_dir, "train_data_metadata.jsonl"), "w") as f:
		for image_dict in image_table:
			json.dump(image_dict, f)
			f.write("\n")
	# TODO: Need test for this function.

In [None]:
# generate_hints(
# 	"center_cropped_2.jsonl",
# 	"radiance_hints_test",
# 	["0", "1", "2", "3", "4", "5", "6", "7"],
# )

In [None]:
# with open(img_mask_json_path) as f:
# 	# load json file as a list of dict
# 	data = [json.loads(line) for line in f]
# 	for d in tqdm(data):
# 		generate_hint(
# 			d["image_path"], mask_path=d["mask_path"], output_dir="radiance_hints"
# 		)

### Make train.jsonl

```json

{
	"image": "/absolute/path/to/your/file/view_0/white_pl_0/gt.png",
	"hint": [
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_diffuse.png",
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_ggx0.05.png",
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_ggx0.13.png",
		"/absolute/path/to/your/file/view_0/white_pl_0/gt_ggx0.34.png"
	],
	"ref": [
		"/absolute/path/to/your/file/view_0/white_pl_1/gt.png",
		"/absolute/path/to/your/file/view_0/env_0/gt.png",
		"/absolute/path/to/your/file/view_0/env_1/gt.png",
		"..."
	],
	"text": "some text description generated by BLIP2"
}

```

image: 모든 이미지들
각 이미지 별로 ref는 다음과 같다.
1. 같은 object일 것
2. 같은 view일 것
3. lighting condition이 다를 것
이러면 총 12개의 ref가 나온다.

이미지들은 object 별로 정리가 되어 있다. 이미지에 대한 경로 정보는
img_mask_map.jsonl 메타데이터 파일에 정리되어 있다.
다만 이 파일은 이미지와 마스크에 대한 맵핑만 들고 있지, 오브젝트와 view point, lighting 종류 별로 정리되어 있지 않다.

그리고 각 이미지 별로 radiance hint가 4개씩 있는데, 이것에 대한 맵핑도 필요하다.
결론적으로 다음 테이블이 있으면 된다.

1. image table: image_id, object_id, view_id, light_id, image_path, mask_path, radiance_hint_dir_path

위 테이블은 generate_hints() 함수를 실행하면 hint가 들어갈 output_dir에 train_data_metadata.jsonl 파일로 생성된다.
jsonl 파일의 형식은 generate_hints() 함수의 docstring을 참고하자.


In [None]:
# caption generator

from transformers import pipeline
from PIL import Image


class CaptionGenerator:
	"""Generate image captions with the BLIP2 image-to-text pipeline.

	Calling an instance with a single path returns one caption string; calling
	it with a list or tuple of paths returns a list of caption strings.
	"""

	def __init__(self):
		# Use a pipeline as a high-level helper
		self.pipe = pipeline("image-to-text", model="Salesforce/blip2-opt-2.7b", device=0)

	def __call__(self, img_path):
		"""
		Caption one image or a batch of images.

		Args:
				img_path (str | os.PathLike | list | tuple): a single image path,
				or a list/tuple of image paths.

		Returns:
				str for a single path, list of str for a list/tuple of paths.

		Raises:
				TypeError: if img_path is neither a path nor a list/tuple of paths.
		"""
		if isinstance(img_path, (str, os.PathLike)):
			return self._caption_text(self.pipe(self._load_rgb(img_path)))
		if isinstance(img_path, (list, tuple)):
			return self.batch_process(img_path)
		raise TypeError(
			f"img_path must be a path or a list of paths, got {type(img_path).__name__}"
		)

	def batch_process(self, img_paths):
		"""Caption several images at once and return their captions in input order."""
		if not img_paths:
			return []
		images = [self._load_rgb(img_path) for img_path in img_paths]
		return [self._caption_text(result) for result in self.pipe(images)]

	@staticmethod
	def _load_rgb(img_path):
		# Convert inside the context manager so the file handle is closed right away.
		with Image.open(img_path) as image:
			return image.convert("RGB")

	@staticmethod
	def _caption_text(result):
		# The pipeline yields a list of generations per image; for a batch the
		# outer list holds one such list per image. A bare dict is accepted too.
		if isinstance(result, list):
			result = result[0]
		return result["generated_text"]

In [None]:
# Edit Here
split = 'train'

# Build dataset/<split>_data_metadata.jsonl: one record per image found under
# dataset/<split>/images/<object_id>/, with the mask path, the radiance hints
# directory and the ids parsed from the file name
# ('<object>_<lighting_condition_id>_<viewpoint_id>.png').
dataset_dir = f'dataset/{split}'
image_dir = os.path.join(dataset_dir, 'images')
# Sorted so that the metadata file is reproducible across runs/machines.
objects = sorted(os.listdir(image_dir))
jsons = []
for object_id in tqdm(objects):
	object_dir = os.path.join(image_dir, object_id)
	if not os.path.isdir(object_dir):
		# Skip stray files such as .DS_Store next to the object directories.
		continue
	for image_name in sorted(os.listdir(object_dir)):
		name_parts = image_name.split('_')
		viewpoint_id = name_parts[-1].split('.')[0]
		lighting_condition_id = name_parts[-2]
		image_path = os.path.join(object_dir, image_name)
		mask_path = os.path.join(dataset_dir, 'masks', object_id, f'{object_id}_{viewpoint_id}.png')
		radiance_hints_dir = os.path.join(dataset_dir, 'hints', object_id, viewpoint_id, lighting_condition_id)

		jsons.append({
			"image_path": image_path,
			"mask_path": mask_path,
			"radiance_hints_dir": radiance_hints_dir,
			"object_id": object_id,
			"viewpoint_id": viewpoint_id,
			"lighting_condition_id": lighting_condition_id,
		})

with open(f'dataset/{split}_data_metadata.jsonl', 'w') as f:
	for json_dict in jsons:
		json.dump(json_dict, f)
		f.write('\n')


In [None]:
# DiLightNet train json generator
import os
import pandas as pd


def make_train_jsonl(metadata_jsonl_path: str, output_path: str = "train_data.jsonl"):
	"""
	Build the DiLightNet training jsonl from the metadata jsonl.

	Every line of the metadata file must provide the following keys:
	- image_path: path to the image
	- mask_path: path to the mask
	- radiance_hints_dir: path to the radiance hints directory
	- object_id: object id
	- viewpoint_id: viewpoint id
	- lighting_condition_id: lighting condition id

	For every row whose hints directory, image and mask all exist, one line is
	written to the output file:

	{"image": image_path, "hint": [...], "ref": [...], "text": caption}

	where "hint" lists the files of radiance_hints_dir in sorted order, "ref"
	lists the images of the same object and viewpoint under a different
	lighting condition, and "text" is the caption produced by CaptionGenerator.
	Rows with a missing directory or file are reported and skipped.

	Args:
			metadata_jsonl_path (str): path to the metadata jsonl file.
			output_path (str): path to the output jsonl file.
	"""
	table = pd.read_json(metadata_jsonl_path, lines=True)
	caption_generator = CaptionGenerator()
	rows = table.to_dict("records")

	# Index the images once by (object, viewpoint) instead of filtering the
	# whole table again for every row.
	images_by_view = {}
	for row in rows:
		view_key = (row["object_id"], row["viewpoint_id"])
		images_by_view.setdefault(view_key, []).append(
			(row["lighting_condition_id"], row["image_path"])
		)

	train_dicts = []
	for row in tqdm(rows, total=len(rows)):
		hint_dir = row["radiance_hints_dir"]

		try:
			# Sorted so the hint order does not depend on the file system.
			hint_names = sorted(os.listdir(hint_dir))
		except (OSError, TypeError) as e:
			print(f"Error: {e}")
			continue

		# check if the image and mask exist
		missing = [
			str(path)
			for path in (row["image_path"], row["mask_path"])
			if not (isinstance(path, str) and os.path.exists(path))
		]
		if missing:
			print(f"Error: missing file(s): {', '.join(missing)}")
			continue

		# Same object and viewpoint, different lighting condition.
		view_key = (row["object_id"], row["viewpoint_id"])
		ref_images = [
			image_path
			for lighting_id, image_path in images_by_view[view_key]
			if lighting_id != row["lighting_condition_id"]
		]

		train_dicts.append(
			{
				"image": row["image_path"],
				"hint": [os.path.join(hint_dir, name) for name in hint_names],
				"ref": ref_images,
				"text": caption_generator(row["image_path"]),
			}
		)
	# Drop the reference to the captioning model before writing the results.
	del caption_generator
	with open(output_path, "w") as f:
		for train_dict in train_dicts:
			json.dump(train_dict, f)
			f.write("\n")

In [None]:
# Build the eval jsonl with make_train_jsonl; despite the function's name it is
# applied here to the eval metadata (presumably produced by the metadata cell
# with split = 'eval' — TODO confirm).
make_train_jsonl('dataset/eval_data_metadata.jsonl', 'dataset/eval_data.jsonl')

In [None]:
import json

train_json_path = 'dataset/eval_data.jsonl'

# Read the jsonl file into a list of dicts, one per line.
train_data = []
with open(train_json_path) as f:
	for line in f:
		train_data.append(json.loads(line))

# Attach the mask path to every record under the 'mask' key. The object id is
# the parent directory of the image and the viewpoint id is the last
# '_'-separated token of the file name (without extension).
# NOTE(review): masks are looked up in 'dataset/masks', not in a split-specific
# directory — confirm that this is where they live.
for data in train_data:
	image_path = data['image']
	path_parts = image_path.split('/')
	object_id = path_parts[-2]
	viewpoint_id = path_parts[-1].split('_')[-1].split('.')[0]
	mask_path = f'dataset/masks/{object_id}/{object_id}_{viewpoint_id}.png'
	data['mask'] = mask_path

# Write the updated records back out, one JSON object per line.
with open('dataset/eval_data_with_mask.jsonl', 'w') as f:
	f.writelines(json.dumps(record) + '\n' for record in train_data)


## 데이터셋 폴더 구조 정리

```json
{
	"image": "dataset/eval/images/obj_28_metal_bucket/obj_28_metal_bucket_006_NC3.png",
 	"hint": [
			"dataset/eval/hints/obj_28_metal_bucket/NC3/006/hint00_ggx0.13.png",
			"dataset/eval/hints/obj_28_metal_bucket/NC3/006/hint00_ggx0.05.png",
			"dataset/eval/hints/obj_28_metal_bucket/NC3/006/hint00_ggx0.34.png",
			"dataset/eval/hints/obj_28_metal_bucket/NC3/006/hint00_diffuse.png"],
	"ref": ["dataset/eval/images/obj_28_metal_bucket/obj_28_metal_bucket_011_NC3.png"], 
	"text": "a black bucket with a handle on a white background\n",
	"mask": "dataset/masks/obj_28_metal_bucket/obj_28_metal_bucket_NC3.png"}
```

In [None]:
import os, json
import shutil

# Re-organize the dataset into <output_dir>/<object>/<viewpoint>/<lighting>/
# (gt.png plus the four gt_*.png hints) and write a jsonl whose paths point
# into that new layout.
split = 'eval'
dataset_json = f'dataset/{split}_data_with_mask.jsonl'
output_dir = f'dataset/{split}_data'
resulting_json_dicts = []

# (file name inside the hints directory, file name in the new layout)
hint_file_names = (
	('hint00_diffuse.png', 'gt_diffuse.png'),
	('hint00_ggx0.05.png', 'gt_ggx0.05.png'),
	('hint00_ggx0.13.png', 'gt_ggx0.13.png'),
	('hint00_ggx0.34.png', 'gt_ggx0.34.png'),
)


def _ids_from_image_path(path):
	"""Return (object_id, viewpoint_id, lighting_condition_id) parsed from an image path."""
	path_parts = path.split('/')
	parsed_object_id = path_parts[-2]
	name_tokens = path_parts[-1].split('_')
	return parsed_object_id, name_tokens[-1].split('.')[0], name_tokens[-2]


# read json and hold the data as a list of dict
with open(dataset_json) as f:
	data = [json.loads(line) for line in f]

for item in data:
	gt_image_path = item['image']
	object_id, viewpoint_id, lighting_condition_id = _ids_from_image_path(gt_image_path)
	hints_dir = '/'.join(item['hint'][0].split('/')[:-1])

	# create the output directory
	image_output_dir = os.path.join(output_dir, object_id, viewpoint_id, lighting_condition_id)
	os.makedirs(image_output_dir, exist_ok=True)

	# Key order ('hint', 'ref', 'image', 'text', 'mask') is the order written to the jsonl.
	dict_json = {'hint': [], 'ref': []}

	# Reference images are addressed by where they will live in the new layout.
	for ref in item['ref']:
		ref_object_id, ref_viewpoint_id, ref_lighting_condition_id = _ids_from_image_path(ref)
		dict_json['ref'].append(
			os.path.join(output_dir, ref_object_id, ref_viewpoint_id, ref_lighting_condition_id, 'gt.png')
		)

	# Copy the ground-truth image first, then the hints; a failed copy is
	# reported and its path is left out of the record.
	copy_jobs = [(gt_image_path, 'gt.png')]
	copy_jobs += [(os.path.join(hints_dir, source_name), target_name) for source_name, target_name in hint_file_names]
	for source_path, target_name in copy_jobs:
		target_path = os.path.join(image_output_dir, target_name)
		try:
			shutil.copy(source_path, target_path)
		except Exception as e:
			print(f"Error: {e}")
			continue
		if target_name == 'gt.png':
			dict_json['image'] = target_path
		else:
			dict_json['hint'].append(target_path)

	dict_json['text'] = item['text']
	# NOTE(review): no mask file is copied into output_dir by this cell, so this
	# path only resolves if the masks are placed there separately — confirm.
	mask_name = f'{object_id}_{viewpoint_id}.png'
	dict_json['mask'] = os.path.join(output_dir, 'masks', object_id, mask_name)
	resulting_json_dicts.append(dict_json)


# dump json
with open(f'dataset/{split}_data_final.jsonl', 'w') as f:
	for json_dict in resulting_json_dicts:
		f.write(json.dumps(json_dict) + '\n')