## Introduction
- This notebook contains the experiment that evaluates the effectiveness of the PQE experiment on the RAISE-2k dataset.
- Each image is JPEG compressed twice with random quality factors in range $[50,95]$ with intervals of $5$. We then estimate the first quantization matrix of the doubly compressed image using our PQE implementation.
- Make sure to download the RAISE dataset before running this notebook. The dataset used in this notebook is the RAISE-2k subset.

## Imports and settings

In [1]:
import io
import os
import sys
import time
import glob
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import to_tensor, to_pil_image
import torchjpeg.codec
from fast_histogram import histogram1d

In [144]:
sys.path.append('/home/y/yuanbo/fyp/estimate_jpeg')
sys.path.append('/home/y/yuanbo/fyp/estimate_jpeg/utils')

from utils import *
from q_table_estimation_torchjpeg import *

## Create randomized 1st and 2nd compression dataset

Randomized the quality factors chosen within the range of [50-100] with intervals of 5.

In [15]:
np.arange(50,105,5)

array([ 50,  55,  60,  65,  70,  75,  80,  85,  90,  95, 100])

In [49]:
qf_pairs = np.random.choice(np.arange(50, 105, 5), size=(1999,2))

In [51]:
print("QF1 > QF2:", (qf_pairs[:,0] > qf_pairs[:,1]).sum())
print("QF1 < QF2:", (qf_pairs[:,0] < qf_pairs[:,1]).sum())
print("QF1 = QF2:", (qf_pairs[:,0] == qf_pairs[:,1]).sum())

QF1 > QF2: 905
QF1 < QF2: 916
QF1 = QF2: 178


Further split quality factors into 3 buckets: low, med, high
- Low: 50,  55,  60,  65
- Med: 70,  75,  80,  85
- High: 90,  95, 100

In [130]:
# Low-High
lh = ((qf_pairs[:,0] <= 65) & (qf_pairs[:,1] >= 90)).sum()

# High-Low
hl = ((qf_pairs[:,0] >= 90) & (qf_pairs[:,1] <= 65)).sum()

# Low-Med
lm = ((qf_pairs[:,0] <= 65) & ((qf_pairs[:,1] > 65) & (qf_pairs[:,1] < 90))).sum()

# Med-Low
ml = (((qf_pairs[:,0] > 65) & (qf_pairs[:,0] < 90)) & (qf_pairs[:,1] <= 65)).sum()

# Med-High
mh = (((qf_pairs[:,0] > 65) & (qf_pairs[:,0] < 90)) & (qf_pairs[:,1] >= 90)).sum()

# High-Med
hm = ((qf_pairs[:,0] >= 90) & ((qf_pairs[:,1] > 65) & (qf_pairs[:,1] < 90))).sum()

# Low-Low
ll = ((qf_pairs[:,0] <= 65) & (qf_pairs[:,1] <= 65)).sum()

# Med-Med
mm = (((qf_pairs[:,0] > 65) & (qf_pairs[:,0] < 90)) & ((qf_pairs[:,1] > 65) & (qf_pairs[:,1] < 90))).sum()

# High-High
hh = ((qf_pairs[:,0] >= 90) & (qf_pairs[:,1] >= 90)).sum()

In [134]:
print("low-high: ", lh)
print("high-low: ", hl)
print("low-med: ", lm)
print("med-low: ", ml)
print("med-high: ", mh)
print("high-med: ", hm)
print("low-low: ", ll)
print("med-med: ", mm)
print("high-high: ", hh)

low-high:  220
high-low:  198
low-med:  274
med-low:  266
med-high:  182
high-med:  212
low-low:  249
med-med:  261
high-high:  137


In [136]:
with open('random_qf_pairs.csv', 'w') as f:
    f.write('\n'.join([f"{qf[0]},{qf[1]}" for qf in qf_pairs]))

### Make dataset

In [138]:
test_path = glob.glob("../raise_1k/*") + glob.glob("../raise_2k/*")
print(len(test_path))

1999


In [None]:
df = pd.read_csv("random_qf_pairs.csv", header=None)
qf_pairs = list(df.itertuples(index=False, name=None))

In [139]:
data_folder = f'data_random_qf'
os.makedirs(data_folder)

qf_img_mapping = {
    "img": [],
    "qf_1": [],
    "qf_2": []
}

print(f"JPEG Compression randomized quality factors. Create dataset at {data_folder}...")
start = time.time()
for i, p in enumerate(test_path):
    qf_1, qf_2 = qf_pairs[i]
    q_table_1 = scale_q_table(STANDARD_LUMI_QT, qf_1)
    q_table_2 = scale_q_table(STANDARD_LUMI_QT, qf_2)
    
    name, ext = os.path.splitext(os.path.basename(p))

    im = Image.open(p).convert('L')

    # first compression
    first_buffer = jpeg_compress_to_buffer(im, q_table_1.reshape(-1))
    first_im = Image.open(first_buffer)

    # second compression
    second_buffer = jpeg_compress_to_buffer(first_im, q_table_2.reshape(-1))

    with open(f"{data_folder}/{name}.jpg", "wb") as f:
        f.write(second_buffer.getbuffer())
    
    qf_img_mapping["img"].append(f"{name}.jpg")
    qf_img_mapping["qf_1"].append(qf_1)
    qf_img_mapping["qf_2"].append(qf_2)

duration = time.time() - start
print(f"Time taken to create dataset of {len(test_path)} images: {round(duration, 2)} seconds.")

JPEG Compression randomized quality factors. Create dataset at data_random_qf...
Time taken to create dataset of 1999 images: 1778.81 seconds.


In [141]:
df = pd.DataFrame(qf_img_mapping)
df.head()

Unnamed: 0,img,qf_1,qf_2
0,r146771d7t.jpg,60,55
1,r1e3303ebt.jpg,90,50
2,r1a2095b2t.jpg,50,60
3,r15dfc1b8t.jpg,100,55
4,r02897203t.jpg,50,100


In [142]:
df.to_csv("img_qf_mapping.csv", index=False)

### Estimate previous quantization matrix

In [146]:
n = 100
bin_range = 100

results = {
    "img": [],
    "qf_1": [],
    "qf_2": [],
    "est_q_table": []
}
start = time.time()
time_taken = 0

running_first_15_acc = 0

for i in range(len(test_path)):
    p = os.path.join(data_folder, qf_img_mapping["img"][i])
    res = estimate_q_table(p, n, bin_range)
    time_taken = (time.time() - start)

    # calculate simple metrics
    gt_q_table = scale_q_table(STANDARD_LUMI_QT, qf_img_mapping["qf_1"][i])
    diff = res - gt_q_table
    first_15_dct = diff[ZIGZAG_ROW_IDX[:15], ZIGZAG_COL_IDX[:15]]
    first_15_acc = ((first_15_dct == 0).sum() / len(first_15_dct)) * 100
    running_first_15_acc += first_15_acc

    # print intermediate results
    first_15_results_str = f"F15 DCT: {round(first_15_acc, 2)}% (Acc)"
    time_str = f"{round(time_taken, 2)}/{round(time_taken/(i+1)*len(test_path), 2)}s"
    print(f"\r[{i+1}/{len(test_path)}] - [{time_str}] - [{first_15_results_str}]", end="", flush=True)

    if i != 0 and i % 100 == 0:
        running_first_15_results_str = f"First 15 DCT: {round(running_first_15_acc / (i+1), 2)}% (Acc)"
        print(f"\nCurrent dataset metrics - [{running_first_15_results_str}]")

    results["img"].append(qf_img_mapping["img"][i])
    results["qf_1"].append(qf_img_mapping["qf_1"][i])
    results["qf_2"].append(qf_img_mapping["qf_2"][i])
    results["est_q_table"].append(res)


print(f"Total time taken: {round(time_taken, 2)} seconds.")
print(f"Ave time taken per image: {round(time_taken / len(test_path), 2)} seconds.")

[101/1999] - [2421.1/47918.63s] - [F15 DCT: 100.0% (Acc)]
Current dataset metrics - [First 15 DCT: 69.77% (Acc)]
[201/1999] - [4759.26/47332.19s] - [F15 DCT: 53.33% (Acc)]
Current dataset metrics - [First 15 DCT: 68.52% (Acc)]
[301/1999] - [7107.55/47202.66s] - [F15 DCT: 26.67% (Acc)]
Current dataset metrics - [First 15 DCT: 66.27% (Acc)]
[401/1999] - [9433.63/47027.0s] - [F15 DCT: 100.0% (Acc)]]
Current dataset metrics - [First 15 DCT: 65.57% (Acc)]
[501/1999] - [12020.35/47961.43s] - [F15 DCT: 93.33% (Acc)]
Current dataset metrics - [First 15 DCT: 66.64% (Acc)]
[601/1999] - [14589.63/48526.9s] - [F15 DCT: 100.0% (Acc)]]
Current dataset metrics - [First 15 DCT: 66.69% (Acc)]
[701/1999] - [17031.06/48566.46s] - [F15 DCT: 60.0% (Acc)]]
Current dataset metrics - [First 15 DCT: 66.53% (Acc)]
[801/1999] - [19478.58/48611.34s] - [F15 DCT: 33.33% (Acc)]
Current dataset metrics - [First 15 DCT: 66.51% (Acc)]
[901/1999] - [21976.1/48757.18s] - [F15 DCT: 40.0% (Acc)]]]
Current dataset metrics -

In [147]:
with open('res_random_qf.pkl', 'wb') as handle:
    pickle.dump(results, handle)