# Post-Processing Predicted Camera Pose and CO3D Ground Truth Data

In [2]:
import pandas as pd

def parse_images_txt(file_path):
    """
    Parse an ``images.txt`` pose file and extract the camera pose of each image.

    The expected layout (presumably COLMAP's text export -- confirm against the
    tool that produced the file) is two lines per image:

    1. a pose line: ``IMAGE_ID QW QX QY QZ TX TY TZ CAMERA_ID NAME``
    2. a line of 2D point observations, which may be empty

    Lines starting with ``#`` are treated as comments and ignored wherever they
    appear, so the parser does not depend on the header having a fixed length.

    Args:
        file_path: Path to the ``images.txt`` file.

    Returns:
        pandas.DataFrame with one row per image and the columns ``name``,
        ``image_id``, ``qw``, ``qx``, ``qy``, ``qz``, ``tx``, ``ty``, ``tz``.
        The columns are present even when the file contains no images.

    Raises:
        ValueError: If a pose line has fewer than ten fields, or a numeric
            field cannot be converted.
    """
    columns = ['name', 'image_id', 'qw', 'qx', 'qy', 'qz', 'tx', 'ty', 'tz']

    with open(file_path, 'r', encoding='utf-8') as f:
        # Drop comment lines up front; blank lines must be kept because an
        # empty 2D-points line is a legitimate second line of a record.
        lines = [line for line in f if not line.lstrip().startswith('#')]

    poses = []
    i = 0
    while i < len(lines):
        # Split at most 9 times so an image name containing spaces stays intact.
        fields = lines[i].strip().split(None, 9)

        if not fields:
            # A blank line where a pose line is expected (e.g. trailing
            # newlines at the end of the file) carries no data.
            i += 1
            continue

        if len(fields) < 10:
            raise ValueError(
                f"Malformed pose line in '{file_path}': expected 10 fields, "
                f"got {len(fields)}: {lines[i].strip()!r}"
            )

        image_id = int(fields[0])
        qw, qx, qy, qz = map(float, fields[1:5])
        tx, ty, tz = map(float, fields[5:8])
        name = fields[9]

        poses.append({
            'name': name,
            'image_id': image_id,
            'qw': qw, 'qx': qx, 'qy': qy, 'qz': qz,
            'tx': tx, 'ty': ty, 'tz': tz
        })

        # Skip the 2D-points line that follows every pose line.
        i += 2

    return pd.DataFrame(poses, columns=columns)


import json
from typing import Optional

def parse_frame_annotations(file_path: str) -> Optional[pd.DataFrame]:
    """
    Load a frame-annotation JSON file and flatten it into a DataFrame.

    Nested objects are expanded by ``pandas.json_normalize`` into dot-separated
    columns (for example ``image.path`` or ``viewpoint.R``).

    Args:
        file_path (str): Location of the ``frame_annotations.json`` file.

    Returns:
        Optional[pd.DataFrame]: The flattened table, or ``None`` when the file
        is missing, is not valid JSON, or any other error occurs. In every
        failure case a message is printed instead of raising.
    """
    flattened = None

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            records = json.load(handle)

        # Only reached when the JSON was read successfully.
        flattened = pd.json_normalize(records)
    except FileNotFoundError:
        print(f"❌ Error: The file was not found at '{file_path}'")
    except json.JSONDecodeError:
        print(f"❌ Error: The file at '{file_path}' is not a valid JSON.")
    except Exception as e:
        print(f"❌ An unexpected error occurred: {e}")

    # Still None if any of the handlers above fired.
    return flattened

In [12]:
# Input locations and the sequence under evaluation.
PREDICTED_PATH = '/media/daniel/storage1/2.research/1.3d-reconstruct/results/CO3D/110_13051_23361/text/images.txt'
GT_PATH = '/media/daniel/storage1/2.research/1.3d-reconstruct/dataset/CO3D/apple/frame_annotations.json'
TARGET_SEQUENCE = "110_13051_23361"  # Same sequence name that appears in PREDICTED_PATH

predicted_df = parse_images_txt(PREDICTED_PATH)
ground_truth_df = parse_frame_annotations(GT_PATH)

# parse_frame_annotations returns None on failure, so guard before using the frames.
if ground_truth_df is not None and predicted_df is not None:
    # Keep only the ground-truth rows that belong to the target sequence.
    print(f"\n--- Filtering for sequence: {TARGET_SEQUENCE} ---")
    filtered_gt_df = ground_truth_df.loc[
        ground_truth_df['sequence_name'] == TARGET_SEQUENCE
    ].copy()
    print(f"Found {len(filtered_gt_df)} matching frames in ground truth.")

    # The join key is the bare file name, i.e. the last component of 'image.path'.
    filtered_gt_df['filename'] = filtered_gt_df['image.path'].str.rsplit('/', n=1).str[-1]

    # Inner join: predicted 'name' against ground-truth 'filename'.
    merged_df = predicted_df.merge(
        filtered_gt_df,
        left_on='name',
        right_on='filename',
    )

    # Order rows by frame number and renumber the index from zero.
    print("\n--- Sorting by frame number to ensure correct sequence ---")
    merged_df = merged_df.sort_values(by='frame_number').reset_index(drop=True)

    print("\n--- Final, Cleaned, and Sorted Data ---")
    print(
        merged_df[
            ['name', 'frame_number', 'qw', 'qx', 'qy', 'qz', 'tx', 'ty', 'tz', 'viewpoint.R', 'viewpoint.T']
        ].head()
    )

    # Optional export of the merged table:
    # merged_df.to_csv('merged_data.csv', index=False)

    # Optional plot of predicted vs. ground-truth X translation:
    # import matplotlib.pyplot as plt

    # Extract ground truth translation components
    # gt_T = pd.DataFrame(merged_df['viewpoint.T'].to_list(), columns=['gt_tx', 'gt_ty', 'gt_tz'])

    # plt.figure(figsize=(12, 6))
    # plt.title('Predicted vs. Ground Truth Camera Trajectory (X-axis)')
    # plt.xlabel('Frame Number')
    # plt.ylabel('X Translation')
    # plt.plot(merged_df['frame_number'], merged_df['tx'], label='Predicted TX', marker='.')
    # plt.plot(merged_df['frame_number'], gt_T['gt_tx'], label='Ground Truth TX', marker='.')
    # plt.legend()
    # plt.grid(True)
    # plt.show()


--- Filtering for sequence: 110_13051_23361 ---
Found 202 matching frames in ground truth.

--- Sorting by frame number to ensure correct sequence ---

--- Final, Cleaned, and Sorted Data ---
              name  frame_number        qw        qx        qy        qz  \
0  frame000001.jpg             1  0.999999 -0.001639  0.000317 -0.000082   
1  frame000002.jpg             2  1.000000 -0.000355  0.000677 -0.000270   
2  frame000003.jpg             3  0.999999  0.000557  0.000702 -0.000634   
3  frame000004.jpg             4  0.999998  0.001311  0.001161 -0.000914   
4  frame000005.jpg             5  0.999995  0.002528  0.001562 -0.001090   

         tx        ty        tz  \
0  0.055844 -1.761651  0.985386   
1  0.041557 -1.742804  0.996667   
2  0.030375 -1.726939  1.008236   
3  0.027887 -1.714112  1.013429   
4  0.032747 -1.702738  1.019127   

                                         viewpoint.R  \
0  [[-0.9983327388763428, -0.007844997569918633, ...   
1  [[-0.9983288645744324, -

## Convert Quaternion to Rotation Matrix

In [None]:
import numpy as np
from scipy.spatial.transform import Rotation as R

# Work on an independent copy holding only the pose columns needed below.
post_processed_df = merged_df.loc[
    :,
    ['name', 'frame_number', 'qw', 'qx', 'qy', 'qz', 'tx', 'ty', 'tz', 'viewpoint.R', 'viewpoint.T'],
].copy()

def quat_to_matrix(row):
    """Convert the quaternion stored in ``row`` into a 3x3 rotation matrix.

    ``row`` must support key lookup for ``'qx'``, ``'qy'``, ``'qz'`` and
    ``'qw'``. The components are handed to ``Rotation.from_quat`` in
    [x, y, z, w] order, and the result of ``as_matrix()`` (a 3x3 numpy array)
    is returned.
    """
    xyzw = [row[key] for key in ('qx', 'qy', 'qz', 'qw')]
    return R.from_quat(xyzw).as_matrix()

def trans_to_vector(row):
    """Pack the ``'tx'``, ``'ty'`` and ``'tz'`` entries of ``row`` into a length-3 numpy array."""
    return np.array([row[axis] for axis in ('tx', 'ty', 'tz')])

# Attach the predicted rotation matrix and translation vector to every row.
# Both columns hold numpy arrays (see quat_to_matrix / trans_to_vector).
post_processed_df['pred_viewpoint_R'] = post_processed_df.apply(quat_to_matrix, axis=1)
post_processed_df['pred_viewpoint_T'] = post_processed_df.apply(trans_to_vector, axis=1)

print(post_processed_df[['name', 'pred_viewpoint_R']])
print(post_processed_df[['name', 'pred_viewpoint_T']])

# Export. numpy arrays would be written to the CSV through str(), i.e. numpy's
# print format: whitespace-separated, spread over several lines and rounded to
# the print precision. Converting to nested Python lists (for the export copy
# only) keeps full float precision and gives the predicted columns the same
# list formatting as the ground-truth 'viewpoint.R' / 'viewpoint.T' columns.
export_df = post_processed_df[
    ['name', 'frame_number', 'pred_viewpoint_R', 'pred_viewpoint_T', 'viewpoint.R', 'viewpoint.T']
].copy()
for array_column in ('pred_viewpoint_R', 'pred_viewpoint_T'):
    export_df[array_column] = export_df[array_column].map(lambda arr: np.asarray(arr).tolist())

export_df.to_csv('post_processed_df.csv', index=False)



                name                                   pred_viewpoint_R
0    frame000001.jpg  [[0.9999997855363876, 0.00016294812650218226, ...
1    frame000002.jpg  [[0.9999989362109638, 0.000539926128578973, 0....
2    frame000003.jpg  [[0.9999982091438151, 0.0012692536169403813, 0...
3    frame000004.jpg  [[0.9999956351318855, 0.001830993221596829, 0....
4    frame000005.jpg  [[0.9999927454323816, 0.002187878140049406, 0....
..               ...                                                ...
197  frame000198.jpg  [[0.9993601278158781, -0.02268173731189705, 0....
198  frame000199.jpg  [[0.9993914489637736, -0.02147576701304468, 0....
199  frame000200.jpg  [[0.9994587303140047, -0.020892163129824025, 0...
200  frame000201.jpg  [[0.9994978211021945, -0.020653738528080848, 0...
201  frame000202.jpg  [[0.999522353469453, -0.01900210169045953, 0.0...

[202 rows x 2 columns]
                name                                   pred_viewpoint_T
0    frame000001.jpg  [0.055843573628215

In [None]:
# Show the first five ground-truth rotation / translation entries.
print(post_processed_df.loc[:, ['viewpoint.R', 'viewpoint.T']].head(n=5))

(202, 2)
