From c5968a0c05e68d1eae72effedb167adc76682be3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Agust=C3=ADn=20Castro?=
Date: Mon, 5 Feb 2024 12:50:09 -0300
Subject: [PATCH] Use motion estimator and mask generator when using videos

---
 norfair/common_reference_ui.py | 86 +++++++++++++++++++++++-----------
 1 file changed, 58 insertions(+), 28 deletions(-)

diff --git a/norfair/common_reference_ui.py b/norfair/common_reference_ui.py
index c7ae2e25..11dbdc71 100644
--- a/norfair/common_reference_ui.py
+++ b/norfair/common_reference_ui.py
@@ -45,7 +45,7 @@ def set_reference(
     reference: str,
     footage: str,
     transformation_getter: TransformationGetter = None,
-    mask_generator=None, 
+    mask_generator=None,
     desired_size=700,
     motion_estimator=None,
 ):
@@ -58,11 +58,11 @@ def set_reference(
     To add a point, just click a pair of points (one from the footage window, and another from the reference window) and select "Add"
     To remove a point, just select the corresponding point at the bottom left corner, and select "Remove".
 
-    If either footage or reference are videos, you can jump to future frames to pick points that match. 
+    If either the footage or the reference is a video, you can jump to future frames to pick points that match.
     For example, to jump 215 frames in the footage, just write an integer number of frames to jump next to 'Frames to skip (footage)', and select "Skip frames".
     A motion estimator can be used to relate the coordinates of the current frame you see (in either footage or reference) to coordinates in its corresponding first frame.
 
-    Once a transformation has been estimated, you can test it: 
+    Once a transformation has been estimated, you can test it:
     To Test your transformation, Select the 'Test' mode, and pick a point in either the reference or the footage, and see the associated point in the other window.
     You can keep adding more associated points until you are satisfied with the estimated transformation
@@ -74,16 +74,16 @@ def set_reference(
         Path to the footage image or video
 
     - transformation_getter: TransformationGetter, optional
-        TransformationGetter defining the type of transformation you want to fix between reference and footage. 
-        Since the transformation can be really far from identity (given that the perspectives in footage and reference can be immensely different), 
+        TransformationGetter defining the type of transformation you want to fix between reference and footage.
+        Since the transformation can be really far from identity (given that the perspectives in footage and reference can be immensely different),
         and also knowing that outliers shouldn't be common given that a human is picking the points, it is recommended to use a high ransac_reproj_threshold (~ 1000)
 
-    - mask_generator: optional function that creates a mask (np.ndarray) from a PIL image. This mask is then provided to the corresponding MotionEstimator to avoid 
+    - mask_generator: optional function that creates a mask (np.ndarray) from a PIL image. This mask is then provided to the corresponding MotionEstimator to avoid
         sampling points within the mask.
 
     - desired_size: int, optional
         How large you want the clickable windows in the UI to be.
- 
+
     - motion_estimator: MotionEstimator, optional
         When using videos for either the footage or the reference, you can provide a MotionEstimator to relate the coordinates in all the frames in the video.
        The motion estimator is only useful if the camera in either the video of the footage or the video of the reference can move. Otherwise, avoid using it.
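
For orientation, a minimal usage sketch of the signature these docstring hunks describe. The file names and the mask heuristic are illustrative, not part of the patch, and ransac_reproj_threshold=1000 simply follows the docstring's own recommendation:

    # Illustrative sketch only: paths and mask heuristic are made up.
    import numpy as np

    from norfair.camera_motion import HomographyTransformationGetter, MotionEstimator
    from norfair.common_reference_ui import set_reference

    def ignore_bright_pixels(image):
        # Build a uint8 mask from a PIL image; zeroed pixels are the ones
        # the MotionEstimator should avoid when sampling keypoints.
        gray = np.array(image.convert("L"))
        return (gray < 240).astype(np.uint8)

    set_reference(
        reference="court_diagram.png",  # image or video
        footage="match.mp4",            # image or video
        transformation_getter=HomographyTransformationGetter(
            ransac_reproj_threshold=1000  # high threshold, per the docstring
        ),
        mask_generator=ignore_bright_pixels,
        desired_size=700,
        motion_estimator=MotionEstimator(),  # only useful if the camera moves
    )
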
@@ -176,15 +176,22 @@ def estimate_transformation(points):
         return None
 
     def test_transformation(
-        change_of_coordinates, canvas, point, original_size, canvas_size, motion_transformation=None,
+        change_of_coordinates,
+        canvas,
+        point,
+        original_size,
+        canvas_size,
+        motion_transformation=None,
     ):
         point_in_new_coordinates = change_of_coordinates(np.array([point]))[0]
 
         try:
-            point_in_new_coordinates = motion_transformation.abs_to_rel(np.array([point_in_new_coordinates]))[0]
+            point_in_new_coordinates = motion_transformation.abs_to_rel(
+                np.array([point_in_new_coordinates])
+            )[0]
         except AttributeError:
             pass
- 
+
         point_in_canvas_coordinates = np.multiply(
             point_in_new_coordinates,
             np.array(
@@ -256,37 +263,46 @@ def handle_annotation(event):
         global reference_canvas_size
         global footage_original_size
         global footage_canvas_size
- 
+
         points[key]["marked"] = not points[key]["marked"]
 
         if points[key]["marked"]:
             points[key]["button"].configure(fg="black", highlightbackground="red")
 
             try:
-                footage_point_in_rel_coords = skipper["footage"]["motion_transformation"].abs_to_rel(np.array([points[key]["footage"]]))[0]
+                footage_point_in_rel_coords = skipper["footage"][
+                    "motion_transformation"
+                ].abs_to_rel(np.array([points[key]["footage"]]))[0]
                 footage_point_in_rel_coords = np.multiply(
                     footage_point_in_rel_coords,
                     np.array(
-                        [footage_canvas_size[0] / footage_original_size[0], footage_canvas_size[1] / footage_original_size[1]]
+                        [
+                            footage_canvas_size[0] / footage_original_size[0],
+                            footage_canvas_size[1] / footage_original_size[1],
+                        ]
                     ),
                 ).astype(int)
            except AttributeError:
                 footage_point_in_rel_coords = points[key]["footage_canvas"]
                 pass
- 
+
             try:
-                reference_point_in_rel_coords = skipper["reference"]["motion_transformation"].abs_to_rel(np.array([points[key]["footage"]]))[0]
+                reference_point_in_rel_coords = skipper["reference"][
+                    "motion_transformation"
+                ].abs_to_rel(np.array([points[key]["reference"]]))[0]
                 reference_point_in_rel_coords = np.multiply(
                     reference_point_in_rel_coords,
                     np.array(
-                        [reference_canvas_size[0] / reference_original_size[0], reference_canvas_size[1] / reference_original_size[1]]
+                        [
+                            reference_canvas_size[0] / reference_original_size[0],
+                            reference_canvas_size[1] / reference_original_size[1],
+                        ]
                     ),
                 ).astype(int)
             except AttributeError:
                 reference_point_in_rel_coords = points[key]["reference_canvas"]
                 pass
-
             draw_point_in_canvas(
                 canvas_footage, footage_point_in_rel_coords, color="red"
             )
@@ -348,7 +364,9 @@ def handle_annotation(event):
             mask = mask_generator(image)
         else:
             mask = None
-        motion_transformation = motion_estimator_footage.update(np.array(image), mask)
+        motion_transformation = motion_estimator_footage.update(
+            np.array(image), mask
+        )
 
     footage_original_width = image.width
    footage_original_height = image.height
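
The try/except AttributeError blocks reformatted above are the patch's fallback for a missing motion transformation: when no MotionEstimator is in use, motion_transformation is None, calling .abs_to_rel on it raises AttributeError, and the point is left in its original coordinates. A condensed sketch of that idiom, with hypothetical helper names not present in the patch:

    import numpy as np

    def abs_to_rel_or_identity(motion_transformation, point):
        # None.abs_to_rel raises AttributeError, so a missing motion
        # transformation silently degrades to the identity mapping.
        try:
            return motion_transformation.abs_to_rel(np.array([point]))[0]
        except AttributeError:
            return point

    def to_canvas_coords(point, original_size, canvas_size):
        # Scale a frame-coordinate point into UI-canvas coordinates,
        # mirroring the np.multiply(...).astype(int) calls in the hunks above.
        scale = np.array(
            [canvas_size[0] / original_size[0], canvas_size[1] / original_size[1]]
        )
        return np.multiply(point, scale).astype(int)
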
@@ -379,14 +397,20 @@ def reference_coord_chosen_in_footage(event):
         footage_point_canvas = (event.x, event.y)
         draw_point_in_canvas(canvas_footage, footage_point_canvas)
- 
         footage_point = np.array(
-            [event.x * (footage_original_width / footage_canvas_width), event.y * (footage_original_height / footage_canvas_height)]
+            [
+                event.x * (footage_original_width / footage_canvas_width),
+                event.y * (footage_original_height / footage_canvas_height),
+            ]
         )
         print("Footage window clicked at: ", footage_point.round(1))
 
         try:
-            footage_point = skipper["footage"]["motion_transformation"].rel_to_abs(np.array([footage_point]))[0].round(1)
+            footage_point = (
+                skipper["footage"]["motion_transformation"]
+                .rel_to_abs(np.array([footage_point]))[0]
+                .round(1)
+            )
         except AttributeError:
             pass
@@ -420,7 +444,6 @@ def reference_coord_chosen_in_footage(event):
         "current_frame_label": None,
     }
 
-
     motion_estimator_reference = None
     motion_transformation = None
     try:
@@ -467,18 +490,23 @@ def reference_coord_chosen_in_reference(event):
         global footage_canvas_size
         global skipper
 
-
-
         reference_point_canvas = (event.x, event.y)
         draw_point_in_canvas(canvas_reference, reference_point_canvas)
         reference_point = np.array(
-            [event.x * (reference_original_width / reference_canvas_width), event.y * (reference_original_height / reference_canvas_height)]
+            [
+                event.x * (reference_original_width / reference_canvas_width),
+                event.y * (reference_original_height / reference_canvas_height),
+            ]
         )
         print("Reference window clicked at: ", reference_point.round(1))
 
         try:
-            reference_point = skipper["reference"]["motion_transformation"].rel_to_abs(np.array([reference_point]))[0].round(1)
+            reference_point = (
+                skipper["reference"]["motion_transformation"]
+                .rel_to_abs(np.array([reference_point]))[0]
+                .round(1)
+            )
         except AttributeError:
             pass
@@ -560,8 +588,10 @@ def handle_skip_frame(event):
                 mask = mask_generator(image)
             else:
                 mask = None
-            motion_transformation = motion_estimator.update(np.array(image), mask)
- 
+            motion_transformation = motion_estimator.update(
+                np.array(image), mask
+            )
+
     skipper[video_type]["motion_estimator"] = motion_estimator
    skipper[video_type]["motion_transformation"] = motion_transformation
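
Finally, a sketch of what the skip-frames flow in these last hunks amounts to: every decoded frame is pushed through the MotionEstimator, since frames cannot simply be jumped over without losing the accumulated camera motion. The skip_frames helper and the video_iter iterable of PIL images are hypothetical stand-ins for the UI's internal video handling:

    import numpy as np

    def skip_frames(video_iter, n_frames, motion_estimator=None, mask_generator=None):
        # Advance n_frames, updating the motion transformation on every
        # intermediate frame so rel/abs coordinate conversions stay consistent.
        image = None
        motion_transformation = None
        for _ in range(n_frames):
            image = next(video_iter)
            if motion_estimator is not None:
                mask = mask_generator(image) if mask_generator is not None else None
                motion_transformation = motion_estimator.update(np.array(image), mask)
        return image, motion_transformation
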