diff --git a/demos/multi_camera/demo.py b/demos/multi_camera/demo.py
index fda78a0f..8f113e80 100644
--- a/demos/multi_camera/demo.py
+++ b/demos/multi_camera/demo.py
@@ -1,7 +1,7 @@
 import argparse
 import os
 import pickle
-import sys
+from logging import warning
 
 import cv2
 import numpy as np
@@ -192,36 +192,36 @@ def run():
         "--confidence-threshold",
         type=float,
         help="Confidence threshold of detections",
-        default=0.15,
+        default=0.2,
     )
     parser.add_argument(
         "--iou-threshold",
         type=float,
-        default=0.6,
-        help="Max IoU to consider when matching detections and tracked objects",
+        default=0.5,
+        help="Max '1-IoU' to consider when matching detections and tracked objects",
     )
     parser.add_argument(
         "--distance-threshold",
         type=float,
-        default=0.1,
+        default=0.15,
         help="Maximum distance that two tracked objects of different videos can have in order to match",
     )
     parser.add_argument(
         "--max-votes-grow",
         type=int,
-        default=5,
+        default=4,
         help="Amount of votes we need before increasing the size of a cluster",
     )
     parser.add_argument(
         "--max-votes-split",
         type=int,
-        default=5,
+        default=15,
         help="Amount of votes we need before decreasing the size of a cluster",
     )
     parser.add_argument(
         "--memory",
         type=int,
-        default=3,
+        default=2,
         help="How long into the past should we consider past clusters",
     )
     parser.add_argument(
@@ -233,13 +233,13 @@ def run():
     parser.add_argument(
         "--initialization-delay",
         type=float,
-        default=3,
+        default=6,
         help="Min detections needed to start the tracked object",
     )
     parser.add_argument(
         "--hit-counter-max",
         type=int,
-        default=30,
+        default=20,
         help="Max iteration the tracked object is kept after when there are no detections",
     )
     parser.add_argument(
@@ -353,8 +353,22 @@ def mask_generator(frame):
         if args.use_motion_estimator_footage:
             motion_estimators[path].transformation = initial_transformations[path]
 
-    # now initialize the videos and their trackers
+    # initialize the reference if it exists
+    reference = {"video": None, "image": None, "motion_estimator": None}
     image_reference = None
+    if not first_video_is_reference:
+        # if failing to read it as an image, try to read it as a video
+        image_reference = cv2.imread(args.reference)
+        reference["image"] = image_reference
+        if image_reference is None:
+            video = Video(input_path=args.reference)
+            image_reference = next(video.__iter__())
+            reference["video"] = video
+        reference["motion_estimator"] = motion_estimator_reference
+
+    # now initialize the videos and their trackers
+    fps = None
+    total_frames = None
     for path in args.files:
         extension = os.path.splitext(path)[1]
         if args.output_name is None:
@@ -363,8 +377,28 @@ def mask_generator(frame):
             output_path = args.output_name
 
         video = Video(input_path=path, output_path=output_path)
+
+        # check that the fps and frame count match those of the first video
+        if fps is None:
+            fps = video.output_fps
+            total_frames = int(video.video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
+        else:
+            current_fps = video.output_fps
+            current_total_frames = int(
+                video.video_capture.get(cv2.CAP_PROP_FRAME_COUNT)
+            )
+            if current_fps != fps:
+                warning(
+                    f"{args.files[0]} is at {fps} FPS, but {path} is at {current_fps} FPS."
+                )
+            if total_frames != current_total_frames:
+                warning(
+                    f"{args.files[0]} has {total_frames} frames, but {path} has {current_total_frames} frames."
+                )
         if image_reference is None:
             image_reference = next(video.__iter__())
+        else:
+            next(video.__iter__())
         videos[path] = video
         trackers[path] = Tracker(
             distance_function="iou",
@@ -376,22 +410,20 @@ def mask_generator(frame):
         )
         tracked_objects[path] = []
 
-    reference = {"video": None, "image": None, "motion_estimator": None}
-    if not first_video_is_reference:
-        # if failing to read it as an image, try to read it as a video
-        image_reference = cv2.imread(args.reference)
-        reference["image"] = image_reference
-        if image_reference is None:
-            video = Video(input_path=path)
-            image_reference = next(video.__iter__())
-            reference["video"] = video
-        reference["motion_estimator"] = motion_estimator_reference
-
-    height_reference = image_reference.shape[0]
     big_black_frame = np.zeros(
-        tuple([args.resolution[1] * 2, args.resolution[0] * 2, 3]), dtype=np.uint8
+        tuple(
+            [
+                args.resolution[1]
+                * ((len(args.files) + (not first_video_is_reference) + 1) // 2),
+                args.resolution[0] * 2,
+                3,
+            ]
+        ),
+        dtype=np.uint8,
     )
+    height_reference = image_reference.shape[0]
+
 
     def normalized_foot_distance(tracker1, tracker2):
         return (
             np.linalg.norm(get_absolute_feet(tracker1) - get_absolute_feet(tracker2))
@@ -460,6 +492,8 @@ def normalized_foot_distance(tracker1, tracker2):
             if reference["motion_estimator"] is not None:
                 if args.args.mask_detections:
                     mask = mask_generator(frame)
+                else:
+                    mask = None
                 coord_transformations = reference["motion_estimator"].update(
                     frame, mask
                 )
diff --git a/norfair/multi_camera.py b/norfair/multi_camera.py
index ef70dd90..36545a7e 100644
--- a/norfair/multi_camera.py
+++ b/norfair/multi_camera.py
@@ -424,7 +424,8 @@ def update(self, trackers_by_camera):
             if cluster.grow_votes == self.max_votes_grow:
                 # if the votes to grow are enough, then we will expand our cluster
                 # we might need to steal ids from other clusters, so first we will remove those from the others
-                cluster.grow_votes -= 1
+                cluster.grow_votes = 0
+                cluster.split_votes = 0
 
                 other_cluster_number = 0
                 while other_cluster_number < len(self.clusters):
@@ -481,7 +482,8 @@ def update(self, trackers_by_camera):
             # if we have enough votes to split our cluster
             # we update the old cluster with the information of the biggest current cluster inside
             # for the other current clusters that intersect it, we create new clusters
-            cluster.split_votes -= 1
+            cluster.split_votes = 0
+            cluster.grow_votes = 0
 
             other_current_cluster_number = 0
             while other_current_cluster_number < len(current_clusters):
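
A note on the voting change in norfair/multi_camera.py: with the old `-= 1`, a cluster that reached `max_votes_grow` was left one vote short of the threshold, so a single further vote re-triggered the same action on the very next update; resetting both counters to zero makes each grow/split decision start from a clean slate and keeps the two opposing actions from firing back to back. A minimal sketch of the new behavior, using a stripped-down stand-in rather than norfair's actual Cluster API, with the new demo defaults (4 grow votes, 15 split votes):

    # Stand-alone sketch; norfair's real Cluster also carries per-camera
    # tracked objects, only the two vote counters are modeled here.
    class Cluster:
        def __init__(self):
            self.grow_votes = 0
            self.split_votes = 0

    def apply_vote(cluster, wants_grow, max_votes_grow=4, max_votes_split=15):
        # Accumulate one vote, then fire the action once a threshold is hit.
        if wants_grow:
            cluster.grow_votes += 1
        else:
            cluster.split_votes += 1
        if cluster.grow_votes == max_votes_grow:
            # Patched behavior: reset BOTH counters, discarding any partial
            # evidence for the opposite action as well.
            cluster.grow_votes = 0
            cluster.split_votes = 0
            return "grow"
        if cluster.split_votes == max_votes_split:
            cluster.split_votes = 0
            cluster.grow_votes = 0
            return "split"
        return None

    c = Cluster()
    print([apply_vote(c, wants_grow=True) for _ in range(8)])
    # -> [None, None, None, 'grow', None, None, None, 'grow']
    # Growth fires on the 4th and 8th vote, not on every update after the
    # 4th as the old `-= 1` logic allowed.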