diff --git a/.gitignore b/.gitignore index 2d1721f..eccc02f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ __pycache__/ .coverage mieda.egg-info htmlcov -build \ No newline at end of file +build +.ipynb_checkpoints diff --git a/comparisons/iterative.py b/comparisons/iterative.py new file mode 100644 index 0000000..0c3762c --- /dev/null +++ b/comparisons/iterative.py @@ -0,0 +1,77 @@ +def createInterval(start, finish, keys, key): + new_interval = {} + new_interval["start"] = start + new_interval["finish"] = finish + new_interval[key] = keys.copy() + return new_interval + + +def getMainPermutations(intervals, key): + conflicts = False + new_intervals = [] + for start_interval in intervals: + for compare_interval in intervals: + if start_interval["start"] >= compare_interval["finish"] or start_interval["finish"] <= compare_interval["start"]: + continue + conflicts = True + + first_finish = start_interval["start"] + if start_interval["start"] != compare_interval["start"]: + first_start = min(start_interval["start"], compare_interval["start"]) + first_finish = max(start_interval["start"], compare_interval["start"]) + keys = start_interval[key] if start_interval["start"] < compare_interval["start"] else compare_interval[key] + new_interval = createInterval(first_start, first_finish, keys, key) + if new_interval not in new_intervals: + new_intervals.append(new_interval) + + last_start = start_interval["finish"] + if start_interval["finish"] != compare_interval["finish"]: + last_start = min(start_interval["finish"], compare_interval["finish"]) + last_finish = max(start_interval["finish"], compare_interval["finish"]) + keys = start_interval[key] if start_interval["finish"] > compare_interval["finish"] else compare_interval[key] + new_interval = createInterval(last_start, last_finish, keys, key) + if new_interval not in new_intervals: + new_intervals.append(new_interval) + + new_interval = createInterval(first_finish, last_start, 
start_interval[key].union(compare_interval[key]), key) + if new_interval not in new_intervals: + new_intervals.append(new_interval) + return conflicts, new_intervals + + +def resolveConflicts(intervals, key): + resolved_intervals = [] + skip = {} + unresolved = False + for i, start_interval in enumerate(intervals): + if (start_interval["start"], start_interval["finish"]) in skip: + continue + + conflict = False + for j, compare_interval in enumerate(intervals): + if start_interval["start"] == compare_interval["start"] and start_interval["finish"] > compare_interval["finish"]: + compare_interval[key] = compare_interval[key].union(start_interval[key]) + conflict = True + break + + if start_interval["start"] < compare_interval["start"] < start_interval["finish"]: + unresolved = True + + elif (start_interval["start"], start_interval["finish"]) == (compare_interval["start"], compare_interval["finish"]): + start_interval[key] = start_interval[key].union(compare_interval[key]) + skip[(start_interval["start"], start_interval["finish"])] = True + + if not conflict: + resolved_intervals.append(start_interval) + return unresolved, resolved_intervals + +class Merge: + @staticmethod + def union(intervals: list, key: str = "set_items"): + while True: + conflict, intervals = getMainPermutations(intervals, key) + unresolved, intervals = resolveConflicts(intervals, key) if conflict else (False, intervals) + if not unresolved: + break + + return intervals \ No newline at end of file diff --git a/comparisons/iterative_optimized.py b/comparisons/iterative_optimized.py new file mode 100644 index 0000000..179d57d --- /dev/null +++ b/comparisons/iterative_optimized.py @@ -0,0 +1,110 @@ +from operator import itemgetter + + +def create_interval(start, finish, keys, key): + new_interval = {} + new_interval["start"] = start + new_interval["finish"] = finish + new_interval[key] = keys.copy() + return new_interval + + +def get_max_list_intervals(min_start, max_list, key): + intervals = [] + 
max_list = sorted(max_list, key=lambda x: x["point"]) + for i, end in enumerate(max_list): + if i == 0 or min_start["point"] == end["point"]: + if len(max_list) == 1: + intervals.append(create_interval(min_start["point"], end["point"], end["keys"], key)) + min_start["keys"] = min_start["keys"].union(end["keys"]) + continue + + keys = set() + if min_start["label"] == "start": + keys = min_start["keys"].union(intervals[-1][key]) if len(intervals) > 0 else min_start["keys"] + elif end["label"] == "finish": + if i + 1 < len(max_list): + if max_list[i+1]["label"] == "start" and intervals: + keys = intervals[-1][key].difference(max_list[i-1]["keys"]) + else: + keys = end["keys"].union(max_list[i+1]["keys"]) + else: + keys = end["keys"] + else: + keys = intervals[-1][key].difference(min_start["keys"]).difference(end["keys"]) + + intervals.append(create_interval(min_start["point"], end["point"], keys, key)) + min_start = end + return intervals + + +def merge_same_intervals(intervals, key): + new_intervals = [] + last_interval = intervals[0] + for interval in intervals[1:]: + if (last_interval["start"], last_interval["finish"]) == (interval["start"], interval["finish"]): + last_interval[key] = last_interval[key].union(interval[key]) + else: + new_intervals.append(last_interval) + last_interval = interval + if not new_intervals or new_intervals[-1] != last_interval: + new_intervals.append(last_interval) + return new_intervals + + +def create_vertex(interval, label, key): + vertex = {"point": interval[label], "keys": interval[key], "label": label} + return vertex + + +def get_main_permutations(intervals, key): + conflicts = False + new_intervals = [] + intervals = sorted(intervals, key=itemgetter("start", "finish")) + intervals = merge_same_intervals(intervals, key) + + min_start = create_vertex(intervals[0], "start", key) + max_end = intervals[0]["finish"] + max_list = [create_vertex(intervals[0], "start", key), create_vertex(intervals[0], "finish", key)] + for interval in 
intervals[1:]: + if min_start["point"] < interval["start"] < max_end: + max_list.append(create_vertex(interval, "start", key)) + max_list.append(create_vertex(interval, "finish", key)) + max_end = max(max_end, interval["finish"]) + elif min_start["point"] < interval["finish"] < max_end: + max_list.append(create_vertex(interval, "finish", key)) + elif min_start["point"] == interval["start"] and interval["finish"] > max_end: + min_start["keys"] = min_start["keys"].union(interval[key]) + max_list.append(create_vertex(interval, "finish", key)) + max_end = interval["finish"] + + if interval["start"] >= max_end: + new_intervals += get_max_list_intervals(min_start, max_list, key) + min_start = create_vertex(interval, "start", key) + max_list = [create_vertex(interval, "finish", key)] + new_intervals += get_max_list_intervals(min_start, max_list, key) + + return conflicts, new_intervals + + +def resolve_conflicts(intervals, key): + resolved_intervals = [] + current_interval = intervals[0] + for interval in intervals[1:]: + if interval["start"] > current_interval["start"]: + resolved_intervals.append(current_interval) + current_interval = interval + else: + current_interval[key] = current_interval[key].union(interval[key]) + resolved_intervals.append(current_interval) + return resolved_intervals + + +class Merge: + @staticmethod + def union(intervals: list, key: str = "set_items"): + conflict, intervals = get_main_permutations(intervals, key) + if conflict: + intervals = sorted(intervals, key=itemgetter("start", "finish")) + intervals = resolve_conflicts(intervals, key) + return intervals \ No newline at end of file diff --git a/notebooks/Speed Profiling.ipynb b/notebooks/Speed Profiling.ipynb index 92588df..0e95b98 100644 --- a/notebooks/Speed Profiling.ipynb +++ b/notebooks/Speed Profiling.ipynb @@ -30,7 +30,18 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + 
"'/usr/local/anaconda3/bin/python'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# let's make sure we are using the correct kernel \n", "import sys; sys.executable" @@ -42,7 +53,12 @@ "metadata": {}, "outputs": [], "source": [ - "from mieda.intervals import Merge" + "import os\n", + "import inspect\n", + "\n", + "currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n", + "parentdir = os.path.dirname(currentdir)\n", + "sys.path.insert(0, parentdir)" ] }, { @@ -50,6 +66,17 @@ "execution_count": 3, "metadata": {}, "outputs": [], + "source": [ + "from mieda.intervals import Merge\n", + "from comparisons.iterative import Merge as it_merge\n", + "from comparisons.iterative_optimized import Merge as itop_merge" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], "source": [ "# plotting and other things \n", "import datetime\n", @@ -73,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -86,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -97,7 +124,7 @@ " 'set_items': {'1'}}]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -108,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -117,7 +144,7 @@ "[0, 10, 20, 30, 40, 50, 60, 70, 80, 90]" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -130,18 +157,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "580945c0f4ce4e5e99a2c34cca804556", + "model_id": "12b5c0e1811a463c898b45c6a3d4deb8", "version_major": 2, "version_minor": 0 }, 
"text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -184,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -225,7 +252,7 @@ " 'set_items': {'9'}}]" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -237,7 +264,41 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def measureBulkTime(func, runs=10, time_limit=None):\n", + " run_interval_counts = list()\n", + " run_seconds_elapsed = list()\n", + "\n", + " measure_start_time = time.process_time()\n", + " for r in range(runs):\n", + " print(\"\\n--- run \" + str(r + 1) + \" ---\")\n", + " interval_count = list()\n", + " seconds_elapsed = list()\n", + " flops_used = list()\n", + " for i in tqdm(inputs):\n", + " interval_count.append(len(i))\n", + " start = time.process_time()\n", + " func(i)\n", + " seconds_elapsed.append(time.process_time() - start)\n", + " if time_limit and time.process_time() - measure_start_time > time_limit:\n", + " break\n", + "\n", + " run_interval_counts.append(interval_count)\n", + " run_seconds_elapsed.append(seconds_elapsed)\n", + " \n", + " if time_limit and time.process_time() - measure_start_time > time_limit:\n", + " break\n", + " \n", + " return (run_interval_counts, run_seconds_elapsed)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": { "scrolled": true }, @@ -253,12 +314,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4cf489b7ed9349e0ae885f82f1d7f889", + "model_id": "48998d7edbd142e78ba8865f56452c59", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -276,12 +337,12 
@@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6ca343bc1c974b0685c398f4d77af479", + "model_id": "f58500f0c82746589d46341ef303e3fc", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -299,12 +360,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d73e4d6826714385b73313078b1191ca", + "model_id": "4c28f677e38646baa024383c6b2d6d03", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -322,12 +383,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "88d16360513b47c99031af600348de21", + "model_id": "111eb840d87b49b4bcf13f87f6bb103c", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -345,12 +406,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1b1683e2390646829b735f71ee837268", + "model_id": "70e388f69d6049f89a78d4239c5efc10", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -368,12 +429,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9f78f2b4a9f44ffab9ab74856eeacd5c", + "model_id": "3d343a4223e745b792a4dae9e6e34be7", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -391,12 +452,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": 
"a0dea76a655048a184deccaaa30e5b3f", + "model_id": "2df1944894b34eee90c7dda8bca56a7f", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -414,12 +475,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "deb9bf4092f0483fad5c378a6c74b93c", + "model_id": "4479425641944f0dba303da1099ceabd", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -437,12 +498,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e9b4e35d25f44b4ba204e60d4b9a6f98", + "model_id": "f33a2c779ea74c0f81e4765638e32fd8", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -460,12 +521,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3bf0f4f8e306443799f08063d51851c6", + "model_id": "cb5235edd20745c0903ec404431dad20", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0), HTML(value='')))" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, "metadata": {}, @@ -482,113 +543,437 @@ "source": [ "# for each, use mieda to overlap the intervals and create new sets in the intervals through a union merge \n", "# track how long it takes for each set of intervals to arrive at a profile of speed based on the input size \n", - "\n", - "runs = range(0, 10)\n", - "run_interval_counts = list()\n", - "run_seconds_elasped = list()\n", - "\n", - "for r in runs:\n", - " print(\"\\n--- run \" + str(r + 1) + \" ---\")\n", - " interval_count = list()\n", - " seconds_elapsed = list()\n", - " flops_used = list()\n", - " for i in tqdm(inputs):\n", 
- " interval_count.append(len(i))\n", - " start = time.process_time()\n", - " Merge.union(intervals=i)\n", - " seconds_elapsed.append(time.process_time() - start)\n", - " \n", - " run_interval_counts.append(interval_count)\n", - " run_seconds_elasped.append(seconds_elapsed)" + "run_interval_counts, run_seconds_elapsed = measureBulkTime(Merge.union, 10)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "# flatten the list of runs for plotting \n", - "flattened_interval_counts = [item for sublist in run_interval_counts for item in sublist]\n", - "flattened_seconds_elapsed = [item for sublist in run_seconds_elasped for item in sublist]" + "def plotMultiSpeed(trials):\n", + " # flatten the list of runs for plotting \n", + " flattened_interval_counts = []\n", + " flattened_seconds_elapsed = []\n", + " algorithm_used = []\n", + " algorithms = []\n", + " for i, trial in enumerate(trials):\n", + " flattened_trial_interval_counts = [item for sublist in trial[\"interval_count\"] for item in sublist]\n", + " flattened_trial_seconds_elapsed = [item for sublist in trial[\"seconds_elapsed\"] for item in sublist]\n", + " flattened_interval_counts += flattened_trial_interval_counts\n", + " flattened_seconds_elapsed += flattened_trial_seconds_elapsed\n", + " algorithm_used += [i] * len(flattened_trial_seconds_elapsed)\n", + " algorithms.append(trial[\"algorithm\"])\n", + "\n", + " df_runs = pd.DataFrame(\n", + " np.array([flattened_interval_counts, flattened_seconds_elapsed, algorithm_used]).T,\n", + " columns=[\"interval_count\", \"seconds_elapsed\", \"algorithm\"]\n", + " )\n", + " \n", + " g = sns.lmplot(\n", + " x=\"interval_count\", \n", + " y=\"seconds_elapsed\", \n", + " data=df_runs,\n", + " order=2,\n", + " height=7,\n", + " aspect=1.5,\n", + " legend_out=False,\n", + " hue=\"algorithm\",\n", + " palette=['#4daf4a','#1f78b4','#e41a1c','#7570b3'],\n", + " line_kws={\"lw\":4, 'ls':'--'}\n", + " )\n", + 
" g.set(ylim=(-.05, 1.5), xlim=(0, 1010))\n", + " ax = plt.gca()\n", + " ax.set_title('Seconds Elapsed by Interval Count')\n", + " ax.set(xlabel='\\nInterval Count', ylabel='Seconds Elapsed\\n')\n", + " \n", + " legend = g.axes[0, 0].get_legend()\n", + " for i, text in enumerate(legend.texts):\n", + " legend.texts[i].set_text(algorithms[i])" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "df_runs = pd.DataFrame(\n", - " np.array([flattened_interval_counts, flattened_seconds_elapsed]).T,\n", - " columns=[\"interval_count\", \"seconds_elapsed\"]\n", - ")" + "def plotSingleSpeed(run_interval_counts, run_seconds_elapsed):\n", + " # flatten the list of runs for plotting \n", + " flattened_interval_counts = [item for sublist in run_interval_counts for item in sublist]\n", + " flattened_seconds_elapsed = [item for sublist in run_seconds_elapsed for item in sublist]\n", + " df_runs = pd.DataFrame(\n", + " np.array([flattened_interval_counts, flattened_seconds_elapsed]).T,\n", + " columns=[\"interval_count\", \"seconds_elapsed\"]\n", + " )\n", + " \n", + " g = sns.lmplot(\n", + " x=\"interval_count\", \n", + " y=\"seconds_elapsed\", \n", + " data=df_runs,\n", + " order=2,\n", + " height=7,\n", + " aspect=1.5,\n", + " line_kws={'color': 'red'}\n", + " )\n", + " g.set(ylim=(-.05, 1.5))\n", + " ax = plt.gca()\n", + " ax.set_title('Seconds Elapsed by Interval Count')\n", + " ax.set(xlabel='\\nInterval Count', ylabel='Seconds Elapsed\\n')" ] }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "scrolled": false - }, + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Graph speed of mieda algorithm on increasing number of intervals\n", + "plotSingleSpeed(run_interval_counts, run_seconds_elapsed)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- run 1 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6bc908c2c2fa4c42a332296bd95f97d5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "iterative_counts, iterative_time = measureBulkTime(it_merge.union, 1, time_limit=60)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- run 1 ---\n" + ] + }, { "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "77d68462872f4bc18c44343e8ad069bd", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - "[Text(6.675000000000004, 0.5, 'Seconds Elapsed\\n'),\n", - " Text(0.5, 6.800000000000011, '\\nInterval Count')]" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, - "execution_count": 17, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 2 ---\n" + ] }, { "data": { - "image/png": "\n", + "application/vnd.jupyter.widget-view+json": { + "model_id": "a1bc2e6987ae499990b9952674b0ae2e", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - "
" + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" ] }, - "metadata": { - "needs_background": "light" + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 3 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a9b6a66741cf4e9098b9d1315e6f48a0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] }, + "metadata": {}, "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 4 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "67ce0234bba3442abb9db336fa277976", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 5 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b5f474ff58d44f5aa4e45bc537a845fe", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 6 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "807d500019984258a4d5b3cc510f0c87", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 7 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + 
"model_id": "f26b59cc9c5c4930a5c16a8cf19a2963", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 8 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4675736515d041dcbb7747bf8add1d1d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 9 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cf6370faa1274a8fa121359c1574167b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--- run 10 ---\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cf99123dfdd04e44b3e09b7c9b142840", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] } ], "source": [ - "g = sns.lmplot(\n", - " x=\"interval_count\", \n", - " y=\"seconds_elapsed\", \n", - " data=df_runs,\n", - " order=2,\n", - " height=7,\n", - " aspect=1.5,\n", - " line_kws={'color': 'red'}\n", - ")\n", - "g.set(ylim=(-.05, 1.5))\n", - "ax = plt.gca()\n", - "ax.set_title('Seconds Elapsed by Interval Count')\n", - "ax.set(xlabel='\\nInterval Count', ylabel='Seconds Elapsed\\n')" + "iterative_opt_counts, 
iterative_opt_time = measureBulkTime(itop_merge.union, 10)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "algorithms = [\n", + " {\n", + " \"interval_count\": run_interval_counts,\n", + " \"seconds_elapsed\": run_seconds_elapsed,\n", + " \"algorithm\": \"mieda\"\n", + " },\n", + " {\n", + " \"interval_count\": iterative_counts,\n", + " \"seconds_elapsed\": iterative_time,\n", + " \"algorithm\": \"iterative\"\n", + " },\n", + " {\n", + " \"interval_count\": iterative_opt_counts,\n", + " \"seconds_elapsed\": iterative_opt_time,\n", + " \"algorithm\": \"iterative optimized\"\n", + " }\n", + "]" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotMultiSpeed(algorithms)" + ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -598,7 +983,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -629,9 +1014,9 @@ ], "metadata": { "kernelspec": { - "display_name": "venv", + "display_name": "Python 3", "language": "python", - "name": "venv" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -643,7 +1028,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/tests/test_merge_iterative.py b/tests/test_merge_iterative.py new file mode 100644 index 0000000..08ea8cf --- /dev/null +++ b/tests/test_merge_iterative.py @@ -0,0 +1,732 @@ +# Authors: Valentino Constantinou , Asitang Mishra +# License: Apache 2.0 + +from comparisons.iterative import Merge + +import datetime +import pytest +from operator import itemgetter + + +# fixtures +@pytest.fixture() +def interval_inputs() -> list: + """ + This fixture generates a set of all possible scenarios for consideration of one interval against another. + :return: a list of intervals which contains all possible scenarios. 
+ """ + + # create a list to store various interval types + intervals = list() + + # B starts and ends after A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts after A but ends at the same time as A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts after A and ends before A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 3, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts at the same time as A and ends after A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts at the same time as A and ends before A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 3, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts at the same time as A and ends at the same time as A also + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), 
"finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts wholly after A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 5, 1, 0, 0), "finish": datetime.datetime(2020, 1, 7, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + return intervals + + +@pytest.fixture() +def interval_outputs() -> list: + """ + This fixture generates a set of correct outputs against the scenarios provided in the output of interval_inputs(). + :return: a list of intervals which contains the correct output for all possible scenarios. + """ + + # create a list to store the various outputs + outputs = list() + + # B starts and ends after A + outputs.append( + [ + {'start': datetime.datetime(2020, 1, 1, 1, 0), 'finish': datetime.datetime(2020, 1, 2, 1, 0), + 'set_items': {'1'}}, + {'start': datetime.datetime(2020, 1, 2, 1, 0), 'finish': datetime.datetime(2020, 1, 4, 1, 0), + 'set_items': {'1', '2'}}, + {'start': datetime.datetime(2020, 1, 4, 1, 0), 'finish': datetime.datetime(2020, 1, 6, 1, 0), + 'set_items': {'2'}} + ] + ) + + # B starts after A but ends at the same time as A + outputs.append( + [ + {'start': datetime.datetime(2020, 1, 1, 1, 0), 'finish': datetime.datetime(2020, 1, 2, 1, 0), + 'set_items': {'1'}}, + {'start': datetime.datetime(2020, 1, 2, 1, 0), 'finish': datetime.datetime(2020, 1, 4, 1, 0), + 'set_items': {'2', '1'}} + ] + ) + + # B starts after A and ends before A + outputs.append( + [ + {'start': datetime.datetime(2020, 1, 1, 1, 0), 'finish': datetime.datetime(2020, 1, 2, 1, 0), + 'set_items': {'1'}}, + {'start': datetime.datetime(2020, 1, 2, 1, 0), 'finish': datetime.datetime(2020, 1, 3, 1, 0), + 'set_items': {'2', '1'}}, + {'start': datetime.datetime(2020, 1, 3, 1, 0), 'finish': datetime.datetime(2020, 1, 4, 1, 0), + 'set_items': {'1'}} + ] + ) + + # B starts at the same time as A and ends after A + 
outputs.append( + [ + {'start': datetime.datetime(2020, 1, 1, 1, 0), 'finish': datetime.datetime(2020, 1, 4, 1, 0), + 'set_items': {'2', '1'}}, + {'start': datetime.datetime(2020, 1, 4, 1, 0), 'finish': datetime.datetime(2020, 1, 6, 1, 0), + 'set_items': {'2'}} + ] + ) + + # B starts at the same time as A and ends before A + outputs.append( + [ + {'start': datetime.datetime(2020, 1, 1, 1, 0), 'finish': datetime.datetime(2020, 1, 3, 1, 0), + 'set_items': {'1', '2'}}, + {'start': datetime.datetime(2020, 1, 3, 1, 0), 'finish': datetime.datetime(2020, 1, 4, 1, 0), + 'set_items': {'1'}} + ] + ) + + # B starts at the same time as A and ends at the same time as A also + outputs.append( + [ + {'start': datetime.datetime(2020, 1, 1, 1, 0), 'finish': datetime.datetime(2020, 1, 4, 1, 0), + 'set_items': {'2', '1'}} + ] + ) + + # B starts wholly after A + outputs.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 5, 1, 0, 0), "finish": datetime.datetime(2020, 1, 7, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + return outputs + + +@pytest.fixture() +def complex_interval_inputs() -> list: + """ + This fixture generates a set of complex scenarios for consideration of one interval against another. + :return: a list of intervals which contains some more complex scenarios. 
+ """ + + # create a list to store various interval types + intervals = list() + + # B starts after A, C starts at the same time as B but ends before B + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 5, 1, 0, 0), + "set_items": {"3"}}, + ] + ) + + # B starts after A, C starts at the same time as B but ends after + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 7, 1, 0, 0), + "set_items": {"3"}} + ] + ) + + # B starts after A, C starts at the same time as B and ends at the same time as B + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"3"}} + ] + ) + + # B starts after A, C starts at the same time as B and ends at the same time as B, but A contains 2 items + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1", "A"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": 
datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"3"}} + ] + ) + + # B starts after A, but C starts at the same time as B and ends before. D starts at the end time of C and ends last. + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 5, 1, 0, 0), + "set_items": {"3"}}, + {"start": datetime.datetime(2020, 1, 5, 1, 0, 0), "finish": datetime.datetime(2020, 1, 8, 1, 0, 0), + "set_items": {"4"}} + ] + ) + + # B starts after A, but C starts at the same time as B and ends before. D starts at the end time of C and ends last. + # now however there is a "dangling" interval in isolation + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 5, 1, 0, 0), + "set_items": {"3"}}, + {"start": datetime.datetime(2020, 1, 5, 1, 0, 0), "finish": datetime.datetime(2020, 1, 8, 1, 0, 0), + "set_items": {"4"}}, + {"start": datetime.datetime(2020, 1, 11, 1, 0, 0), "finish": datetime.datetime(2020, 1, 12, 1, 0, 0), + "set_items": {"5"}} + ] + ) + + # test for the ability to handle more than one duplicate interval in the input data + intervals.append( + [ + {'start': 1477875126, 'finish': 1477920079, 'set_items': {'1'}}, + {'start': 1477875126, 'finish': 1477920079, 'set_items': {'1'}}, + {'start': 1477875126, 'finish': 1477920079, 'set_items': {'1'}}, + {'start': 1477901090, 'finish': 1477938541, 'set_items': {'2'}}, + {'start': 1477901090, 'finish': 1477938541, 'set_items': 
@pytest.fixture()
def complex_interval_outputs() -> list:
    """
    Provides the expected merge result for each scenario produced by complex_interval_inputs().
    :return: a list with one entry per scenario, each entry being the list of intervals expected after merging.
    """

    def day(d):
        # every datetime boundary in these scenarios falls at 01:00 on a day of January 2020
        return datetime.datetime(2020, 1, d, 1, 0, 0)

    return [
        # B starts after A, C starts at the same time as B but ends before B
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'3', '2', '1'}},
            {'start': day(4), 'finish': day(5), 'set_items': {'3', '2'}},
            {'start': day(5), 'finish': day(6), 'set_items': {'2'}}
        ],
        # B starts after A, C starts at the same time as B but ends after
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'2', '1', '3'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2', '3'}},
            {'start': day(6), 'finish': day(7), 'set_items': {'3'}}
        ],
        # B starts after A, C starts at the same time as B and ends at the same time as B
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'2', '1', '3'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2', '3'}}
        ],
        # B starts after A, C starts at the same time as B and ends at the same time as B, but A contains 2 items
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1', 'A'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'2', '3', '1', 'A'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2', '3'}}
        ],
        # B starts after A, but C starts at the same time as B and ends before.
        # D starts at the end time of C and ends last.
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'1', '2', '3'}},
            {'start': day(4), 'finish': day(5), 'set_items': {'2', '3'}},
            {'start': day(5), 'finish': day(6), 'set_items': {'2', '4'}},
            {'start': day(6), 'finish': day(8), 'set_items': {'4'}}
        ],
        # same as the previous scenario, but with a "dangling" interval in isolation
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'1', '2', '3'}},
            {'start': day(4), 'finish': day(5), 'set_items': {'2', '3'}},
            {'start': day(5), 'finish': day(6), 'set_items': {'2', '4'}},
            {'start': day(6), 'finish': day(8), 'set_items': {'4'}},
            {'start': day(11), 'finish': day(12), 'set_items': {'5'}}
        ],
        # test for the ability to handle more than one duplicate interval in the input data
        [
            {'start': 1477875126, 'finish': 1477901090, 'set_items': {'1'}},
            {'start': 1477901090, 'finish': 1477915725, 'set_items': {'1', '2'}},
            {'start': 1477915725, 'finish': 1477920079, 'set_items': {'1', '2', '3'}},
            {'start': 1477920079, 'finish': 1477938541, 'set_items': {'2', '3'}},
            {'start': 1477938541, 'finish': 1477939605, 'set_items': {'3'}},
            {'start': 1477939605, 'finish': 1477961500, 'set_items': {'3', '4'}},
            {'start': 1477961500, 'finish': 1477977748, 'set_items': {'3', '4', '5'}},
            {'start': 1477977748, 'finish': 1477987473, 'set_items': {'3', '5'}},
            {'start': 1477987473, 'finish': 1478006402, 'set_items': {'5'}}
        ],
    ]
@pytest.fixture()
def interval_outputs_integers() -> list:
    """
    Provides the expected merge results for intervals that use integers as indices.
    :return: a list with one entry per scenario, each entry being the list of intervals expected after merging.
    """

    return [
        # B starts and ends after A
        [
            {'start': 1, 'finish': 2, 'set_items': {'1'}},
            {'start': 2, 'finish': 4, 'set_items': {'1', '2'}},
            {'start': 4, 'finish': 6, 'set_items': {'2'}}
        ],
        # B starts after A but ends at the same time as A
        [
            {'start': 1, 'finish': 2, 'set_items': {'1'}},
            {'start': 2, 'finish': 4, 'set_items': {'2', '1'}}
        ],
        # B starts after A and ends before A
        [
            {'start': 1, 'finish': 2, 'set_items': {'1'}},
            {'start': 2, 'finish': 3, 'set_items': {'2', '1'}},
            {'start': 3, 'finish': 4, 'set_items': {'1'}}
        ],
    ]
@pytest.fixture()
def interval_outputs_strings() -> list:
    """
    Provides the expected merge results for intervals that use strings as indices.
    :return: a list with one entry per scenario, each entry being the list of intervals expected after merging.
    """

    return [
        # B starts and ends after A
        [
            {'start': "A", 'finish': "B", 'set_items': {'1'}},
            {'start': "B", 'finish': "D", 'set_items': {'1', '2'}},
            {'start': "D", 'finish': "F", 'set_items': {'2'}}
        ],
        # B starts after A but ends at the same time as A
        [
            {'start': "A", 'finish': "B", 'set_items': {'1'}},
            {'start': "B", 'finish': "D", 'set_items': {'2', '1'}}
        ],
        # B starts after A and ends before A
        [
            {'start': "A", 'finish': "B", 'set_items': {'1'}},
            {'start': "B", 'finish': "C", 'set_items': {'2', '1'}},
            {'start': "C", 'finish': "D", 'set_items': {'1'}}
        ],
    ]
def test_complex_interval_types(complex_interval_inputs, complex_interval_outputs) -> None:
    """
    For each of the complex scenarios, tests whether the merged output is correct.
    :param complex_interval_inputs: A set of complex interval overlaps.
    :param complex_interval_outputs: A set of outputs for all complex interval overlaps.
    :return: None
    """

    # zip() stops at the shorter sequence, so make sure no scenario is silently left untested
    assert len(complex_interval_inputs) == len(complex_interval_outputs)

    by_bounds = itemgetter('start', 'finish')
    for scenario, expected in zip(complex_interval_inputs, complex_interval_outputs):
        merged = sorted(Merge.union(scenario), key=by_bounds)
        expected = sorted(expected, key=by_bounds)

        assert len(merged) == len(expected)

        # compare interval by interval first so a failure points at the offending interval
        for merged_interval, expected_interval in zip(merged, expected):
            assert merged_interval == expected_interval
        assert merged == expected
def test_alternate_attribute_key(interval_inputs) -> None:
    """
    Ensures that an attribute other than "set_items" can be used as input.
    :param interval_inputs: A set of all types of interval overlaps.
    :return: None
    """

    # first rename the attribute from "set_items" to "items" on every input interval
    for scenario in interval_inputs:
        for interval in scenario:
            interval["items"] = interval.pop("set_items")

    # ensure there's some output and that the key is correct
    for scenario in interval_inputs:
        merged = Merge.union(scenario, key="items")

        # without this, an empty result would let the key check below pass vacuously
        assert merged

        for interval in merged:
            assert "items" in interval
def test_nx_exception() -> None:
    """
    Tests an exception which may occur in some scenarios such as one in which a "waterfall" of intervals occurs.

    Builds ten waterfalls (the seed interval followed by 0, 10, ..., 90 intervals, each shifted two hours
    later than its predecessor) and checks that every one of them merges without raising.
    :return: None
    """

    # the seed interval every waterfall starts from
    seed_interval = {
        "start": datetime.datetime(2020, 1, 1, 1, 0, 0),
        "finish": datetime.datetime(2020, 1, 4, 1, 0, 0),
        "set_items": {"1"}
    }
    shift = datetime.timedelta(hours=2)

    inputs = list()
    for extra_intervals in range(0, 100, 10):

        # give each waterfall its own copy of the seed so the scenarios stay independent
        intervals = [dict(seed_interval, set_items=set(seed_interval["set_items"]))]

        for j in range(extra_intervals):

            # each new interval is the previous one moved 2 hours later, carrying its own item
            previous = intervals[-1]
            intervals.append(
                {
                    "start": previous["start"] + shift,
                    "finish": previous["finish"] + shift,
                    "set_items": {str(j)}
                }
            )

        inputs.append(intervals)

    # make sure every waterfall runs without errors and returns a result
    # (previously only the last, longest waterfall was ever merged)
    for intervals in inputs:
        out = Merge.union(intervals=intervals)
        assert out is not None
+ :return: a list of intervals which contains all possible scenarios. + """ + + # create a list to store various interval types + intervals = list() + + # B starts and ends after A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts after A but ends at the same time as A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts after A and ends before A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 3, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts at the same time as A and ends after A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts at the same time as A and ends before A + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 3, 1, 0, 0), + "set_items": {"2"}} + ] + ) + + # B starts at the same time as A and ends at the same time as A also + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + 
@pytest.fixture()
def interval_outputs() -> list:
    """
    Provides the expected merge result for each scenario produced by interval_inputs().
    :return: a list with one entry per scenario, each entry being the list of intervals expected after merging.
    """

    def day(d):
        # every boundary in these scenarios falls at 01:00 on a day of January 2020
        return datetime.datetime(2020, 1, d, 1, 0, 0)

    return [
        # B starts and ends after A
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'1', '2'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2'}}
        ],
        # B starts after A but ends at the same time as A
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'2', '1'}}
        ],
        # B starts after A and ends before A
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(3), 'set_items': {'2', '1'}},
            {'start': day(3), 'finish': day(4), 'set_items': {'1'}}
        ],
        # B starts at the same time as A and ends after A
        [
            {'start': day(1), 'finish': day(4), 'set_items': {'2', '1'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2'}}
        ],
        # B starts at the same time as A and ends before A
        [
            {'start': day(1), 'finish': day(3), 'set_items': {'1', '2'}},
            {'start': day(3), 'finish': day(4), 'set_items': {'1'}}
        ],
        # B starts at the same time as A and ends at the same time as A also
        [
            {'start': day(1), 'finish': day(4), 'set_items': {'2', '1'}}
        ],
        # B starts wholly after A
        [
            {'start': day(1), 'finish': day(4), 'set_items': {'1'}},
            {'start': day(5), 'finish': day(7), 'set_items': {'2'}}
        ],
    ]
+ """ + + # create a list to store various interval types + intervals = list() + + # B starts after A, C starts at the same time as B but ends before B + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 5, 1, 0, 0), + "set_items": {"3"}}, + ] + ) + + # B starts after A, C starts at the same time as B but ends after + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 7, 1, 0, 0), + "set_items": {"3"}} + ] + ) + + # B starts after A, C starts at the same time as B and ends at the same time as B + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"3"}} + ] + ) + + # B starts after A, C starts at the same time as B and ends at the same time as B, but A contains 2 items + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1", "A"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": 
datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"3"}} + ] + ) + + # B starts after A, but C starts at the same time as B and ends before. D starts at the end time of C and ends last. + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 5, 1, 0, 0), + "set_items": {"3"}}, + {"start": datetime.datetime(2020, 1, 5, 1, 0, 0), "finish": datetime.datetime(2020, 1, 8, 1, 0, 0), + "set_items": {"4"}} + ] + ) + + # B starts after A, but C starts at the same time as B and ends before. D starts at the end time of C and ends last. + # now however there is a "dangling" interval in isolation + intervals.append( + [ + {"start": datetime.datetime(2020, 1, 1, 1, 0, 0), "finish": datetime.datetime(2020, 1, 4, 1, 0, 0), + "set_items": {"1"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 6, 1, 0, 0), + "set_items": {"2"}}, + {"start": datetime.datetime(2020, 1, 2, 1, 0, 0), "finish": datetime.datetime(2020, 1, 5, 1, 0, 0), + "set_items": {"3"}}, + {"start": datetime.datetime(2020, 1, 5, 1, 0, 0), "finish": datetime.datetime(2020, 1, 8, 1, 0, 0), + "set_items": {"4"}}, + {"start": datetime.datetime(2020, 1, 11, 1, 0, 0), "finish": datetime.datetime(2020, 1, 12, 1, 0, 0), + "set_items": {"5"}} + ] + ) + + # test for the ability to handle more than one duplicate interval in the input data + intervals.append( + [ + {'start': 1477875126, 'finish': 1477920079, 'set_items': {'1'}}, + {'start': 1477875126, 'finish': 1477920079, 'set_items': {'1'}}, + {'start': 1477875126, 'finish': 1477920079, 'set_items': {'1'}}, + {'start': 1477901090, 'finish': 1477938541, 'set_items': {'2'}}, + {'start': 1477901090, 'finish': 1477938541, 'set_items': 
@pytest.fixture()
def complex_interval_outputs() -> list:
    """
    Provides the expected merge result for each scenario produced by complex_interval_inputs().
    :return: a list with one entry per scenario, each entry being the list of intervals expected after merging.
    """

    def day(d):
        # every datetime boundary in these scenarios falls at 01:00 on a day of January 2020
        return datetime.datetime(2020, 1, d, 1, 0, 0)

    return [
        # B starts after A, C starts at the same time as B but ends before B
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'3', '2', '1'}},
            {'start': day(4), 'finish': day(5), 'set_items': {'3', '2'}},
            {'start': day(5), 'finish': day(6), 'set_items': {'2'}}
        ],
        # B starts after A, C starts at the same time as B but ends after
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'2', '1', '3'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2', '3'}},
            {'start': day(6), 'finish': day(7), 'set_items': {'3'}}
        ],
        # B starts after A, C starts at the same time as B and ends at the same time as B
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'2', '1', '3'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2', '3'}}
        ],
        # B starts after A, C starts at the same time as B and ends at the same time as B, but A contains 2 items
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1', 'A'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'2', '3', '1', 'A'}},
            {'start': day(4), 'finish': day(6), 'set_items': {'2', '3'}}
        ],
        # B starts after A, but C starts at the same time as B and ends before.
        # D starts at the end time of C and ends last.
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'1', '2', '3'}},
            {'start': day(4), 'finish': day(5), 'set_items': {'2', '3'}},
            {'start': day(5), 'finish': day(6), 'set_items': {'2', '4'}},
            {'start': day(6), 'finish': day(8), 'set_items': {'4'}}
        ],
        # same as the previous scenario, but with a "dangling" interval in isolation
        [
            {'start': day(1), 'finish': day(2), 'set_items': {'1'}},
            {'start': day(2), 'finish': day(4), 'set_items': {'1', '2', '3'}},
            {'start': day(4), 'finish': day(5), 'set_items': {'2', '3'}},
            {'start': day(5), 'finish': day(6), 'set_items': {'2', '4'}},
            {'start': day(6), 'finish': day(8), 'set_items': {'4'}},
            {'start': day(11), 'finish': day(12), 'set_items': {'5'}}
        ],
        # test for the ability to handle more than one duplicate interval in the input data
        [
            {'start': 1477875126, 'finish': 1477901090, 'set_items': {'1'}},
            {'start': 1477901090, 'finish': 1477915725, 'set_items': {'1', '2'}},
            {'start': 1477915725, 'finish': 1477920079, 'set_items': {'1', '2', '3'}},
            {'start': 1477920079, 'finish': 1477938541, 'set_items': {'2', '3'}},
            {'start': 1477938541, 'finish': 1477939605, 'set_items': {'3'}},
            {'start': 1477939605, 'finish': 1477961500, 'set_items': {'3', '4'}},
            {'start': 1477961500, 'finish': 1477977748, 'set_items': {'3', '4', '5'}},
            {'start': 1477977748, 'finish': 1477987473, 'set_items': {'3', '5'}},
            {'start': 1477987473, 'finish': 1478006402, 'set_items': {'5'}}
        ],
    ]
+ """ + + # create a list to store various interval types + intervals = list() + + # B starts and ends after A + intervals.append( + [ + {"start": 1, "finish": 4, "set_items": {"1"}}, + {"start": 2, "finish": 6, "set_items": {"2"}} + ] + ) + + # B starts after A but ends at the same time as A + intervals.append( + [ + {"start": 1, "finish": 4, "set_items": {"1"}}, + {"start": 2, "finish": 4, "set_items": {"2"}} + ] + ) + + # B starts after A and ends before A + intervals.append( + [ + {"start": 1, "finish": 4, "set_items": {"1"}}, + {"start": 2, "finish": 3, "set_items": {"2"}} + ] + ) + + return intervals + + +@pytest.fixture() +def interval_outputs_integers() -> list: + """ + This fixture generates a set of correct outputs against intervals with integers as indices. + :return: a list of intervals which contains the correct output for the integer-indexed intervals. + """ + + # create a list to store the various outputs + outputs = list() + + # B starts and ends after A + outputs.append( + [ + {'start': 1, 'finish': 2, 'set_items': {'1'}}, + {'start': 2, 'finish': 4, 'set_items': {'1', '2'}}, + {'start': 4, 'finish': 6, 'set_items': {'2'}} + ] + ) + + # B starts after A but ends at the same time as A + outputs.append( + [ + {'start': 1, 'finish': 2, 'set_items': {'1'}}, + {'start': 2, 'finish': 4, 'set_items': {'2', '1'}} + ] + ) + + # B starts after A and ends before A + outputs.append( + [ + {'start': 1, 'finish': 2, 'set_items': {'1'}}, + {'start': 2, 'finish': 3, 'set_items': {'2', '1'}}, + {'start': 3, 'finish': 4, 'set_items': {'1'}} + ] + ) + + return outputs + + +@pytest.fixture() +def interval_inputs_strings() -> list: + """ + This fixture generates a set of intervals with strings as indices. + :return: a list of intervals to use for testing. 
+ """ + + # create a list to store various interval types + intervals = list() + + # B starts and ends after A + intervals.append( + [ + {"start": "A", "finish": "D", "set_items": {"1"}}, + {"start": "B", "finish": "F", "set_items": {"2"}} + ] + ) + + # B starts after A but ends at the same time as A + intervals.append( + [ + {"start": "A", "finish": "D", "set_items": {"1"}}, + {"start": "B", "finish": "D", "set_items": {"2"}} + ] + ) + + # B starts after A and ends before A + intervals.append( + [ + {"start": "A", "finish": "D", "set_items": {"1"}}, + {"start": "B", "finish": "C", "set_items": {"2"}} + ] + ) + + return intervals + + +@pytest.fixture() +def interval_outputs_strings() -> list: + """ + This fixture generates a set of correct outputs against intervals with strings as indices. + :return: a list of intervals which contains the correct output for the string-indexed intervals. + """ + + # create a list to store the various outputs + outputs = list() + + # B starts and ends after A + outputs.append( + [ + {'start': "A", 'finish': "B", 'set_items': {'1'}}, + {'start': "B", 'finish': "D", 'set_items': {'1', '2'}}, + {'start': "D", 'finish': "F", 'set_items': {'2'}} + ] + ) + + # B starts after A but ends at the same time as A + outputs.append( + [ + {'start': "A", 'finish': "B", 'set_items': {'1'}}, + {'start': "B", 'finish': "D", 'set_items': {'2', '1'}} + ] + ) + + # B starts after A and ends before A + outputs.append( + [ + {'start': "A", 'finish': "B", 'set_items': {'1'}}, + {'start': "B", 'finish': "C", 'set_items': {'2', '1'}}, + {'start': "C", 'finish': "D", 'set_items': {'1'}} + ] + ) + + return outputs + + +# tests +def test_interval_types(interval_inputs, interval_outputs) -> None: + """ + For each of the available input types, tests whether the output is correct. + :param interval_inputs: A set of all types of interval overlaps. + :param interval_outputs: A set of outputs for all types of interval overlaps. 
+ :return: None + """ + + for output in interval_outputs: + for interval in output: + interval["set_items"] = set(sorted(interval["set_items"], reverse=True)) + + for i, o in zip(interval_inputs, interval_outputs): + out = sorted(Merge.union(i), key=itemgetter('start', 'finish')) + assert out == sorted(o, key=itemgetter('start', 'finish')) + + +def test_complex_interval_types(complex_interval_inputs, complex_interval_outputs) -> None: + """ + For each of the available input types, tests whether the output is correct. + :param complex_interval_inputs: A set of complex interval overlaps. + :param complex_interval_outputs: A set of outputs for all complex interval overlaps. + :return: None + """ + + for i, o in zip(complex_interval_inputs, complex_interval_outputs): + out = sorted(Merge.union(i), key=itemgetter('start', 'finish')) + assert len(out) == len(o) + + o = sorted(o, key=itemgetter('start', 'finish')) + assert out == o + for out, expected in zip(out, o): + assert out == expected + + +def test_interval_integers(interval_inputs_integers, interval_outputs_integers) -> None: + """ + For each of the available input types, tests whether the output is correct. + :param interval_inputs_integers: A set of all types of interval overlaps. + :param interval_outputs_integers: A set of outputs for all types of interval overlaps. + :return: None + """ + + for output in interval_outputs_integers: + for interval in output: + interval["set_items"] = set(sorted(interval["set_items"], reverse=True)) + + for i, o in zip(interval_inputs_integers, interval_outputs_integers): + out = sorted(Merge.union(i), key=itemgetter('start', 'finish')) + assert out == sorted(o, key=itemgetter('start', 'finish')) + + +def test_interval_strings(interval_inputs_strings, interval_outputs_strings) -> None: + """ + For each of the available input types, tests whether the output is correct. + :param interval_inputs_strings: A set of all types of interval overlaps. 
 + :param interval_outputs_strings: A set of outputs for all types of interval overlaps. + :return: None + """ + + for output in interval_outputs_strings: + for interval in output: + interval["set_items"] = set(sorted(interval["set_items"], reverse=True)) + + for i, o in zip(interval_inputs_strings, interval_outputs_strings): + out = sorted(Merge.union(i), key=itemgetter('start', 'finish')) + assert out == sorted(o, key=itemgetter('start', 'finish')) + + +# def test_incorrect_format(interval_inputs) -> None: +# """ +# Ensures that the correct type and number of warnings are issued when the user uses a +# list of items instead of a set, which is the correct formatting. +# :param interval_inputs: A set of all types of interval overlaps. +# :return: None +# """ + +# # convert the set to a list, which is the incorrect input format +# for i in interval_inputs: + +# for j in i: +# j["set_items"] = sorted(j["set_items"]) + +# with pytest.warns(UserWarning) as record: + +# # try to do a merge with a list +# Merge.union(i) + + +def test_alternate_attribute_key(interval_inputs) -> None: + """ + Ensures that an attribute other than "set_items" can be used as input. + :param interval_inputs: A set of all types of interval overlaps. + :return: None + """ + + # first change the attribute from "set_items" to "items" + for i in interval_inputs: + for j in i: + j["items"] = j["set_items"] + del j["set_items"] + + # ensure there's some output and that the key is correct + for i in interval_inputs: + out = Merge.union(i, key="items") + for j in out: + assert "items" in j.keys() + + +# def test_return_graph(interval_inputs) -> None: +# """ +# Ensures that a networkx.DiGraph object is returned when 'return_graph' is +# set to True. +# :param interval_inputs: A set of all types of interval overlaps. 
# :return: None
# """

# for i in interval_inputs:
#     out = Merge.union(i, return_graph=True)
#     assert type(out).__name__ == "DiGraph"


def test_nx_exception() -> None:
    """
    Tests an exception which may occur in some scenarios such as one in which a "waterfall" of intervals occurs.

    Ten scenarios are built. Each one holds the seed interval followed by 0, 10, ..., 90 further intervals, where
    every further interval is the previous one moved two hours later. Merge.union is run on every scenario and
    must return a result each time.
    :return: None
    """

    # every follow-up interval is the previous interval moved by this amount
    shift = datetime.timedelta(hours=2)

    # the interval that each scenario starts from
    seed_interval = {
        "start": datetime.datetime(2020, 1, 1, 1, 0, 0),
        "finish": datetime.datetime(2020, 1, 4, 1, 0, 0),
        "set_items": {"1"},
    }

    # number of follow-up intervals appended per scenario: 0, 10, ..., 90
    interval_counts = range(0, 100, 10)

    inputs = []
    for count in interval_counts:

        # copy the seed (dict and set) so that scenarios never share mutable state
        intervals = [dict(seed_interval, set_items=set(seed_interval["set_items"]))]

        for j in range(count):

            # get the last interval
            previous = intervals[-1]

            # add 2 hours to the start and end times and label the interval with its index
            intervals.append(
                {
                    "start": previous["start"] + shift,
                    "finish": previous["finish"] + shift,
                    "set_items": {str(j)},
                }
            )

        inputs.append(intervals)

    # make sure every scenario runs without errors and returns a result
    for intervals in inputs:
        out = Merge.union(intervals=intervals)
        assert out is not None