In [1]:
import sys
import os

# from src.puzzle_creators import Direction

# SCRIPT_DIR is the directory that would contain "<cwd>/notebooks", i.e. the
# current working directory in absolute, normalised form.
SCRIPT_DIR = os.path.split(os.path.abspath(os.getcwd() + "/notebooks"))[0]
# Put the parent of SCRIPT_DIR on the import path so that packages living
# next to it can be imported from this notebook.
sys.path.append(os.path.split(SCRIPT_DIR)[0])

In [3]:
import pandas as pd 
import matplotlib.pyplot as plt
import glob
import os
from functools import reduce
from pathlib import Path
from notebooks.notebooks_utils import n_convex_hull,n_interior_points

In [9]:
# Sample directories live under <parent of the working directory>/data/puzzles.
samples_path = os.path.join(Path().resolve().parent, "data", "puzzles")
# Keep every entry whose path does not contain "frame-".
samples = [ex for ex in glob.glob(f"{samples_path}/*") if "frame-" not in ex]
# Sort key: convex-hull count plus the interior-point count scaled by 1.01.
samples.sort(key=lambda ex: n_convex_hull(ex) + n_interior_points(ex) * 1.01)
print(samples[:5])
print("number of samples is " + str(len(samples)))

['C:\\Users\\yaniv\\Desktop\\MSCBenGurion\\iCVL\\rgons\\data\\puzzles\\convex_hull-5-int-0-107', 'C:\\Users\\yaniv\\Desktop\\MSCBenGurion\\iCVL\\rgons\\data\\puzzles\\convex_hull-5-int-0-1826', 'C:\\Users\\yaniv\\Desktop\\MSCBenGurion\\iCVL\\rgons\\data\\puzzles\\convex_hull-5-int-0-2039', 'C:\\Users\\yaniv\\Desktop\\MSCBenGurion\\iCVL\\rgons\\data\\puzzles\\convex_hull-5-int-0-3371', 'C:\\Users\\yaniv\\Desktop\\MSCBenGurion\\iCVL\\rgons\\data\\puzzles\\convex_hull-5-int-0-367']
number of samples is 95


In [10]:
# Build `all_puzzles`: one row per puzzle result CSV, holding how many polygons
# of each type the puzzle contains (columns polygon_<k>), followed by the CSV
# file name, the sample it belongs to and the sample's n_convex_hull /
# n_interior values.
counts = []  # NOTE(review): never filled in this cell; kept only in case a later cell expects the name
samples_names = []
puzzles_names = []
dfs = []
for sample_dir in samples:
    # glob returns matches in arbitrary, OS-dependent order; sorting makes the
    # row order of `all_puzzles` reproducible across machines.
    for puzzle_csv in sorted(glob.glob(f"{sample_dir}/results/*.csv")):
        # os.path.basename understands the platform's separators; splitting on
        # "\\" only produced bare names on Windows.
        samples_names.append(os.path.basename(sample_dir))
        puzzles_names.append(os.path.basename(puzzle_csv))
        # The number of rows sharing an "id" is used as that polygon's type
        # (presumably one row per vertex - TODO confirm against the CSV writer).
        df_puzzle_polygons = (
            pd.read_csv(puzzle_csv, index_col=False)
            .groupby("id")
            .size()
            .reset_index(name="polygon_type")
        )
        # How many polygons of each type this puzzle has, in ascending type order.
        type_counts = df_puzzle_polygons["polygon_type"].value_counts().sort_index()
        # Single-row frame: {"polygon_3": 2, "polygon_4": 1, ...}
        df = pd.DataFrame(
            [{f"polygon_{polygon_type}": n for polygon_type, n in type_counts.items()}]
        )
        dfs.append(df)

if not dfs:
    # pd.concat would fail here with an opaque "No objects to concatenate".
    raise ValueError("no puzzle result CSVs were found under the sample directories")

# A polygon type that never occurs in a puzzle is missing from its row -> count 0.
all_puzzles = pd.concat(dfs, ignore_index=True).fillna(0)
# Order the polygon_<k> columns numerically so the layout does not depend on
# which puzzle happened to be read first.
polygon_columns = sorted(all_puzzles.columns, key=lambda col: int(col.split("_")[1]))
all_puzzles = all_puzzles.reindex(columns=polygon_columns)
all_puzzles["name"] = puzzles_names
all_puzzles["sample"] = samples_names
all_puzzles["n_convex_hull"] = all_puzzles["sample"].map(n_convex_hull)
all_puzzles["n_interior"] = all_puzzles["sample"].map(n_interior_points)
print(all_puzzles.head(5))
print(len(all_puzzles))

   polygon_3  polygon_4  polygon_5  polygon_6            name  \
0        3.0        0.0        0.0        0.0  1-9_1-3_s_.csv   
1        3.0        0.0        0.0        0.0    1-9_2-3_.csv   
2        1.0        1.0        0.0        0.0    1-9_3-3_.csv   
3        1.0        1.0        0.0        0.0      2-9_s_.csv   
4        0.0        0.0        1.0        0.0        3-9_.csv   

                    sample  n_convex_hull  n_interior  
0  convex_hull-5-int-0-107              5           0  
1  convex_hull-5-int-0-107              5           0  
2  convex_hull-5-int-0-107              5           0  
3  convex_hull-5-int-0-107              5           0  
4  convex_hull-5-int-0-107              5           0  
35205


In [11]:
# One row per sample (with its n_convex_hull / n_interior values) and the
# number of puzzles recorded for it.
df_samples = (
    all_puzzles
    .groupby(["sample", "n_convex_hull", "n_interior"])
    .size()
    .reset_index(name="n_puzzles")
)
# Number of samples available for each (n_convex_hull, n_interior) combination.
print(
    df_samples
    .groupby(["n_convex_hull", "n_interior"])["n_puzzles"]
    .size()
    .reset_index(name="n_samples")
)


    n_convex_hull  n_interior  n_samples
0               3           2          7
1               3           3          1
2               4           1         24
3               4           2          2
4               4           3          2
5               4           4          1
6               4           5          2
7               5           0         19
8               5           1          6
9               5           2          7
10              5           3          3
11              5           4          2
12              6           0          1
13              6           1          1
14              6           2          6
15              6           3          3
16              7           2          2
17              8           1          1
18           2000           8          1
19           2000          12          1
20           2000          15          1
21          10000          16          1
    n_convex_hull  n_interior  mean_n_puzzles
0          

In [13]:
# Average number of puzzles per sample for each (n_convex_hull, n_interior) combination.
print(
    df_samples
    .groupby(["n_convex_hull", "n_interior"])["n_puzzles"]
    .mean()
    .reset_index(name="mean_n_puzzles")
)


    n_convex_hull  n_interior  mean_n_puzzles
0               3           2        3.000000
1               3           3       27.000000
2               4           1        5.000000
3               4           2       17.000000
4               4           3       71.000000
5               4           4      195.000000
6               4           5     1291.000000
7               5           0       11.000000
8               5           1       23.666667
9               5           2       98.571429
10              5           3      533.000000
11              5           4     2363.000000
12              6           0       43.000000
13              6           1      105.000000
14              6           2      540.666667
15              6           3     2681.333333
16              7           2     3384.000000
17              8           1     3253.000000
18           2000           8       16.000000
19           2000          12       14.000000
20           2000          15     

In [12]:
# Spread of the number of puzzles per sample for each (n_convex_hull, n_interior)
# combination. Combinations backed by a single sample come out as NaN, since
# pandas' default sample standard deviation is undefined for one observation.
print(
    df_samples
    .groupby(["n_convex_hull", "n_interior"])["n_puzzles"]
    .std()
    .reset_index(name="std_n_puzzles")
)

    n_convex_hull  n_interior  std_n_puzzles
0               3           2       0.000000
1               3           3            NaN
2               4           1       0.000000
3               4           2       0.000000
4               4           3       5.656854
5               4           4            NaN
6               4           5     551.543289
7               5           0       0.000000
8               5           1       1.632993
9               5           2      24.575443
10              5           3     355.595276
11              5           4    2008.183259
12              6           0            NaN
13              6           1            NaN
14              6           2     151.099526
15              6           3    1473.679861
16              7           2     287.085353
17              8           1            NaN
18           2000           8            NaN
19           2000          12            NaN
20           2000          15            NaN
21        