@@ -42,37 +42,37 @@ def preprocess_function(examples):
if __name__ == "__main__":
    #data_path = "data/prot_total/prot_total.txt"
-    # path = "data/prot_minimal/prot_minimal.txt"
-    # main(data_path)
-
-
-
-    paths = ["/mnt/home/mheinzinger/deepppi1tb/ProSST5/martins_set/data_mixed/train.csv",
-             "/mnt/home/mheinzinger/deepppi1tb/ProSST5/martins_set/data_mixed/test.csv",
-             "/mnt/home/mheinzinger/deepppi1tb/ProSST5/martins_set/data_mixed/val.csv"]
-
-    print("saved paths")
-
-
-
-
-
-    print("load datasets as raw")
-    raw_datasets = datasets.load_dataset("csv", data_files=paths)
-
-    print("process datasets")
-    print(type(raw_datasets))
-    print(len(raw_datasets))
-    processed_datasets = raw_datasets.map(
-        preprocess_function,
-        batched=True,
-        num_proc=1,
-        remove_columns=[],
-        load_from_cache_file=True,
-        desc="Running tokenizer on dataset",
-    )
-
-    print(raw_datasets)
+    data_path = "data/prot_minimal/prot_minimal.txt"
+    main(data_path)
+
+
+
+    # paths = ["/mnt/home/mheinzinger/deepppi1tb/ProSST5/martins_set/data_mixed/train.csv",
+    #          "/mnt/home/mheinzinger/deepppi1tb/ProSST5/martins_set/data_mixed/test.csv",
+    #          "/mnt/home/mheinzinger/deepppi1tb/ProSST5/martins_set/data_mixed/val.csv"]
+    #
+    # print("saved paths")
+    #
+    #
+    #
+    #
+    #
+    # print("load datasets as raw")
+    # raw_datasets = datasets.load_dataset("csv", data_files=paths)
+    #
+    # print("process datasets")
+    # print(type(raw_datasets))
+    # print(len(raw_datasets))
+    # processed_datasets = raw_datasets.map(
+    #     preprocess_function,
+    #     batched=True,
+    #     num_proc=1,
+    #     remove_columns=[],
+    #     load_from_cache_file=True,
+    #     desc="Running tokenizer on dataset",
+    # )
+    #
+    # print(raw_datasets)
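The block this commit comments out follows the usual Hugging Face `datasets` recipe: load the CSV splits with `load_dataset` and run the tokenizer over them with `Dataset.map`. Below is a minimal, self-contained sketch of that pattern under stated assumptions: the `Rostlab/prot_t5_xl_uniref50` checkpoint and the `"sequence"` column name are placeholders for illustration and are not taken from this repository.

```python
# Sketch of the commented-out pipeline: load CSV splits and tokenize them with Dataset.map.
# The tokenizer checkpoint and the "sequence" column name are assumptions, not repo facts.
import datasets
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Rostlab/prot_t5_xl_uniref50")  # assumed checkpoint

def preprocess_function(examples):
    # Tokenize a batch of rows; sequences are assumed to be pre-formatted for the tokenizer.
    return tokenizer(examples["sequence"], truncation=True, max_length=512)

# Named splits instead of the original flat list of paths.
paths = {"train": "train.csv", "test": "test.csv", "validation": "val.csv"}
raw_datasets = datasets.load_dataset("csv", data_files=paths)

processed_datasets = raw_datasets.map(
    preprocess_function,
    batched=True,                # pass batches of rows to preprocess_function
    num_proc=1,                  # single worker, as in the original call
    remove_columns=[],           # keep all original columns
    load_from_cache_file=True,
    desc="Running tokenizer on dataset",
)
print(processed_datasets)
```

Passing `data_files` as a dict keeps the three files as separate named splits, whereas the original list form concatenates them into a single `train` split.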