In [1]:
def get_unique_ids(file_path):
    """
    Reads a file and returns a set of unique IDs.
    
    Args:
        file_path (str): Path to the file containing unique IDs.
    
    Returns:
        set: A set of unique IDs.
    """
    with open(file_path, 'r') as file:
        # Read lines, strip whitespace, and store them in a set
        return set(line.strip() for line in file)

def find_overlap_ids(file_a, file_b):
    """
    Finds overlapping unique IDs between two files.
    
    Args:
        file_a (str): Path to the first file (A-unique-id.txt).
        file_b (str): Path to the second file (B-unique-id.txt).
    
    Returns:
        set: A set of overlapping unique IDs.
    """
    ids_a = get_unique_ids(file_a)
    ids_b = get_unique_ids(file_b)
    
    # Find intersection (overlap) between the two sets
    overlap_ids = ids_a.intersection(ids_b)
    
    return overlap_ids

# Example usage
file_a_path = 'raw_fault/fault-'
file_b_path = 'raw_seismic'

overlapping_ids = find_overlap_ids(file_a_path, file_b_path)

# Output the results
if overlapping_ids:
    print(f"Found {len(overlapping_ids)} overlapping IDs:")
    for uid in overlapping_ids:
        print(uid)
else:
    print("No overlapping IDs found.")


FileNotFoundError: [Errno 2] No such file or directory: 'raw_fault/fault-'

In [15]:
import os
import re

list_a = list()
list_b = list()

def extract_numbers_from_filenames(folder_path):
    """
    Extracts numbers from the names of files in the specified folder.
    
    Args:
        folder_path (str): Path to the folder containing the files.
    
    Returns:
        dict: A dictionary where the keys are filenames and the values are lists of extracted numbers.
    """
    # Regular expression to match numbers (including integers and decimals)
    number_pattern = re.compile(r'\d+')

    # Dictionary to store numbers extracted from each file name
    extracted_numbers = {}

    # Loop through all files in the folder
    for filename in os.listdir(folder_path):
        # Extract numbers from the filename
        numbers = number_pattern.findall(filename)
        
        if numbers:
            # Convert the extracted numbers from strings to integers
            extracted_numbers[filename] = [int(num) for num in numbers]

    return extracted_numbers


# Example usage
folder_path = 'raw_fault'

result = extract_numbers_from_filenames(folder_path)

# Output the results
if result:
    for filename, numbers in result.items():
        list_a.append(numbers[0])
        # print(f"File: {filename} | Extracted Numbers: {numbers}")
else:
    print("No numbers found in any file names.")


# Example usage
folder_path = 'raw_seismic'

result = extract_numbers_from_filenames(folder_path)

# Output the results
if result:
    for filename, numbers in result.items():
        list_b.append(numbers[0])
        # print(f"File: {filename} | Extracted Numbers: {numbers}")
else:
    print("No numbers found in any file names.")

In [16]:
def find_intersection(list_a, list_b):
    """
    Finds intersecting numbers between two lists.
    
    Args:
        list_a (list): First list of numbers.
        list_b (list): Second list of numbers.
    
    Returns:
        list: A sorted list of intersecting numbers.
    """
    # Convert lists to sets and find the intersection
    intersection = set(list_a).intersection(set(list_b))
    
    # Convert the result back to a sorted list
    return sorted(intersection)

intersecting_numbers = find_intersection(list_a, list_b)

# Output the results
if intersecting_numbers:
    print(f"Found {len(intersecting_numbers)} intersecting numbers:")
    print(intersecting_numbers)
else:
    print("No intersecting numbers found.")


Found 362 intersecting numbers:
[1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1