# In [None]:
import re
import pandas as pd
from collections import defaultdict

# A non-negative decimal number with an optional exponent, e.g. "3.905",
# "90", "120.", ".5" or "1.0e+01".
_NUMBER = r'(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

# Three whitespace-separated numbers. The trailing lookahead makes a token
# such as "1.2.3" fail to match instead of being silently truncated.
_TRIPLE = r'(' + _NUMBER + r'\s+' + _NUMBER + r'\s+' + _NUMBER + r')(?![\d.])'

_SPACE_GROUP_RE = re.compile(r'International:\s*(\S+)')
_LATTICE_CONSTANTS_RE = re.compile(r'Lattice Constants:\s*' + _TRIPLE)
_LATTICE_ANGLES_RE = re.compile(r'Lattice Angles:\s*' + _TRIPLE)

# Line of dashes that separates one phase from the next in the input file.
_PHASE_SEPARATOR = '-----------------------------------------'


def extract_structure_data(file_path, functional_name, grouped_data):
    """Parse a structure-info file and record lattice data per space group.

    The file content is split on the dashed separator. Every chunk that
    contains an ``International:`` space-group label, a ``Lattice Constants:``
    triple and a ``Lattice Angles:`` triple contributes one entry. Chunks that
    lack any of the three fields, or whose numeric fields are malformed
    (e.g. ``1.2.3``), are skipped.

    Numbers may be written with an exponent (``1.0e+01``); the exponent is
    part of the parsed value rather than being cut off.

    Args:
        file_path: Path of the text file to read.
        functional_name: Key under which the values are stored for each
            space group (the defaults created here are 'PBE', 'PBESol'
            and 'HSE').
        grouped_data: Mapping updated in place. Each space group maps to a
            dict keyed by functional name whose value is the six-element
            list of lattice constants followed by lattice angles.

    Returns:
        None. Results are written into ``grouped_data``.

    Note:
        If a space group occurs more than once for the same functional, the
        last occurrence replaces the earlier ones.
    """
    with open(file_path, 'r') as file:
        data = file.read()

    for phase in data.split(_PHASE_SEPARATOR):
        space_group_match = _SPACE_GROUP_RE.search(phase)
        constants_match = _LATTICE_CONSTANTS_RE.search(phase)
        angles_match = _LATTICE_ANGLES_RE.search(phase)

        # Incomplete or malformed chunk: nothing to record.
        if not (space_group_match and constants_match and angles_match):
            continue

        space_group = space_group_match.group(1)
        tokens = constants_match.group(1).split() + angles_match.group(1).split()
        values = [float(token) for token in tokens]

        # Create the per-functional slots the first time a space group is seen.
        entry = grouped_data.setdefault(
            space_group, {'PBE': [], 'PBESol': [], 'HSE': []}
        )
        entry[functional_name] = values

def save_to_csv(file_paths, output_csv):
    """Merge lattice data from several structure-info files into one CSV.

    Each input file is parsed with ``extract_structure_data``; the results
    are grouped by space group and written as one row per space group.
    The first column is "Space Group", followed by six columns
    (a, b, c, alpha, beta, gamma) for each of PBE, PBESol and HSE, in that
    order. A functional with no data for a space group is filled with NaN.

    Args:
        file_paths: Iterable of ``(file_path, functional_name)`` pairs.
        output_csv: Destination path passed to ``DataFrame.to_csv``.

    Returns:
        None. The CSV is written without an index column.
    """
    functionals = ('PBE', 'PBESol', 'HSE')
    parameters = ('a', 'b', 'c', 'alpha', 'beta', 'gamma')

    # Gather every file's results into one mapping keyed by space group.
    collected = defaultdict(lambda: {'PBE': [], 'PBESol': [], 'HSE': []})
    for path, name in file_paths:
        extract_structure_data(path, name, collected)

    # Column names: "<functional> <parameter>" for every combination.
    columns = ["Space Group"]
    for functional in functionals:
        columns += [functional + " " + parameter for parameter in parameters]

    # Placeholder used when a functional has no values for a space group.
    missing = [float('nan')] * len(parameters)

    rows = []
    for space_group, per_functional in collected.items():
        row = [space_group]
        for functional in functionals:
            values = per_functional[functional]
            row += values if values else missing
        rows.append(row)

    pd.DataFrame(rows, columns=columns).to_csv(output_csv, index=False)

# Destination of the merged table
output_csv = 'structure_data_grouped.csv'

# Pair every structure-info file with the functional name it is stored under
file_paths = list(zip(
    ('HSE_structure_info.txt', 'PBE_structure_info.txt', 'PBESol_structure_info.txt'),
    ('HSE', 'PBE', 'PBESol'),
))

# Parse all inputs and write the grouped CSV
save_to_csv(file_paths, output_csv)
