In [1]:
import re

# Sample file name used to try out the pattern.
test_filename = "InRhO3(mp-1203220).csv"

# Capture the formula and the numeric mp-id with ONE pattern, so the id is guaranteed
# to be the one inside the parentheses that follow the formula (a second, unanchored
# search could pick up a different "mp-<digits>" elsewhere in the name).
match = re.search(r'([a-zA-Z0-9]+)\(mp-(\d+)\)', test_filename)
if match:
    formula, mp_id = match.groups()
    print(f"Formula: {formula}, mp-id: {mp_id}")
else:
    print(f"Invalid file name format: {test_filename}")

Formula: InRhO3, mp-id: 1203220


In [2]:
import os
import pandas as pd


# Folder of per-compound CSV files to annotate; each is read for its 'ion', 'V' and 'cn' columns.
csv_folder_path = '/coordination number change cubic/'
# Reference table; read for its 'Element', 'V', 'CN' and 'IR' columns.
external_csv_path = 'Ionic dielectric polarizabilityy.csv'
# Destination folder for the annotated copies.
output_folder = '/Ionic radium/'

# exist_ok makes this a no-op when the folder is already there.
os.makedirs(output_folder, exist_ok=True)

external_df = pd.read_csv(external_csv_path)

# Build the (Element, V, CN) -> IR lookup once instead of re-filtering the whole
# reference table for every row of every file. setdefault keeps the FIRST entry
# for a repeated key, matching the "first matching row" rule.
ir_lookup = {}
for ref_element, ref_v, ref_cn, ref_ir in zip(
        external_df['Element'], external_df['V'], external_df['CN'], external_df['IR']):
    ir_lookup.setdefault((ref_element, ref_v, ref_cn), ref_ir)

for csv_file in os.listdir(csv_folder_path):
    if not csv_file.endswith('.csv'):
        continue

    file_path = os.path.join(csv_folder_path, csv_file)
    df = pd.read_csv(file_path)

    # One lookup key per row, in row order.
    row_keys = list(zip(df['ion'], df['V'], df['cn']))

    # A file is written only when EVERY row has a reference entry; otherwise it is
    # skipped as a whole, and the first unmatched key is reported so the gap is visible.
    missing_key = next((key for key in row_keys if key not in ir_lookup), None)
    if missing_key is not None:
        print(f"Skipped {csv_file}: no IR entry for (ion, V, cn) = {missing_key}")
        continue

    df['IR'] = [ir_lookup[key] for key in row_keys]

    output_file = os.path.join(output_folder, csv_file)
    df.to_csv(output_file, index=False)
    print(f"Saved new CSV file for {csv_file} to {output_file}")

print("Processing complete.")

Saved new CSV file for Ba3ZrIr2O9(mp-1214509).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/Ba3ZrIr2O9(mp-1214509).csv
Saved new CSV file for Ba2SmWO6(mp-1517191).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/Ba2SmWO6(mp-1517191).csv
Saved new CSV file for InRhO3(mp-1203220).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/InRhO3(mp-1203220).csv
Saved new CSV file for Mg2SbWO6(mp-1047279).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/Mg2SbWO6(mp-1047279).csv
Saved new CSV file for TbVO3(mp-1187389).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/TbVO3(mp-1187389).csv
Saved new CSV file for Ba2ScTaO6(mp-1205823).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/Ba2ScTaO6(mp-1205823).csv
Saved new CSV file for Sr2UInO6(mp-1206007).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/Sr2UInO6(mp-1206007).csv
Saved new CSV file for MnZnO3(mp-772528).csv to /Users/yangmingyue/Desktop/ABO365/模型应用新新/配位数化合价离子半径/MnZnO3(m

In [5]:
import pandas as pd
import os
import math

# --- Configuration starts ---
# NOTE(review): an earlier cell writes its results to '/Ionic radium/' (capital "I"), while
# this cell reads from '/ionic radium/'. The two only refer to the same folder on a
# case-insensitive filesystem — confirm this is the intended input.
input_folder = '/ionic radium/'  # Folder scanned for input '.csv' files
output_folder = '/ionic Vm/' # Folder that receives the '*_avg_volume.csv' results (created if missing)
radius_column_name = 'IR'                # Input column holding the ionic radius used in V = (4/3)*pi*r^3
ion_column_name = 'ion'                  # Input column whose values define the groups that are averaged
# --- Configuration ends ---

def calculate_and_average_ionic_volumes(input_filepath, output_filepath):
    """
    Convert a table of ionic radii into per-ion mean sphere volumes.

    Steps:
      1. Load ``input_filepath`` as CSV.
      2. Require both the radius column (module-level ``radius_column_name``) and
         the ion column (module-level ``ion_column_name``); otherwise report and stop.
      3. Coerce the radius column to numbers and discard rows where that fails
         or the value is missing (a warning states how many were discarded).
      4. Compute V = (4/3) * pi * r**3 per remaining row.
      5. Average V per distinct value of the ion column.
      6. Write the two-column result (ion, 'average_ionic_volume') to
         ``output_filepath`` encoded as UTF-8 with BOM.

    All problems are reported through print(); nothing is raised and the
    function always returns None.
    """
    try:
        radii_table = pd.read_csv(input_filepath)
        print(f"Processing file: {input_filepath}")

        # Both columns are mandatory; the radius column is checked first.
        header = radii_table.columns
        if radius_column_name not in header:
            print(f"Error: Radius column '{radius_column_name}' not found in file {input_filepath}. Skipping this file.")
            return
        if ion_column_name not in header:
            print(f"Error: Ion identifier column '{ion_column_name}' not found in file {input_filepath}. Skipping this file.")
            return

        # Anything that is not a number becomes NaN and is filtered out below.
        radii_table[radius_column_name] = pd.to_numeric(radii_table[radius_column_name], errors='coerce')

        row_count_before = len(radii_table)
        usable_rows = radii_table.dropna(subset=[radius_column_name])
        removed = row_count_before - len(usable_rows)
        if removed > 0:
            print(f"Warning: Removed {removed} rows from file {input_filepath} because the '{radius_column_name}' column contained non-numeric or null values.")

        if usable_rows.empty:
            print(f"Warning: File {input_filepath} is empty after removing invalid radii. Skipping this file.")
            return

        # Sphere volume for every remaining radius.
        sphere_volumes = (4/3) * math.pi * (usable_rows[radius_column_name] ** 3)
        with_volume = usable_rows.assign(ionic_volume=sphere_volumes)

        # One row per ion value, holding the mean of its volumes.
        mean_by_ion = with_volume.groupby(ion_column_name, as_index=False)['ionic_volume'].mean()
        mean_by_ion = mean_by_ion.rename(columns={'ionic_volume': 'average_ionic_volume'})

        # utf-8-sig adds a BOM so spreadsheet tools detect the encoding.
        mean_by_ion.to_csv(output_filepath, index=False, encoding='utf-8-sig')
        print(f"Processed and saved results to: {output_filepath}")

    except FileNotFoundError:
        print(f"Error: Input file {input_filepath} not found.")
    except pd.errors.EmptyDataError:
        print(f"Error: Input file {input_filepath} is empty.")
    except Exception as e:
        print(f"An unexpected error occurred while processing file {input_filepath}: {e}")

def main():
    """
    Run the per-file volume averaging over every CSV in ``input_folder``.

    Stops early (with a message) when the input folder is missing or holds no
    '.csv' files. Creates ``output_folder`` when it does not exist. Each input
    ``<name>.csv`` is handed to calculate_and_average_ionic_volumes together
    with the target path ``<output_folder>/<name>_avg_volume.csv``.
    """
    if not os.path.isdir(input_folder):
        print(f"Error: Input folder '{input_folder}' does not exist. Please check the path.")
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created output folder: {output_folder}")

    # Keep only entries whose name ends in '.csv', in directory-listing order.
    pending = []
    for entry in os.listdir(input_folder):
        if entry.endswith('.csv'):
            pending.append(entry)

    if len(pending) == 0:
        print(f"No CSV files found in folder '{input_folder}'.")
        return

    print(f"Found {len(pending)} CSV files to process.")

    for name in pending:
        # The suffix goes between the stem and the extension so outputs are
        # distinguishable from their inputs.
        stem, extension = os.path.splitext(name)
        source_path = os.path.join(input_folder, name)
        target_path = os.path.join(output_folder, stem + "_avg_volume" + extension)
        calculate_and_average_ionic_volumes(source_path, target_path)

    print("\nAll files processed.")

# Entry point: run the batch only when this code is the top-level program.
if __name__ == "__main__":
    main()

找到 3097 个CSV文件待处理。
正在处理文件: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子半径/Ba3ZrIr2O9(mp-1214509).csv
已处理并保存结果到: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子体积/Ba3ZrIr2O9(mp-1214509)_avg_volume.csv
正在处理文件: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子半径/Ba2SmWO6(mp-1517191).csv
已处理并保存结果到: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子体积/Ba2SmWO6(mp-1517191)_avg_volume.csv
正在处理文件: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子半径/InRhO3(mp-1203220).csv
已处理并保存结果到: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子体积/InRhO3(mp-1203220)_avg_volume.csv
正在处理文件: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子半径/Mg2SbWO6(mp-1047279).csv
已处理并保存结果到: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子体积/Mg2SbWO6(mp-1047279)_avg_volume.csv
正在处理文件: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子半径/TbVO3(mp-1187389).csv
已处理并保存结果到: /Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子体积/TbVO3(mp-1187389)_avg_volume.csv
正在处理文件: /Users/yangmingyue/Desktop/AB

In [6]:
import os
import pandas as pd
import re


# Folder holding the '*_avg_volume.csv' files, and the summary file to write.
input_folder = "/Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子体积/"
output_file = "/Users/yangmingyue/Desktop/ABO365/模型应用新新/总离子体积.csv"

# One {'formula_mp', 'total_ionic_volume'} record per processed file.
results = []

for filename in os.listdir(input_folder):
    if not filename.endswith('_avg_volume.csv'):
        continue

    # File names look like "<formula>(<id>)_avg_volume.csv"; anything else is ignored.
    match = re.match(r'([^(]+)\(([^)]+)\)', os.path.splitext(filename)[0])
    if not match:
        continue
    formula = match.group(1)
    mp = match.group(2)
    formula_mp = f"{formula}({mp})"

    file_path = os.path.join(input_folder, filename)
    df = pd.read_csv(file_path)

    # Element symbol -> count parsed from the formula (no digits means 1).
    # Counts are ACCUMULATED so a symbol that appears more than once in the
    # formula is not overwritten by its last occurrence.
    ion_dict = {}
    for symbol, count in re.findall(r'([A-Z][a-z]*)(\d*)', formula):
        ion_dict[symbol] = ion_dict.get(symbol, 0) + (int(count) if count else 1)

    # Sum of (average volume x formula count) over the rows whose 'ion' value is
    # an element of the formula; rows with any other 'ion' value contribute nothing.
    total_volume = 0
    for ion, volume in zip(df['ion'], df['average_ionic_volume']):
        if ion in ion_dict:
            total_volume += volume * ion_dict[ion]

    results.append({'formula_mp': formula_mp, 'total_ionic_volume': total_volume})

result_df = pd.DataFrame(results)
result_df.to_csv(output_file, index=False)
print(f"over {output_file}")

处理完成，结果已保存到 /Users/yangmingyue/Desktop/ABO365/模型应用新新/总离子体积.csv


In [8]:
import os
import pandas as pd
import re


# Folder holding the '*_avg_volume.csv' files, and the summary file to write.
input_folder = "/Users/yangmingyue/Desktop/ABO365/模型应用新新/修改后配位数化合价离子体积/"
output_file = "/Users/yangmingyue/Desktop/ABO365/模型应用新新/总离子体积处理后.csv"

# One {'formula_mp', 'total_ionic_volume'} record per processed file.
results = []

for filename in os.listdir(input_folder):
    if not filename.endswith('_avg_volume.csv'):
        continue

    # File names look like "<formula>(<id>)_avg_volume.csv"; anything else is ignored.
    match = re.match(r'([^(]+)\(([^)]+)\)', os.path.splitext(filename)[0])
    if not match:
        continue
    formula = match.group(1)
    mp = match.group(2)
    formula_mp = f"{formula}({mp})"

    file_path = os.path.join(input_folder, filename)
    df = pd.read_csv(file_path)

    # Element symbol -> count parsed from the formula (no digits means 1).
    # Counts are ACCUMULATED so a symbol that appears more than once in the
    # formula is not overwritten by its last occurrence.
    ion_dict = {}
    for symbol, count in re.findall(r'([A-Z][a-z]*)(\d*)', formula):
        ion_dict[symbol] = ion_dict.get(symbol, 0) + (int(count) if count else 1)

    # The total is divided by (oxygen count // 3). Formulas whose "O" carries no
    # explicit count are left unscaled.
    # NOTE(review): floor division is kept as written; for oxygen counts that are
    # not multiples of 3 this rounds the divisor down — confirm that is intended.
    o_match = re.search(r'O(\d+)(\))?', formula)
    if o_match:
        o_coeff = int(o_match.group(1))
        if o_match.group(2):
            # Report the oxygen count that was just parsed (the previous message
            # referenced a name that does not exist at this scope).
            print(f"Skipping {formula_mp} due to 'O{o_coeff})'")
            continue
        divisor = o_coeff // 3
        if divisor == 0:
            # O1 / O2 would otherwise cause a division by zero and abort the whole
            # run before anything is written; treat them like the no-count case.
            print(f"Warning: {formula_mp} has O{o_coeff} (fewer than 3 oxygens); total volume left unscaled")
            divisor = 1
    else:
        divisor = 1

    # Sum of (average volume x formula count) over the rows whose 'ion' value is
    # an element of the formula; rows with any other 'ion' value contribute nothing.
    total_volume = 0
    for ion, volume in zip(df['ion'], df['average_ionic_volume']):
        if ion in ion_dict:
            total_volume += volume * ion_dict[ion]

    adjusted_volume = total_volume / divisor

    results.append({'formula_mp': formula_mp, 'total_ionic_volume': adjusted_volume})

result_df = pd.DataFrame(results)
result_df.to_csv(output_file, index=False)
print(f"over {output_file}")

处理完成，结果已保存到 /Users/yangmingyue/Desktop/ABO365/模型应用新新/总离子体积处理后.csv
