In [1]:
from ace.utils import prep_general_material_data, read_file

In [2]:
# Read the PRE_MARA extract through the project's read_file helper.
# NOTE(review): the options dict is presumably forwarded to the underlying
# reader (header row enabled) — confirm against ace.utils.read_file.
df = read_file(
    "../data/system_1/PRE_MARA.csv",
    "CSV",
    {"header": "true"},
)

In [5]:
# Pyspark libraries
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import DataFrame

from ace.utils import enforce_schema, read_file
from ace.schemas import MARA_SCHEMA


def prep_general_material_data(
    df: DataFrame,
    col_mara_global_material_number: str,
    check_old_material_number_is_valid: bool = True,
    check_material_is_not_deleted: bool = True,
):
    """
    Prepare general material (MARA) data: optionally filter rows on the old
    material number (BISMT) and the deletion flag (LVORM), rename the
    system-specific global material number column, and enforce MARA_SCHEMA.

    Parameters:
    -----------
    df : DataFrame
        Input PySpark DataFrame containing material data. The columns
        "BISMT" and "LVORM" are referenced when the respective check is on.
    col_mara_global_material_number : str
        Name of the column holding the global material number in this
        system; it is renamed to "global_material_number".
    check_old_material_number_is_valid : bool, optional (default=True)
        If True, keeps only rows whose BISMT is null or is not one of
        "ARCHIVE", "DUPLICATE", "RENUMBERED".
    check_material_is_not_deleted : bool, optional (default=True)
        If True, keeps only rows whose LVORM is null or an empty string.

    Returns:
    --------
    DataFrame
        The result of ``enforce_schema`` applied to the filtered and renamed
        frame with ``MARA_SCHEMA``.
        NOTE(review): enforce_schema presumably selects/casts the schema's
        columns — confirm against ace.utils.
    """
    # Collect the enabled row predicates first, then apply them in order.
    row_predicates = []

    if check_old_material_number_is_valid:
        old_material_number = F.col("BISMT")
        row_predicates.append(
            (old_material_number.isNull())
            | (~old_material_number.isin("ARCHIVE", "DUPLICATE", "RENUMBERED"))
        )

    if check_material_is_not_deleted:
        deletion_flag = F.col("LVORM")
        row_predicates.append(
            (deletion_flag.isNull()) | (deletion_flag == "")
        )

    filtered = df
    for predicate in row_predicates:
        filtered = filtered.filter(predicate)

    # Give the system-specific column its consistent cross-system name.
    renamed = filtered.withColumnRenamed(
        col_mara_global_material_number, "global_material_number"
    )

    return enforce_schema(renamed, MARA_SCHEMA)

In [7]:
# Run the MARA preparation with "ZZMDGM" as this system's global material
# number column, then print the result without truncating cell values.
mara_prepared = prep_general_material_data(df, "ZZMDGM")
mara_prepared.show(truncate=False)

+----------------------------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+
|MANDT                                                           |MATNR                                                           |MEINS                                                           |global_material_number                                          |
+----------------------------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+
|ad57366865126e55649ecb23ae1d48887544976efea46a48eb5d85a6eeb4d306|73247d2a426212859ed5573281c4fb0f1ac040983509226591035355f4d0fa68|72dfcfb0c470ac255cde83fb8fe38de8a128188e03ea5ba5b2a93adbea1062fa|73247d2a426212859e

In [8]:
import os

In [None]:
# Load every CSV file in the system_1 data directory into a notebook-level
# variable named after the file without its extension
# (e.g. PRE_MARA.csv -> PRE_MARA).
data_dir = "../data/system_1/"

# os.listdir returns entries in arbitrary order; sorting makes the load
# sequence (and the printed log) reproducible across runs.
for file_name in sorted(os.listdir(data_dir)):
    file_path = os.path.join(data_dir, file_name)

    # Only regular files are candidates (subfolders are ignored)
    if not os.path.isfile(file_path):
        continue

    # Only CSV files can be read here. Skipping everything else prevents a
    # stale `df` (from a previous iteration or an earlier cell) from being
    # bound to an unrelated file's name and reported as loaded.
    if not file_name.endswith('.csv'):
        print(f"Skipping non-CSV file: {file_name}")
        continue

    # Extract the file name without extension
    base_name = os.path.splitext(file_name)[0]
    print(base_name)

    df = read_file(file_path, "csv", {"header": "true", "inferSchema": "true"})

    # Dynamically assign the DataFrame to a variable with the same name as the file (without extension)
    globals()[base_name] = df
    print(f"Data loaded into variable: {base_name}")

../data/system_1/PRE_AFKO.csv
PRE_AFKO
Data loaded into variable: PRE_AFKO
../data/system_1/PRE_AFPO.csv
PRE_AFPO
Data loaded into variable: PRE_AFPO
../data/system_1/PRE_AUFK.csv
PRE_AUFK
Data loaded into variable: PRE_AUFK
../data/system_1/PRE_MARA.csv
PRE_MARA
Data loaded into variable: PRE_MARA
../data/system_1/PRE_MARC.csv
PRE_MARC
Data loaded into variable: PRE_MARC
../data/system_1/PRE_MBEW.csv
PRE_MBEW
Data loaded into variable: PRE_MBEW
../data/system_1/PRE_T001.csv
PRE_T001
Data loaded into variable: PRE_T001
../data/system_1/PRE_T001K.csv
PRE_T001K
Data loaded into variable: PRE_T001K
../data/system_1/PRE_T001W.csv
PRE_T001W
Data loaded into variable: PRE_T001W


In [15]:
# PRE_T001W is not assigned explicitly anywhere: it is bound through globals()
# by the directory-loading loop, so that cell must have run first.
# NOTE(review): presumably a Spark DataFrame, so show() prints its first rows.
PRE_T001W.show()

+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+-----+--------------------+--------------------+--------------------+--------------------+-----+-----+-----+--------------------+--------------------+--------------------+-----+-----+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+-----+--------------------+---------+-----+-----+-----+-----+-----+------------------+------------------+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+-----+-----+-----+------+--------+-------+-----+-----+------+--------------------+--------+--------+-----------+--------------------+--------------------+--------------------+
|               MANDT|               WERKS|               NAME1|               BWKEY|           

In [16]:
from ace.utils import enforce_schema