<a href="https://colab.research.google.com/github/ugoGS/Py/blob/py_add/LSMW_Scripts_functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#ZIUSACRATTCHEQ: Create Attach Equipment


In [None]:
def lsmw_ziusachobjtype_script(df_input):
  """Produce the ZIUSACHOBJTYPE LSMW records from ``df_input``.

  Every input row is mapped to the tuple
  (id_data, EQUNR, EQUART, date_time, process_datetime). The resulting
  DataFrame is appended to the ``{refined_path}/ziusachobjtype`` parquet
  dataset, partitioned by ``process_datetime``, and its EQUNR/EQUART
  columns are passed to ``generate_csv_lsmw`` followed by
  ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id_data``,
      ``sap_equipment_number``, ``new_value`` and ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the error
    message is printed and the exception is not propagated).
  """
  try:
    field_types = [
      ("id_data", StringType()),
      ("EQUNR", StringType()),
      ("EQUART", StringType()),
      ("date_time", TimestampType()),
      ("process_datetime", StringType()),
    ]
    record_schema = StructType(
      [StructField(field_name, field_type, True) for field_name, field_type in field_types]
    )

    def to_record(row):
      # date_time is taken at the moment the row is mapped.
      return (
        row["id_data"],
        row["sap_equipment_number"],
        row["new_value"],
        datetime.now(),
        row["process_datetime"],
      )

    # Build the records directly on the RDD, then apply the schema.
    records_rdd = df_input.rdd.map(to_record)
    refined_df = spark.createDataFrame(records_rdd, schema=record_schema)

    # Append to the parquet dataset, one partition per process_datetime.
    parquet_writer = refined_df.write.mode('append').partitionBy('process_datetime')
    parquet_writer.parquet(f'{refined_path}/ziusachobjtype')

    # Deliverable file: only the LSMW columns.
    deliverable_df = refined_df.select("EQUNR", "EQUART")
    generate_csv_lsmw(deliverable_df, 'ziusachobjtype')
    rename_csv_file("ziusachobjtype")
  except Exception as e:
    print(f"[lsmw_ziusachobjtype_script] Unexpected error: {str(e)}")
    return False
  else:
    return True


#ZIUSADISEQCHLOC: Equipment - Dismantle without changing the location

In [None]:
def lsmw_ziusadiseqchloc_script(df_input):
  """Produce the ZIUSADISEQCHLOC LSMW records from ``df_input``.

  Every input row is mapped to the tuple
  (id_data, Equipment, FunctionalLocation, date_time, process_datetime).
  The resulting DataFrame is appended to the
  ``{refined_path}/ziusadiseqchloc`` parquet dataset, partitioned by
  ``process_datetime``, and its Equipment/FunctionalLocation columns are
  passed to ``generate_csv_lsmw`` followed by ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id``,
      ``sap_equipment_number``, ``floc`` and ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the error
    message is printed and the exception is not propagated).
  """
  try:
    string_fields_before_timestamp = ("id_data", "Equipment", "FunctionalLocation")
    schema_fields = [
      StructField(field_name, StringType(), True)
      for field_name in string_fields_before_timestamp
    ]
    schema_fields.append(StructField("date_time", TimestampType(), True))
    schema_fields.append(StructField("process_datetime", StringType(), True))
    record_schema = StructType(schema_fields)

    def to_record(row):
      # date_time is taken at the moment the row is mapped.
      return (
        row["id"],
        row["sap_equipment_number"],
        row["floc"],
        datetime.now(),
        row["process_datetime"],
      )

    # Build the records directly on the RDD, then apply the schema.
    records_rdd = df_input.rdd.map(to_record)
    refined_df = spark.createDataFrame(records_rdd, schema=record_schema)

    # Append to the parquet dataset, one partition per process_datetime.
    parquet_writer = refined_df.write.mode('append').partitionBy('process_datetime')
    parquet_writer.parquet(f'{refined_path}/ziusadiseqchloc')

    # Deliverable file: only the LSMW columns.
    deliverable_df = refined_df.select("Equipment", "FunctionalLocation")
    generate_csv_lsmw(deliverable_df, 'ziusadiseqchloc')
    rename_csv_file("ziusadiseqchloc")
  except Exception as e:
    print(f"[lsmw_ziusadiseqchloc_script] Unexpected error: {str(e)}")
    return False
  else:
    return True

#ZIUSAEQIDISMANT: Dismantle Equip - INAC DLFL


In [None]:
def lsmw_ziusaeqidismant_script(df_input):
  """Produce the ZIUSAEQIDISMANT LSMW records from ``df_input``.

  Every input row is mapped to the tuple
  (id_data, Equipment, date_time, process_datetime). The resulting
  DataFrame is appended to the ``{refined_path}/ziusaeqidismant`` parquet
  dataset, partitioned by ``process_datetime``, and its Equipment column
  is passed to ``generate_csv_lsmw`` followed by ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id``,
      ``sap_equipment_number`` and ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the error
    message is printed and the exception is not propagated).
  """
  try:
    record_schema = StructType([
      StructField("id_data", StringType(), True),
      StructField("Equipment", StringType(), True),
      StructField("date_time", TimestampType(), True),
      StructField("process_datetime", StringType(), True),
    ])

    def to_record(row):
      # date_time is taken at the moment the row is mapped.
      equipment_number = row["sap_equipment_number"]
      return (row["id"], equipment_number, datetime.now(), row["process_datetime"])

    # Build the records directly on the RDD, then apply the schema.
    records_rdd = df_input.rdd.map(to_record)
    refined_df = spark.createDataFrame(records_rdd, schema=record_schema)

    # Append to the parquet dataset, one partition per process_datetime.
    parquet_writer = refined_df.write.mode('append').partitionBy('process_datetime')
    parquet_writer.parquet(f'{refined_path}/ziusaeqidismant')

    # Deliverable file: only the LSMW column.
    deliverable_df = refined_df.select("Equipment")
    generate_csv_lsmw(deliverable_df, 'ziusaeqidismant')
    rename_csv_file("ziusaeqidismant")
  except Exception as e:
    print(f"[lsmw_ziusaeqidismant_script] Unexpected error: {str(e)}")
    return False
  else:
    return True

#ZIUSACRKTYPEQ: Create Equipment Category K from Reference


In [None]:
def lsmw_ziusacrktypeq_script(df_input):
  """Produce the ZIUSACRKTYPEQ LSMW records from ``df_input``.

  Every input row is mapped to a record holding the equipment category,
  reference equipment number, start-up date, construction year/month, a
  fixed material number and a generated serial number, plus the
  ``id_data``, ``date_time`` and ``process_datetime`` bookkeeping fields.
  The resulting DataFrame is appended to the
  ``{refined_path}/ziusacrktypeq`` parquet dataset, partitioned by
  ``process_datetime``. The deliverable keeps the seven LSMW columns,
  replaces a reference equipment number of "0" with "TBD", and is passed
  to ``generate_csv_lsmw`` followed by ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id``,
      ``equipment_category``, ``sap_equipment_number``, ``start_up_date``,
      ``construction_year``, ``construction_month`` and
      ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the error
    message is printed and the exception is not propagated).
  """
  try:
    deliverable_columns = [
      "Equipment category",
      "Reference Equipment Number",
      "Start-up Date of the Technical Object",
      "Year of construction",
      "Month of construction",
      "Material Number",
      "Serial Number",
    ]
    # Every deliverable column is a string except the start-up date.
    deliverable_types = {"Start-up Date of the Technical Object": DateType()}

    schema_fields = [StructField("id_data", StringType(), True)]
    for column_name in deliverable_columns:
      column_type = deliverable_types.get(column_name, StringType())
      schema_fields.append(StructField(column_name, column_type, True))
    schema_fields.append(StructField("date_time", TimestampType(), True))
    schema_fields.append(StructField("process_datetime", StringType(), True))
    record_schema = StructType(schema_fields)

    def to_record(row):
      return (
        row["id"],
        row["equipment_category"],
        row["sap_equipment_number"],
        row["start_up_date"],
        row["construction_year"],
        row["construction_month"],
        'D00000012',  # Material Number is the same literal for every record.
        # The input's manufacturer_serial_number is not used here; the
        # serial number comes from get_datetime_now_as_serial_number().
        get_datetime_now_as_serial_number(),
        datetime.now(),
        row["process_datetime"],
      )

    records_rdd = df_input.rdd.map(to_record)
    refined_df = spark.createDataFrame(records_rdd, schema=record_schema)

    # Append to the parquet dataset, one partition per process_datetime.
    parquet_writer = refined_df.write.mode('append').partitionBy('process_datetime')
    parquet_writer.parquet(f'{refined_path}/ziusacrktypeq')

    # Deliverable file: only the LSMW columns.
    deliverable_df = refined_df.select(*deliverable_columns)

    # A reference equipment number of "0" is delivered as "TBD".
    reference_number = col("Reference Equipment Number")
    deliverable_df = deliverable_df.withColumn(
      "Reference Equipment Number",
      when(reference_number == "0", lit("TBD")).otherwise(reference_number),
    )

    generate_csv_lsmw(deliverable_df, 'ziusacrktypeq')
    rename_csv_file("ziusacrktypeq")
  except Exception as e:
    print(f"[lsmw_ziusacrktypeq_script] Unexpected error: {str(e)}")
    return False
  else:
    return True

In [None]:
# lsmw_ziusacrktypeq_schema = StructType([
#       StructField("id_data", StringType(), True),
#       StructField("functional_location", StringType(), True),
#       StructField("reference_equipment_number", StringType(), True),
#       StructField("object_type", StringType(), True),
#       StructField("equipment_category", StringType(), True),
#       StructField("start_up_date_of_the_technical_object", DateType(), True),
#       StructField("year_of_construction", StringType(), True),
#       StructField("month_of_construction", StringType(), True),
#       StructField("material_number", StringType(), True),
#       StructField("serial_number", StringType(), True),
#       StructField("date_time", TimestampType(), True),
#       StructField("process_datetime", StringType(), True)

#     rdd_errors = df_input.rdd.map(
#         lambda row: (
#             row["id"],
#             row["floc"],
#             row["sap_equipment_number"],
#             row["object_type"],
#             row["equipment_category"],
#             row["start_up_date"],
#             row["construction_year"],
#             row["construction_month"],
#             row["mid"],
#             row["manufacturer_serial_number"],
#             datetime.now(),
#             row["process_datetime"]))

#ZIUSAINACEQUI: Equipment - set INAC – Change Description and long text


In [None]:
def lsmw_ziusainecequi_script(df_input):
  """Produce the ZIUSAINECEQUI LSMW records from ``df_input``.

  Every input row is mapped to the tuple
  (id_data, Equipment, Descr, Long Text Desc, date_time, process_datetime).
  The resulting DataFrame is appended to the
  ``{refined_path}/ziusainecequi`` parquet dataset, partitioned by
  ``process_datetime``. In the deliverable, a "Long Text Desc" that is
  null, empty or the string "NaN" is replaced with a fixed default
  sentence before the frame is passed to ``generate_csv_lsmw`` followed
  by ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id``,
      ``sap_equipment_number``, ``description_of_object_type``,
      ``long_text`` and ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the error
    message is printed and the exception is not propagated).
  """
  try:
    field_types = [
      ("id_data", StringType()),
      ("Equipment", StringType()),
      ("Descr", StringType()),  # Optional
      ("Long Text Desc", StringType()),  # Optional
      ("date_time", TimestampType()),
      ("process_datetime", StringType()),
    ]
    record_schema = StructType(
      [StructField(field_name, field_type, True) for field_name, field_type in field_types]
    )

    def to_record(row):
      # date_time is taken at the moment the row is mapped.
      return (
        row["id"],
        row["sap_equipment_number"],
        row["description_of_object_type"],
        row["long_text"],
        datetime.now(),
        row["process_datetime"],
      )

    # Create and transform the records on the RDD, then apply the schema.
    records_rdd = df_input.rdd.map(to_record)
    refined_df = spark.createDataFrame(records_rdd, schema=record_schema)

    # Append to the parquet dataset, one partition per process_datetime.
    parquet_writer = refined_df.write.mode('append').partitionBy('process_datetime')
    parquet_writer.parquet(f'{refined_path}/ziusainecequi')

    # Deliverable file: only the LSMW columns.
    deliverable_df = refined_df.select("Equipment", "Descr", "Long Text Desc")

    # Fall back to the default long text when none was supplied.
    long_text = col("Long Text Desc")
    long_text_missing = long_text.isNull() | (long_text == "") | (long_text == "NaN")
    default_long_text = lit("Remove equipment from FLOC and do not transfer back into inventory")
    deliverable_df = deliverable_df.withColumn(
      "Long Text Desc",
      when(long_text_missing, default_long_text).otherwise(long_text),
    )

    generate_csv_lsmw(deliverable_df, 'ziusainecequi')
    rename_csv_file("ziusainecequi")
  except Exception as e:
    print(f"[lsmw_ziusainecequi_script] Unexpected error: {str(e)}")
    return False
  else:
    return True

#ZIUSACREQWOREF: Create Equipment without using reference

In [10]:
def lsmw_ziusacreqworef_script(df_input):
  """Produce the ZIUSACREQWOREF LSMW records from ``df_input``.

  Every input row is mapped to a record matching the schema below
  (Inventory and MaintenancePlant are filled with the literal "TBD").
  The full record set is appended to the ``{refined_path}/ziusacreqworef``
  parquet dataset, partitioned by ``process_datetime``.

  The deliverable keeps only the thirteen LSMW columns, formats
  StartUpDate as ``MMddyyyy``, blanks placeholder values ("N/A", "NaN",
  and "Need Pole #" for SortField), and drops rows where Description,
  StartUpDate, FLOC or SortField is empty or null. It is then passed to
  ``generate_csv_lsmw`` followed by ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id``,
      ``equipment_category``, ``description_of_object_type``,
      ``start_up_date``, ``construction_year``, ``construction_month``,
      ``object_type``, ``manufacturer_serial_number``, ``manufacturer``,
      ``model_number``, ``sort_field_pole_number``, ``floc`` and
      ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the
    traceback is printed and the exception is not propagated).
  """
  try:
    lsmw_ziusacreqworef_schema = StructType([
      StructField("id_data", StringType(), True),
      StructField("Type", StringType(), True),  # Reqd
      StructField("Description", StringType(), True),  # Reqd
      StructField("StartUpDate", DateType(), True),  # (MMDDYYYY) Reqd
      StructField("ConstructionYear", StringType(), True),
      StructField("ConstructionMonth", StringType(), True),
      StructField("ObjectType", StringType(), True),  # Reqd
      StructField("Inventory", StringType(), True),
      StructField("ManufSerialNo", StringType(), True),
      StructField("Manufacturer", StringType(), True),
      StructField("Model", StringType(), True),
      StructField("MaintenancePlant", StringType(), True),  # Reqd
      StructField("SortField", StringType(), True),
      StructField("FLOC", StringType(), True),
      StructField("date_time", TimestampType(), True),
      StructField("process_datetime", StringType(), True)])

    rdd_records = df_input.rdd.map(
        lambda row: (
          row["id"],
          row["equipment_category"],
          row["description_of_object_type"],
          row["start_up_date"],
          row["construction_year"],
          row["construction_month"],
          row["object_type"],
          "TBD",  # Inventory
          row["manufacturer_serial_number"],
          row["manufacturer"],
          row["model_number"],  # May be non-numeric, as shown in the LSMW Scripts pptx (VERIZON)
          "TBD",  # MaintenancePlant
          row["sort_field_pole_number"],
          row["floc"],
          datetime.now(),
          row["process_datetime"]))

    refined_df = spark.createDataFrame(rdd_records, schema=lsmw_ziusacreqworef_schema)

    # Persist the full record set, one partition per process_datetime.
    refined_df.write.mode('append').partitionBy("process_datetime").parquet(f"{refined_path}/ziusacreqworef")

    # Deliverable file: only the LSMW columns.
    df_ziusacreqworef = refined_df.select(
      "Type", "Description", "StartUpDate", "ConstructionYear", "ConstructionMonth", "ObjectType",
      "Inventory", "ManufSerialNo", "Manufacturer", "Model", "MaintenancePlant", "SortField", "FLOC")

    # NOTE: ConstructionMonth is deliberately left untouched; casting it to
    # int and back would render single-digit months with one digit.

    # Bug fix: chain from the projected frame. The previous code restarted
    # from the full record DataFrame here, which silently undid the select
    # above and leaked id_data/date_time/process_datetime into the CSV.
    df_ziusacreqworef = df_ziusacreqworef.withColumn("StartUpDate", date_format(col("StartUpDate"), "MMddyyyy"))

    # Placeholder values that must be delivered as empty strings.
    placeholders_by_column = {
      "Description": ("N/A", "NaN"),
      "ManufSerialNo": ("N/A", "NaN"),
      "Manufacturer": ("N/A", "NaN"),
      "Model": ("N/A", "NaN"),
      "SortField": ("Need Pole #", "N/A", "NaN"),
    }
    for column_name, placeholders in placeholders_by_column.items():
      df_ziusacreqworef = df_ziusacreqworef.withColumn(
        column_name,
        when(col(column_name).isin(*placeholders), lit("")).otherwise(col(column_name)))

    # Keep only rows with a value in each column checked below; a null also
    # fails the != "" comparison, so null rows are dropped as well.
    # NOTE(review): the columns checked here differ from the ones tagged
    # "Reqd" in the schema - confirm which set is authoritative.
    df_ziusacreqworef_without_empty_values = df_ziusacreqworef.filter(
      (col("Description") != "") &
      (col("StartUpDate") != "") &
      (col("FLOC") != "") &
      (col("SortField") != ""))

    generate_csv_lsmw(df_ziusacreqworef_without_empty_values, 'ziusacreqworef')
    rename_csv_file("ziusacreqworef")

    return True

  except Exception:
    print("[lsmw_ziusacreqworef_script] Unexpected error: \n")
    traceback.print_exc()
    return False

#ZIUAFL002: Long Text for Equipment and FLOC

In [15]:
def lsmw_ziuafl002_script(df_input):
  """Produce the ZIUAFL002 LSMW records from ``df_input``.

  Every input row is mapped to the tuple
  (id_data, FLOC, Line Number, Long Text, date_time, process_datetime).
  The resulting DataFrame is appended to the ``{refined_path}/ziuafl002``
  parquet dataset, partitioned by ``process_datetime``. In the
  deliverable, "Long Text" and "Line Number" values equal to "N/A" or
  "NaN" are replaced with an empty string before the frame is passed to
  ``generate_csv_lsmw`` followed by ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id``, ``floc``,
      ``nyseg_line_number``, ``long_text`` and ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the
    traceback is printed and the exception is not propagated).
  """
  try:
    lsmw_ziuafl002_schema = StructType([
        StructField("id_data", StringType(), True),
        StructField("FLOC", StringType(), True),
        StructField("Line Number", StringType(), True),
        StructField("Long Text", StringType(), True),
        StructField("date_time", TimestampType(), True),
        StructField("process_datetime", StringType(), True)
    ])

    rdd_records = df_input.rdd.map(
        lambda row: (
            row["id"],
            row["floc"],
            row["nyseg_line_number"],
            row["long_text"],
            datetime.now(),
            row["process_datetime"]))

    refined_df = spark.createDataFrame(rdd_records, schema=lsmw_ziuafl002_schema)

    # Persist the full record set, one partition per process_datetime.
    refined_df.write.mode('append').partitionBy("process_datetime").parquet(f"{refined_path}/ziuafl002")

    # Deliverable file: only the LSMW columns.
    df_ziuafl002 = refined_df.select("FLOC", "Line Number", "Long Text")

    # Blank out placeholder values. The column name is taken from the loop
    # variable so every reference uses the exact schema spelling; the old
    # code had one lookup spelled "Line number", which only resolved
    # because Spark's column resolution is case-insensitive by default.
    for column_name in ("Long Text", "Line Number"):
      current = col(column_name)
      df_ziuafl002 = df_ziuafl002.withColumn(
        column_name,
        when((current == "N/A") | (current == "NaN"), lit("")).otherwise(current))

    generate_csv_lsmw(df_ziuafl002, 'ziuafl002')
    rename_csv_file("ziuafl002")

    return True

  except Exception:
    print("[lsmw_ziuafl002_script] Unexpected error: \n")
    traceback.print_exc()
    return False

#ZIUAFL003: Characteristics for equipment and FLOC

In [17]:
def lsmw_ziuafl003_script(df_input):
  """Produce the ZIUAFL003 LSMW records from ``df_input``.

  Every input row is mapped to the tuple
  (id_data, FLOC, Class, Characteristic, Char Value, date_time,
  process_datetime), with "Char Value" always an empty string. The
  resulting DataFrame is appended to a parquet dataset under
  ``refined_path``, partitioned by ``process_datetime``. In the
  deliverable, a "Characteristic" equal to "N/A" or "NaN" is replaced
  with an empty string before the frame is passed to
  ``generate_csv_lsmw`` followed by ``rename_csv_file``.

  Args:
    df_input: Spark DataFrame whose rows expose ``id``, ``floc``,
      ``object_type``, ``description_of_object_type`` and
      ``process_datetime``.

  Returns:
    True when all steps ran without raising; False otherwise (the
    traceback is printed and the exception is not propagated).
  """
  try:
    string_fields_before_timestamp = ("id_data", "FLOC", "Class", "Characteristic", "Char Value")
    schema_fields = [
      StructField(field_name, StringType(), True)
      for field_name in string_fields_before_timestamp
    ]
    schema_fields.append(StructField("date_time", TimestampType(), True))
    schema_fields.append(StructField("process_datetime", StringType(), True))
    record_schema = StructType(schema_fields)

    def to_record(row):
      return (
        row["id"],
        row["floc"],
        row["object_type"],
        row["description_of_object_type"],
        "",  # Char Value TBD
        datetime.now(),
        row["process_datetime"],
      )

    records_rdd = df_input.rdd.map(to_record)
    refined_df = spark.createDataFrame(records_rdd, schema=record_schema)

    # NOTE(review): the directory name below is spelled "ziuasfl003", while
    # the CSV name used further down is "ziuafl003". It looks like a typo,
    # but it is kept as-is because readers of the existing dataset may
    # depend on this path - confirm before renaming.
    parquet_writer = refined_df.write.mode("append").partitionBy("process_datetime")
    parquet_writer.parquet(f"{refined_path}/ziuasfl003")

    # Deliverable file: only the LSMW columns.
    deliverable_df = refined_df.select("FLOC", "Class", "Characteristic", "Char Value")

    # Blank out placeholder characteristic values.
    characteristic = col("Characteristic")
    is_placeholder = (characteristic == "N/A") | (characteristic == "NaN")
    deliverable_df = deliverable_df.withColumn(
      "Characteristic",
      when(is_placeholder, lit("")).otherwise(characteristic),
    )

    generate_csv_lsmw(deliverable_df, 'ziuafl003')
    rename_csv_file("ziuafl003")
  except Exception as e:
    print(f"[lsmw_ziuafl003_script] Unexpected error: \n")
    traceback.print_exc()
    return False
  else:
    return True
