In [75]:
import csv

# Removes all none ASCII characters and replaces them with spaces
with open('pickHistoryOrderNo.csv', 'r', encoding='utf-8', errors='ignore') as file_in, open('output_file.csv', 'w', newline='', encoding='utf-8') as file_out:
    reader = csv.reader(file_in)
    writer = csv.writer(file_out)

    # Write the header row to the output file
    header = next(reader)
    writer.writerow(header)

    # Iterate over the rows in the input file and clean each row before writing it to the output file
    for row in reader:
        cleaned_row = []
        for cell in row:
            # Replace any non-unicode characters with spaces
            cleaned_cell = ''.join([char if ord(char) < 128 else ' ' for char in cell])
            cleaned_row.append(cleaned_cell)
        writer.writerow(cleaned_row)


In [76]:
# Remove whitespace and tabs

import csv

with open('output_file.csv', 'r', encoding='utf-8', errors='ignore') as file_in, open('leanPicks.csv', 'w', newline='', encoding='utf-8') as file_out:
    reader = csv.reader(file_in, delimiter='\t')
    writer = csv.writer(file_out)

    # Write the header row to the output file
    header = next(reader)
    header = [col.replace(' ', '').strip() for col in header]  # Remove whitespace and tab in column names
    writer.writerow(header)

    # Iterate over the rows in the input file, remove any whitespace or tabs, and write to the output file
    for row in reader:
        cleaned_row = [cell.replace(' ', '').strip() for cell in row]
        writer.writerow(cleaned_row)


In [77]:
# List refactored column headers

import pandas as pd

df = pd.read_csv("output_file.csv", on_bad_lines="skip")
df.columns.values.tolist()

['ISELL_ORDER_NUMBER\tPICK_ID\tARTNO   \tARTNAME_UNICODE                                   \tART_VOLUME_M3\tORDERED_QTY\tPICKED_QTY\tOPEN_PICK_QTY\tAVAILABLE_STOCK\tPOSSIBLE_TO_FINISH\tORDER_TYPE \tDATE_OF_PAYMENT\tTIME_OF_PAYMENT\tEXCEPTION\tPICK_AREA          \tACTUAL_ORDER_STATUS\tSTORAGE_STATUS\tSTORAGE_USED\tHANDOVER_POINT\tCUT_OFF_DATE\tCUT_OFF_TIME\tUSER_PICKING\tSERVICE_DATE\tSERVICE_WINDOW\tORDER_METHOD\tPICK_LOCATION\tPICK_LOCATION_TYPE\tDELIVERY_METHOD                  ']

In [78]:
# Create a new csv that contains only the columns we are interested in

# Read csv file and select columns of interest
df = pd.read_csv('leanPicks.csv', usecols=['ISELL_ORDER_NUMBER', 'PICK_ID', 'ARTNAME_UNICODE', 'ORDER_TYPE', 'PICK_AREA', 'ARTNO'])

# Write the selected columns to a new csv file
df.to_csv('output_file.csv', index=False)

In [79]:
import pandas as pd

# Read csv file
df = pd.read_csv('output_file.csv')

# Convert ARTNO to string
df['ARTNO'] = df['ARTNO'].astype(str)

# Group by order number and concatenate article numbers
orders = df.groupby('ISELL_ORDER_NUMBER')['ARTNO'].apply(lambda x: ','.join(x)).reset_index()

# Rename columns
orders.columns = ['order_number', 'articles']

# Write to new csv file
orders.to_csv('orders.csv', index=False)



In [86]:
with open('orders.csv', 'r') as csvfile:
    csvreader = csv.reader(csvfile)
    with open('strippedOrders.csv', 'w', newline='') as newfile:
        csvwriter = csv.writer(newfile)

        for row in csvreader:
            newrow = [cell.replace('"', '') for cell in row]
            csvwriter.writerow(newrow)
