In [20]:
import pandas as pd
from pathlib import Path

In [21]:
# set the file path and the sheet names to explore
# base = Path(r"d:\yzy\CMU\26SPRING\95451 pm\project\VSP Vision Datasets")
import os 
base = Path(os.getcwd()+"/VSP Vision Datasets")
sources = [
    ("AO (demand)", "AO-BI275 DEMAND KC KP LA LS KO KS 12.17.25.xlsx", ["Brand View"]),
    ("Calvin Klein", "Calvin Klein_Sept24 ATP.xlsx", ["SUN", "OPH"]),
    ("Lacoste", "LACOSTE_Sept24 ATP.xlsx", ["LACOSTE OPTICAL", "LACOSTE SUN"]),
    ("Nike", "Nike_Sept24 ATP.xlsm", ["Nike Sept 24 Optical", "Nike Sept 24 Sun"]),
]

## 1. Preprocess 'demand' Data

In [22]:
# load and clean the 'damand' data
df_ao_raw = pd.read_excel(
    base / "AO-BI275 DEMAND KC KP LA LS KO KS 12.17.25.xlsx",
    sheet_name="Brand View",
    header=None,
)

n_meta = 7
meta_names = ["Collection", "BrandLine", "Material", "StyleCode", "GridValue", "Style", "Region"]

month_cols = df_ao_raw.iloc[1, n_meta:].tolist()   
demand = df_ao_raw.iloc[2:].copy()                  
demand.columns = meta_names + month_cols

demand = demand[demand.iloc[:, 0].astype(str).str.upper() != "COLLECTION"].copy()

for i in range(n_meta, demand.shape[1]):
    demand.iloc[:, i] = pd.to_numeric(demand.iloc[:, i], errors="coerce")


def _parse_style(val):
    if pd.isna(val):
        return pd.Series({"Size": "", "Color": ""})
    txt = str(val)
    parts = txt.split("/")
    if len(parts) >= 3:
        size = parts[1].strip()
        color = parts[2].strip()
    elif len(parts) == 2:
        size = parts[0].strip()
        color = parts[1].strip()
    else:
        size = ""
        color = ""
    return pd.Series({"Size": size, "Color": color})

style_extras = demand["Style"].apply(_parse_style)
demand = pd.concat([demand, style_extras], axis=1)

print(demand.head())


  Collection         BrandLine Material StyleCode GridValue  \
2         KC  CALVIN KLEIN SUN    45073  CK20541S   5719001   
3         KC  CALVIN KLEIN SUN    45073  CK20541S   5719001   
4         KC  CALVIN KLEIN SUN    45073  CK20541S   5719235   
5         KC  CALVIN KLEIN SUN    45073  CK20541S   5719235   
6         KC  CALVIN KLEIN SUN    45073  CK20541S   5719605   

                  Style Region 09/2023 10/2023 11/2023  ... 02/2024 03/2024  \
2     CK20541S/57/BLACK   AMER    26.0    28.0    27.0  ...    24.0    21.0   
3     CK20541S/57/BLACK   EMEA    10.0    18.0    11.0  ...    36.0    84.0   
4  CK20541S/57/DARK TOR   AMER    33.0    39.0    14.0  ...    15.0    16.0   
5  CK20541S/57/DARK TOR   EMEA    21.0    15.0    11.0  ...    61.0    55.0   
6   CK20541S/57/CRYSTAL   AMER    11.0    21.0    15.0  ...     8.0    11.0   

  04/2024 05/2024 06/2024 07/2024 08/2024 Overall Result Size     Color  
2    35.0    21.0    20.0    32.0    16.0          285.0   57     BLACK 

In [23]:
print('demand shape:', demand.shape)

demand shape: (1866, 22)


In [24]:
# first drop rows without a style (they're empty/aggregate rows)
dropped = demand['Style'].isna().sum()
print(f"dropping {dropped} rows with missing Style")
demand = demand[demand['Style'].notna()].copy()

dropping 402 rows with missing Style


In [25]:
# reshape the demand data to long format
# first drop any styles that were missing (already done earlier)
demand_long = demand.melt(
    id_vars=meta_names + ["Size", "Color"],
    value_vars=month_cols,
    var_name="Month",
    value_name="Demand",
)

# drop any non-month labels such as "Overall Result" before converting
mask_valid = demand_long["Month"].astype(str).str.match(r"^\d{2}/\d{4}$")
demand_long = demand_long[mask_valid].copy()

# diagnostic prints
print("month_cols:", month_cols)
print("filtered demand_long shape:", demand_long.shape)
print(demand_long.head())

# align the date format to "YYYY-MM" using the correct pattern for MM/YYYY
# (the original strings are like '09/2023')
demand_long["Month"] = pd.to_datetime(
    demand_long["Month"],
    format="%m/%Y",         
    errors="coerce"
).dt.strftime("%Y-%m")

# check NaNs in the melted dataframe
nan_counts = demand_long.isna().sum()
print("NaN counts in demand_long:\n", nan_counts)


month_cols: ['09/2023', '10/2023', '11/2023', '12/2023', '01/2024', '02/2024', '03/2024', '04/2024', '05/2024', '06/2024', '07/2024', '08/2024', 'Overall Result']
filtered demand_long shape: (17568, 11)
  Collection         BrandLine Material StyleCode GridValue  \
0         KC  CALVIN KLEIN SUN    45073  CK20541S   5719001   
1         KC  CALVIN KLEIN SUN    45073  CK20541S   5719001   
2         KC  CALVIN KLEIN SUN    45073  CK20541S   5719235   
3         KC  CALVIN KLEIN SUN    45073  CK20541S   5719235   
4         KC  CALVIN KLEIN SUN    45073  CK20541S   5719605   

                  Style Region Size     Color    Month Demand  
0     CK20541S/57/BLACK   AMER   57     BLACK  09/2023   26.0  
1     CK20541S/57/BLACK   EMEA   57     BLACK  09/2023   10.0  
2  CK20541S/57/DARK TOR   AMER   57  DARK TOR  09/2023   33.0  
3  CK20541S/57/DARK TOR   EMEA   57  DARK TOR  09/2023   21.0  
4   CK20541S/57/CRYSTAL   AMER   57   CRYSTAL  09/2023   11.0  
NaN counts in demand_long:
 Collec

In [26]:
# merge by style region and month/ deduplicate
# This will automatically fill the demand=nan with 0.0
demand_monthly = demand_long.groupby(meta_names + ["Size", "Color",'Month'], as_index=False)["Demand"].sum()

In [27]:
# count rows where demand is exactly zero
demand_zero = (demand_monthly['Demand'] == 0).sum()
print(f"rows in demand_monthly with Demand == 0: {demand_zero}")

rows in demand_monthly with Demand == 0: 2563


## Map Color

In [28]:
demand_monthly["Color"].unique()

array(['BLACK', 'DARK TOR', 'CRYSTAL', 'BROWN HA', 'AVIO', 'ROSE',
       'BROWN', 'SAND', 'BLUE', 'OYSTER', 'BUTTERSC', 'AZURE', 'GOLD',
       'DARK HAV', 'VIOLET', 'GREY', 'TOKYO HA', 'MATTE BL', 'SILVER',
       'MATTE GO', 'PETROL', 'BURGUNDY', 'MINT', 'STRIPED', 'GREEN',
       'CHERRY', 'CHALK', 'PEACH', 'TAUPE', 'SATIN B', 'OBSIDIA', '59',
       'MATTE B', 'MT CRYS', 'MATTE D', 'MATTE N', 'BLACK-F', 'BLACK-P',
       'ANTHRAC', 'MIDNIGH', 'SATIN A', 'MATTE W', 'GRIDIRO', 'DARK GR',
       'MINERAL', 'GUNSMOK', 'ARMORY', 'CLEAR', 'MATTE A', 'MATTE T',
       'MATTE S', 'MATTE M', 'MATTE G', 'SATIN N', 'SATIN W', 'BRUSHED',
       'BRUSH', 'MATTE', 'SATIN', 'OIL G', '', 'Matte B', 'Matte D',
       'DARK', 'SATIN G', 'MATTE V', 'Black', 'Footbal', 'Dark Gr',
       'Soft Pi', 'Midnigh', 'Clear', 'GREEN G', 'INDIGO', 'PLUM GR',
       'STADIUM', 'DENIM G', 'OXBLOOD', 'LIGHT S', 'BURNT S', 'SMOKE',
       'DARK TO', 'BURGUND', 'VINTAGE', 'Mystic', 'Matte M', 'MATTE I',
       'NAV

### Prompt (for ChatGPT / LLM)

**Aim:** Get a mapping from raw color strings in our data to standard color categories for modeling.

**Model:** ChatGPT 5.2

**How to use:** Run the cell above to get `demand_monthly["Color"].unique()`, then paste that list into the prompt below in place of `<color list>`.

---

**Prompt (copy & replace <color list> with your list):**

> You are a data scientist working on sales prediction for eyewear. We have a "Color" feature with vendor-specific names (often truncated, e.g. BUTTERSC, DARK HAV). Map each of the following raw color labels into **one** standard category: black, blue, brown, grey, green, gold, silver, red, pink, purple, orange, yellow, beige, white, mint, burgundy, or other. Use lowercase. Avoid ambiguous or creative names (e.g. "wolf", "midnight haze"). For transparent/clear use "clear"; for patterns (e.g. striped) or truly unclear use "other".
>
> Return **only** a valid Python dictionary: keys = exact raw strings below, values = standard category strings. No explanation.
>
> Raw color list:
> ```
> <color list>
> ```


In [29]:
color_mapping = {
    'BLACK': 'black',
    'DARK TOR': "brown",
    'CRYSTAL': "Don't know",
    'BROWN HA': 'brown',
    'AVIO': 'blue',
    'ROSE': 'pink',
    'BROWN': 'brown',
    'SAND': 'beige',
    'BLUE': 'blue',
    'OYSTER': 'grey',
    'BUTTERSC': 'yellow',
    'AZURE': 'blue',
    'GOLD': 'gold',
    'DARK HAV': 'brown',
    'VIOLET': 'purple',
    'GREY': 'grey',
    'TOKYO HA': "Don't know",
    'MATTE BL': 'blue',
    'SILVER': 'silver',
    'MATTE GO': 'gold',
    'PETROL': 'blue',
    'BURGUNDY': 'red',
    'MINT': 'green',
    'STRIPED': "Don't know",
    'GREEN': 'green',
    'CHERRY': 'red',
    'CHALK': 'white',
    'PEACH': 'orange',
    'TAUPE': 'brown',
    'SATIN B': 'blue',
    'OBSIDIA': 'black',
    '59': "Don't know",
    'MATTE B': 'blue',
    'MT CRYS': "Don't know",
    'MATTE D': "Don't know",
    'MATTE N': "Don't know",
    'BLACK-F': 'black',
    'BLACK-P': 'black',
    'ANTHRAC': 'grey',
    'MIDNIGH': 'blue',
    'SATIN A': 'gold',
    'MATTE W': 'white',
    'GRIDIRO': "Don't know",
    'DARK GR': 'grey',
    'MINERAL': "Don't know",
    'GUNSMOK': 'grey',
    'ARMORY': "Don't know",
    'CLEAR': 'transparent',
    'MATTE A': "Don't know",
    'MATTE T': "Don't know",
    'MATTE S': "Don't know",
    'MATTE M': "Don't know",
    'MATTE G': "Don't know",
    'SATIN N': "Don't know",
    'SATIN W': "Don't know",
    'BRUSHED': "Don't know",
    'BRUSH': "Don't know",
    'MATTE': "Don't know",
    'SATIN': "Don't know",
    'OIL G': 'gold',
    '': "Don't know",
    'Matte B': 'blue',
    'Matte D': "Don't know",
    'DARK': "Don't know",
    'SATIN G': 'green',
    'MATTE V': 'purple',
    'Black': 'black',
    'Footbal': "Don't know",
    'Dark Gr': 'grey',
    'Soft Pi': 'pink',
    'Midnigh': 'blue',
    'Clear': 'transparent',
    'GREEN G': 'green',
    'INDIGO': 'blue',
    'PLUM GR': 'purple',
    'STADIUM': "Don't know",
    'DENIM G': 'blue',
    'OXBLOOD': 'red',
    'LIGHT S': "Don't know",
    'BURNT S': 'orange',
    'SMOKE': 'grey',
    'DARK TO': "Don't know",
    'BURGUND': 'red',
    'VINTAGE': "Don't know",
    'Mystic': "Don't know",
    'Matte M': "Don't know",
    'MATTE I': "Don't know",
    'NAVY': 'blue',
    'WOLF': "Don't know",
    'BIO BEI': "Don't know",
    'PLATINU': 'silver',
    'SOFT TO': "Don't know",
    'SOFT': "Don't know",
    'MEDITER': "Don't know",
    'TORTO': 'brown',
    'CRYST': "Don't know",
    'TORTOIS': 'brown',
    'FOREST': 'green',
    'DENIM': 'blue',
    'OLIVE': 'green',
    'CHARCOA': 'grey',
    'MINK': 'brown',
    'LILAC B': 'purple',
    'BLUSH': 'pink',
    'MATTE O': "Don't know",
    'Satin B': 'blue',
    'Satin G': 'green',
    'MATT BLAC': 'black',
    'GREY HAVA': 'grey',
    'BLONDE HA': 'yellow',
    'DARK HAVA': 'brown',
    'ROSE GOLD': 'gold',
    'AMBER GOL': 'gold',
    'MATTE BUR': 'brown',
    'CRYSTAL B': "Don't know",
    'BLUE GREY': 'blue',
    'SHINY CRY': "Don't know",
    'CRYSTAL C': "Don't know",
    'MATTE BLA': 'black',
    'CHARCOAL': 'grey',
    'HONEY TO': 'brown',
    'MILKY GR': 'grey',
    'MILKY BL': 'blue',
    'BROWN HAV': 'brown',
    'LIGHT G': 'green',
    'ANTIQUE': "Don't know",
    'CRYSTAL S': "Don't know",
    'KHAKI T': 'beige',
    'NAVY HO': 'blue',
    'VIOLET HA': 'purple',
    'HAVANA': 'brown',
    'BLUE LILA': 'blue',
    'GREY CORA': 'grey',
    'GREEN HAV': 'green',
    'BLUE HAVA': 'blue',
    'BROWN H': 'brown',
    'TRANSPARE': 'transparent',
    'SAGE': 'green',
    'MATTE GOL': 'gold',
    'LIGHT GOL': 'gold',
    'STRIPED G': "Don't know",
    'STRIPED B': "Don't know",
    'PURPLE': 'purple',
    'LIGHT BRO': 'brown',
    '47': "Don't know",
    '5': "Don't know",
    'TRANSPARENT': 'transparent',
    'MATTE BLACK': 'black',
    'KHAKI': 'beige',
    'BLUE STEEL': 'blue',
    'GUNMETAL': 'grey',
    'MATTE BLUE': 'blue',
    'SHINY GREEN': 'green',
    'MATTE RED': 'red',
    'BLACK MATTE': 'black',
    'GREEN MATTE': 'green',
    'BLUE MATTE': 'blue',
    'BURGUNDY MA': 'red',
    'MATTE CRYS': "Don't know",
    'DARK GREEN': 'green',
    'NAVY BLUE': 'blue',
    'MATTE DARK': "Don't know",
    'MATTE GREEN': 'green',
    'MATTE KHAKI': 'beige',
    'MATTE BLAC': 'black',
    'TRANSPAREN': 'transparent',
    'MATTE GREE': 'green',
    'BLUE AVIO': 'blue',
    'MATTE BLU': 'blue',
    'CRYSTAL G': "Don't know",
    'BLUE NAVY': 'blue',
    'GREY LUMI': 'grey',
    'BLUE LIME L': 'blue',
    'RED WHITE L': 'red',
    'TURQUOISE L': 'blue',
    'MATTE PURPL': 'purple',
    'MATTE GREY': 'grey',
    'LIGHT GREEN': 'green',
    'BLUE LUMI': 'blue',
    'PETROL LUMI': 'blue',
    'BURGUNDY LU': 'red',
    'ONYX MATTE': 'black',
    'BLACK MATT': 'black',
    'AQUA MATTE': 'blue',
    'MID BLUE M': 'blue',
    'SHIN': "Don't know",
    'MATT': "Don't know",
    'DARK BLUE': 'blue',
    'GREY MATTE': 'grey',
    'MATTE BROW': 'brown',
    'MATTE KHAK': 'beige',
    'BRICK': 'red',
    'BEIGE': 'beige',
    'LIGHT GREY': 'grey',
    'HAVANA BRO': 'brown',
    'HAVANA BLO': 'brown',
    'DARK HAVAN': 'brown'
}

demand_monthly["Color_Mapped"] = demand_monthly["Color"].map(color_mapping)
demand_monthly.Color_Mapped.unique()


array(['black', 'brown', "Don't know", 'blue', 'pink', 'beige', 'grey',
       'yellow', 'gold', 'purple', 'silver', 'red', 'green', 'white',
       'orange', 'transparent'], dtype=object)

In [30]:
demand_monthly.Size.unique()

array(['57', '55', '56', '52', '54', '51', '48', '3', '53', '49', '58',
       '50', '47', '59', '60', '', '46', 'NIKE PREMIER EV1071',
       'NIKE SMASH DZ7382', 'NIKE SMASH M DZ7383', 'NIKE LYNK M FD1817',
       'NIKE FIRE P FD1818', 'NIKE RAVE P FD1849', 'NIKE VALIANT FJ1996',
       'NIKE ENDURE FJ2185', 'NIKE EMBAR P FV2409'], dtype=object)

In [31]:
demand_monthly.loc[
    demand_monthly["Style"].str.contains("OPTICAL"),
    "OpticalOrOptical"
] = "Optical"

demand_monthly.loc[
    ~ demand_monthly["Style"].str.contains("OPTICAL"),
    "OpticalOrOptical"
] = "Sun"

In [32]:
demand_monthly.loc[
    demand_monthly["BrandLine"].str.contains("NIKE"),"BrandName"
] = "Nike"
demand_monthly.loc[
    demand_monthly["BrandLine"].str.contains("LACOSTE"),"BrandName"
] = "Lacoste"
demand_monthly.loc[
    demand_monthly["BrandLine"].str.contains("CALVIN KLEIN"),"BrandName"
] = "Calvin Klein"

In [33]:
demand_monthly

Unnamed: 0,Collection,BrandLine,Material,StyleCode,GridValue,Style,Region,Size,Color,Month,Demand,Color_Mapped,OpticalOrOptical,BrandName
0,KC,CALVIN KLEIN SUN,45073,CK20541S,5719001,CK20541S/57/BLACK,AMER,57,BLACK,2023-09,26.0,black,Sun,Calvin Klein
1,KC,CALVIN KLEIN SUN,45073,CK20541S,5719001,CK20541S/57/BLACK,AMER,57,BLACK,2023-10,28.0,black,Sun,Calvin Klein
2,KC,CALVIN KLEIN SUN,45073,CK20541S,5719001,CK20541S/57/BLACK,AMER,57,BLACK,2023-11,27.0,black,Sun,Calvin Klein
3,KC,CALVIN KLEIN SUN,45073,CK20541S,5719001,CK20541S/57/BLACK,AMER,57,BLACK,2023-12,26.0,black,Sun,Calvin Klein
4,KC,CALVIN KLEIN SUN,45073,CK20541S,5719001,CK20541S/57/BLACK,AMER,57,BLACK,2024-01,9.0,black,Sun,Calvin Klein
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17563,LS,LACOSTE SUNS,L995S,L995S,5318401,L995S/53/MATTE BLUE,EMEA,53,MATTE BLUE,2024-04,44.0,blue,Sun,Lacoste
17564,LS,LACOSTE SUNS,L995S,L995S,5318401,L995S/53/MATTE BLUE,EMEA,53,MATTE BLUE,2024-05,97.0,blue,Sun,Lacoste
17565,LS,LACOSTE SUNS,L995S,L995S,5318401,L995S/53/MATTE BLUE,EMEA,53,MATTE BLUE,2024-06,75.0,blue,Sun,Lacoste
17566,LS,LACOSTE SUNS,L995S,L995S,5318401,L995S/53/MATTE BLUE,EMEA,53,MATTE BLUE,2024-07,55.0,blue,Sun,Lacoste


## 2.Merge the products sheet

In [34]:
# load and merge all the products sheet into one dataframe, only keep the common columns

def _norm_cols(df):
    df.columns = [str(c).strip().upper() for c in df.columns]
    return df

product_sheets = [
    ("Calvin Klein", "Calvin Klein_Sept24 ATP.xlsx", "SUN"),
    ("Calvin Klein", "Calvin Klein_Sept24 ATP.xlsx", "OPH"),
    ("Lacoste", "LACOSTE_Sept24 ATP.xlsx", "LACOSTE OPTICAL"),
    ("Lacoste", "LACOSTE_Sept24 ATP.xlsx", "LACOSTE SUN"),
    ("Nike", "Nike_Sept24 ATP.xlsm", "Nike Sept 24 Optical"),
    ("Nike", "Nike_Sept24 ATP.xlsm", "Nike Sept 24 Sun"),
]

dfs = []
for brand, fname, sheet in product_sheets:
    df = pd.read_excel(base / fname, sheet_name=sheet)
    df = _norm_cols(df)
    
    df["_BRAND"] = brand
    df["_SHEET_TYPE"] = "Optical" if any(x in sheet.upper() for x in ["OPTICAL", "OPH"]) else "Sun"
    
    dfs.append(df)

common_cols = set(dfs[0].columns)
for df in dfs[1:]:
    common_cols = common_cols.intersection(set(df.columns))

parts = [df[list(common_cols)] for df in dfs]
products = pd.concat(parts, ignore_index=True)

print("shared cols:", list(products.columns))
print("shape", products.shape)

shared cols: ['COLORCODE', 'MATERIALCODE2', 'BASECURVE', 'FRAMESHAPE', 'RXABLE', 'COLORDESCRIPTION', 'MATERIALNUMBER', 'NOTES', 'SUNOPTICAL', 'FAMILY', 'IMAGE', '_SHEET_TYPE', 'EURORETAILPRICE', 'USWHOLESALEPRICE', 'RECOMMENDEDREASONS', 'MATERIALCODE1', 'BRAND', 'SIZES', 'COLORADD', 'FRAMECONSTRUCTION', 'GENDER', 'FIT', '_BRAND', 'EUROWHOLESALEPRICE', 'RELEASEDATE', 'MADEIN', 'PROTOTYPECODE', 'USRETAILPRICE']
shape (316, 28)


In [35]:
print(products.head())

   COLORCODE MATERIALCODE2 BASECURVE          FRAMESHAPE  RXABLE  \
0          1         METAL         2           BUTTERFLY    True   
1         14         METAL         2           BUTTERFLY    True   
2        200         METAL         2           BUTTERFLY    True   
3        716         METAL         2           BUTTERFLY    True   
4          2         METAL         2  MODIFIED RECTANGLE    True   

  COLORDESCRIPTION MATERIALNUMBER NOTES SUNOPTICAL      FAMILY  ...  COLORADD  \
0            BLACK       CK24110S   NaN        Sun  AVANTGARDE  ...       NaN   
1   LIGHT GUNMETAL       CK24110S   NaN        Sun  AVANTGARDE  ...       NaN   
2            BROWN       CK24110S   NaN        Sun  AVANTGARDE  ...       NaN   
3       LIGHT GOLD       CK24110S   NaN        Sun  AVANTGARDE  ...       NaN   
4      MATTE BLACK       CK24111S   NaN        Sun  AVANTGARDE  ...       NaN   

  FRAMECONSTRUCTION GENDER     FIT        _BRAND EUROWHOLESALEPRICE  \
0          FULL RIM      F  GLOBA

In [36]:
# diagnose NaNs and drop columns that are entirely NaN
nan_counts = products.isna().sum()
print("NaN counts in products:")
print(nan_counts)

all_nan_cols = nan_counts[nan_counts == len(products)].index.tolist()
print(f"dropping {len(all_nan_cols)} all-NaN columns: {all_nan_cols}")
products = products.drop(columns=all_nan_cols)
print("new shape:", products.shape)
print("remaining cols:", list(products.columns))

NaN counts in products:
COLORCODE               0
MATERIALCODE2           0
BASECURVE               0
FRAMESHAPE              0
RXABLE                  0
COLORDESCRIPTION        0
MATERIALNUMBER          0
NOTES                 302
SUNOPTICAL              0
FAMILY                  0
IMAGE                 316
_SHEET_TYPE             0
EURORETAILPRICE         0
USWHOLESALEPRICE        0
RECOMMENDEDREASONS    181
MATERIALCODE1           0
BRAND                   0
SIZES                   0
COLORADD              316
FRAMECONSTRUCTION       0
GENDER                  0
FIT                     0
_BRAND                  0
EUROWHOLESALEPRICE      0
RELEASEDATE             0
MADEIN                  0
PROTOTYPECODE           0
USRETAILPRICE           0
dtype: int64
dropping 2 all-NaN columns: ['IMAGE', 'COLORADD']
new shape: (316, 26)
remaining cols: ['COLORCODE', 'MATERIALCODE2', 'BASECURVE', 'FRAMESHAPE', 'RXABLE', 'COLORDESCRIPTION', 'MATERIALNUMBER', 'NOTES', 'SUNOPTICAL', 'FAMILY', '_SHEET_T