# 쇼핑몰 고객 주문데이터 파악하기
- 현재 상황 파악
- 모델 수립 혹은 목표 설정

- 데이터셋
  - 온라인리테일사이트의 2010.12~2011.12간의 주문기록 데이터
  - 총 541,909건(약 54만 건)의 주문 기록
  

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [11]:
# Load the raw order records from the CSV file into a DataFrame.
df = pd.read_csv('./Data/OnlineRetail.csv')
# Print each column's dtype and non-null count.
df.info()
# Display the last row of the frame.
df.tail(1)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
InvoiceNo      541909 non-null object
StockCode      541909 non-null object
Description    540455 non-null object
Quantity       541909 non-null int64
InvoiceDate    541909 non-null object
UnitPrice      541909 non-null float64
CustomerID     406829 non-null float64
Country        541909 non-null object
dtypes: float64(2), int64(1), object(5)
memory usage: 33.1+ MB


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
541908,581587,22138,BAKING SET 9 PIECE RETROSPOT,3,12/9/2011 12:50,4.95,12680.0,France


In [14]:
# Display the column labels of the raw frame.
# NOTE: iterating over df.columns yields the labels themselves, so a label
# should not be used as a positional index back into df.columns.
df.columns

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

## 컬럼
```
'InvoiceNo'
'StockCode'
'Description'
'Quantity'
'InvoiceDate'
'UnitPrice'
'CustomerID'
'Country'
```

## Data Cleansing
- null 처리
  - CustomerID
- 비즈니스 로직에 맞지 않는 데이터 처리
  - 음수의 아이템 수량
  - 가격이 0원 이하



In [16]:
# Count the missing values in every column of the raw frame.
pd.isnull(df).sum()

InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64

In [17]:
# Number of rows in the raw frame.
df.shape[0]

541909

In [19]:
# Keep only the rows whose CustomerID is present, then show how many remain.
retail = df[df['CustomerID'].notnull()]
retail.shape[0]

406829

In [20]:
# Remove rows that do not fit the business logic: both the quantity and the
# unit price must be strictly positive. Then show how many rows remain.
retail = retail[(retail['Quantity'] > 0) & (retail['UnitPrice'] > 0)]
retail.shape[0]

397884

In [26]:
# Inspect the current dtype of the CustomerID column.
retail['CustomerID'].dtype

dtype('int32')

In [27]:
# Change the data type of CustomerID to a 32-bit integer
# (memory efficiency / matching the proper data type), then confirm the result.
retail['CustomerID'] = retail['CustomerID'].astype('int32')
retail['CustomerID'].dtype

dtype('int32')

In [28]:
# Checkout amount for each order row: unit price multiplied by quantity.
retail['CheckoutPrice'] = retail['UnitPrice'].mul(retail['Quantity'])
# Preview the first five rows, including the new column.
retail.head(5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,CheckoutPrice
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850,United Kingdom,15.3
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850,United Kingdom,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850,United Kingdom,20.34


In [29]:
# Write the cleansed frame to a CSV file.
# NOTE(review): no `index` argument is passed, so pandas' default index
# handling applies to the output file — confirm this is what readers expect.
retail.to_csv('./Data/onlineRetailCleansed.csv')