## 월간 보고용 : 매출 분석

In [1]:
import pandas as pd
import re
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import apriori, association_rules
import datetime as dt
from dateutil.relativedelta import relativedelta
import os
from collections import Counter

In [2]:
# Work from the project folder so that relative output paths resolve there.
input_path = 'C:/GMG_R&D/3. 장바구니 분석/1. 한총' 
os.chdir(input_path)
path = os.getcwd()
print(path)

# The report folder is labelled with the previous month. Take the year from
# the same shifted date: pairing the current year with the previous month
# mislabels a report generated in January (e.g. "<this year>년_12월").
x = dt.datetime.now()
report_month = x - relativedelta(months=1)
file_name = f'월간보고서_{report_month.year}년_{report_month.month}월/매출_{x.month}월 {x.day}일'

# exist_ok avoids the separate exists() check and makes re-running the cell safe.
os.makedirs(file_name, exist_ok=True)

C:\GMG_R&D\3. 장바구니 분석\1. 한총


In [3]:
def frozen_convert(sets):
    """Return a one-element list holding the items of ``sets`` as a list."""
    members = [*sets]
    return [members]

In [4]:
# Render floats with a thousands separator and one decimal place.
pd.set_option('display.float_format', '{:,.1f}'.format)

### 1. 한총 데이터 로드

In [5]:
# Read the sales table from the system clipboard, parsed as tab-separated text.
df= pd.read_clipboard(sep='\t')
# Print the index/column/dtype summary of what was pasted.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5492 entries, Model Code (v41) to PSKS0101G
Data columns (total 2 columns):
 #   Column                    Non-Null Count  Dtype
---  ------                    --------------  -----
 0   Order (purchase event)    5492 non-null   int64
 1   Revenue (purchase event)  5492 non-null   int64
dtypes: int64(2)
memory usage: 128.7+ KB


In [6]:
# Skip the first row, move the index into a "Model Code (v41)" column
# and replace missing values with 0.
df = (
    df[1:]
    .reset_index()
    .rename(columns={"index": "Model Code (v41)"})
    .fillna(0)
)
df

Unnamed: 0,Model Code (v41),Order (purchase event),Revenue (purchase event)
0,RF85B90023Y,1242,1973103232
1,VS20B957D5G,980,721538145
2,VCA-ADB95A,685,6966975
3,HAF-HIN,667,77849421
4,VCA-SBT90/VT,658,63826294
...,...,...,...
5486,NZ60R7703PW,1,0
5487,SMS-S20W,1,0
5488,HTRPremium,1,0
5489,JBLT760NCBLS,1,0


### 2. 카테고리

In [7]:
# Fetch the category table from the clipboard (tab-separated) with every
# column cast to str.
rex = pd.read_clipboard(sep='\t').astype(str)
rex.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38 entries, 0 to 37
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   depth1            38 non-null     object
 1   Category1D        38 non-null     object
 2   Category2D        38 non-null     object
 3   Category2D_Value  38 non-null     object
dtypes: object(4)
memory usage: 1.3+ KB


### 3. 부문별 매출 (CE+CE, MX+MX, CE+MX)

In [8]:
# Names of the columns that contain "Order".
only_order = [column for column in df.columns if 'Order' in column]
only_order

['Order (purchase event)']

In [9]:
# Names of the columns that contain "Revenue".
only_reve = [column for column in df.columns if 'Revenue' in column]
only_reve

['Revenue (purchase event)']

In [10]:
# Revenue of multi-item baskets split by division.
#
# For each (order column, revenue column) pair, rows whose model code lists
# several comma-separated SKUs are classified by the depth1 label of every SKU:
#   * MX+MX : every SKU label contains 'MX'
#   * CE+CE : every SKU label contains 'CE'
#   * CE+MX : at least one 'CE' label and at least one 'MX' label
# and the absolute revenue of the row is added to each matching bucket.
#
# Revenue is accumulated numerically per row. This avoids the KeyError that
# pd.DataFrame([])[0] raises when a bucket has no rows, and it does not depend
# on recovering the amount from a "label_amount" string (which breaks when a
# label contains '_' or when the revenue column is float-typed).

summary_columns = ['CE+MX','CE+CE','MX+MX','공동구매 비율','전체 매출','공동구매(2개이상)','기타']

# Compile each category pattern once instead of once per (SKU, pattern) pair.
category_patterns = [
    (re.compile(pattern), division)
    for pattern, division in zip(rex['Category2D_Value'], rex['depth1'])
]

# SKU -> depth1 label; filled lazily and shared by all column pairs.
dic_category = {}


def _division_of(model_code):
    """Return the depth1 label of the first pattern found in ``model_code``, or '기타'."""
    if model_code not in dic_category:
        dic_category[model_code] = next(
            (division for pattern, division in category_patterns
             if pattern.search(model_code)),
            '기타',
        )
    return dic_category[model_code]


new_df = []
for y, revenue_y in zip(only_order, only_reve):

    # Keep rows that list more than one SKU and have a non-zero order count.
    y_df = df[['Model Code (v41)', y, revenue_y]]
    y_df = y_df[y_df['Model Code (v41)'].str.contains(',')]
    y_df = y_df[y_df[y] != 0]
    y_df = y_df.reset_index(drop=True)

    two_sum = 0      # all multi-item rows
    mx_reve = 0      # MX only
    mx_ce_reve = 0   # MX and CE together
    ce_reve = 0      # CE only

    for sku, revenue in zip(y_df['Model Code (v41)'], y_df[revenue_y]):
        amount = abs(int(revenue))
        divisions = [_division_of(code) for code in sku.split(',')]
        has_mx = ['MX' in division for division in divisions]
        has_ce = ['CE' in division for division in divisions]

        two_sum += amount
        if all(has_mx):
            mx_reve += amount
        if any(has_ce) and any(has_mx):
            mx_ce_reve += amount
        if all(has_ce):
            ce_reve += amount

    print('MX 총 매출', mx_reve)
    print('MX+CE 총 매출', mx_ce_reve)
    print('CE 총 매출', ce_reve)

    all_rev = df[revenue_y].sum()                       # total revenue, all rows
    ratio_two = two_sum / all_rev                       # share of multi-item revenue
    etc = two_sum - (mx_ce_reve + ce_reve + mx_reve)    # multi-item revenue in no bucket
    new_df.append([mx_ce_reve, ce_reve, mx_reve, ratio_two, all_rev, two_sum, etc])


# Save one row per revenue column, then display the same table.
pd.DataFrame(new_df, columns=summary_columns, index=list(only_reve)).to_excel(f'{path}/{file_name}/[매출]부문별 통계.xlsx')
pd.DataFrame(new_df, columns=summary_columns)


MX 총 매출 757465000
MX+CE 총 매출 130905100
CE 총 매출 4668864703


Unnamed: 0,CE+MX,CE+CE,MX+MX,공동구매 비율,전체 매출,공동구매(2개이상),기타
0,130905100,4668864703,757465000,0.2,37434451310,5681908163,124673360


### 4. 단독구매 원할 경우, 주문 및 매출 기초통계

In [11]:
# Totals per metric column, split into single-item rows and multi-item rows
# (rows whose model code contains a comma), plus each group's share in percent.
new_df = []
stat_columns = ['전체','1개','2개이상','제품 1개 구매 비중', '제품 2개이상 구매 비중']

# The comma mask does not depend on the metric column, so build it once.
is_multi = df['Model Code (v41)'].str.contains(',')

for sample in df.columns[1:]:

    # Named `total`, not `all`: rebinding the builtin at notebook scope breaks
    # every later all(...) call, including a re-run of the division cell.
    total = df[sample].sum()
    one = df.loc[~is_multi, sample].sum()
    two = df.loc[is_multi, sample].sum()
    percent_one = round((one / total) * 100, 2)
    percent_two = round((two / total) * 100, 2)

    print('전체: ', total)
    print('1개: ', one)
    print('2개이상: ', two)
    print("제품 1개 구매 비중: ", percent_one)
    print("제품 2개이상 구매 비중: ", percent_two)

    new_df.append([total, one, two, percent_one, percent_two])

# Save with the first three characters of each column name as row labels,
# then display the same table.
pd.DataFrame(new_df, columns=stat_columns,
            index=[i[:3] for i in df.columns[1:]]).to_csv(f'{path}/{file_name}/[참고용]주문_매출_기초통계량결과.csv', encoding='utf-8-sig')
pd.DataFrame(new_df, columns=stat_columns)


전체:  57236
1개:  51000
2개이상:  6236
제품 1개 구매 비중:  89.1
제품 2개이상 구매 비중:  10.9
전체:  37434451310
1개:  31752543147
2개이상:  5681908163
제품 1개 구매 비중:  84.82
제품 2개이상 구매 비중:  15.18


Unnamed: 0,전체,1개,2개이상,제품 1개 구매 비중,제품 2개이상 구매 비중
0,57236,51000,6236,89.1,10.9
1,37434451310,31752543147,5681908163,84.8,15.2


#### 참고) 매출 계산시 2020년도와 2021년에는 float의 형태로 필수로 진행 (2022년도는 패스)

In [None]:
# for i in only_reve[:-1]:
#     df[i]=df[i].astype(str).apply(lambda x: x[:-2])
# df[only_reve[:-1]]

In [None]:
# df['2020_revenue']=df['2020_revenue'].astype(str)
# df