# 과제
아래 가이드에 따라 고객별 연간 구매데이터를 생성하고 아래사항을 수행하기(관측치 1,000개)

## 1. 데이터 생성

In [233]:
import pandas as pd
import numpy as np
import random
from datetime import datetime 
import time

### 1) 고객 데이터 생성

In [234]:
np.random.seed(1)

#고객별 속성 데이터프레임 만들기

cusno=np.arange(1001,2001)

gender=np.random.choice([0,1],size=1000)

age=np.random.randint(10,71,size=1000) # 랜덤 : 주문고객 10세 ~ 70세 

distance=np.random.uniform(0,3,size=1000) # 랜덤 : 반경 15 km 이내 주문 가능

In [235]:
#배열-> 시리즈
cusno=pd.Series(cusno)
gender=pd.Series(gender)
age=pd.Series(age)
distance=pd.Series(distance)

#데이터프레임 만들기
customer=pd.concat([cusno,gender,age,distance],axis=1)

#컬럼명 지정
customer.columns=['cusno','gender','age','distance']

customer.head()

Unnamed: 0,cusno,gender,age,distance
0,1001,1,66,2.614095
1,1002,1,64,2.646252
2,1003,0,14,1.289024
3,1004,0,19,0.821209
4,1005,1,70,2.544889


### 2) 상품 데이터 생성

In [236]:
#빈 데이터프레임 만들기
product =pd.DataFrame(columns=['menu','price'])
menu = ['후라이드-기본','후라이드-스파이시','양념-기본','양념-스파이시','반반']
price = [18000,19000,20000,20000,19000]


product['menu'] = pd.Series(menu)
product['price'] = pd.Series(price)
product

Unnamed: 0,menu,price
0,후라이드-기본,18000
1,후라이드-스파이시,19000
2,양념-기본,20000
3,양념-스파이시,20000
4,반반,19000


###  3) 거래 데이터 생성

In [237]:
#빈 데이터프레임 만들기
purchase=pd.DataFrame(columns=['cusno','quantity'])

In [238]:
np.random.seed(1)
#기본 데이터 만들기

#1. 중복 cusno 만들기
cusno=np.random.randint(1001,2001,size=10000)      #기존 cusno가 중복될 수 있도록 만들기

#2. 기본거래 데이터 만들기(치킨 마리수 별 가중치 부여)

li = []
for i in range(1,51):
    if i == 1 :
        for j in range(400): # 40.0%
            li.append(i)
    elif i== 2 :
        for j in range(200): # 20.0%
            li.append(i)
    elif i == 3 :
        for j in range(150): # 15.0%
            li.append(i)
    elif i < 6 :
        for j in range(75): # 15.0% 
            li.append(i)
    elif i < 11 :
        for j in range(10): #  5.0% 
            li.append(i)
    elif i < 21 :
        for j in range(2): # 2.0%
            li.append(i)
    else :
        for j in range(1): # 3.0%
            li.append(i)

#len(order)            
quantity=np.random.choice(li,size=10000)


In [239]:
#데이터프레임 채우기


purchase['cusno']=cusno
purchase['quantity']=quantity


purchase.head()

Unnamed: 0,cusno,quantity
0,1038,1
1,1236,1
2,1909,1
3,1073,3
4,1768,19


In [240]:
#날짜 데이터 만들기

    
#1.date 
date_list=list(pd.date_range('2020-01-01','2021-12-31',freq='D'))     #랜덤 선택을 위한 date 리스트

purchase['date']=np.random.choice(date_list,size=10000)               # date열 생성. 위 범위에서 랜덤1만개

#2. day(요일) 
purchase['day']=purchase['date'].apply(lambda x: x.weekday())         #date별 요일을 숫자로 나타냄
purchase['day']=purchase['day'].replace(list(range(7)),['월','화','수','목','금','토','일'])

#3. 시간 : 영업시간 오전 11시 ~ 라스트오더 23시
purchase['time']=np.random.randint(11,24,size=10000)

purchase.quantity.value_counts()   # 치킨 마리수별 거래횟수

1     3943
2     2009
3     1549
5      786
4      722
6      116
8      111
7       99
9       92
10      75
11      27
17      27
16      24
12      23
19      18
18      17
25      17
20      17
13      17
14      15
37      14
30      13
33      13
43      13
22      13
31      13
49      12
21      12
15      12
35      11
27      11
40      10
46      10
47      10
41      10
29      10
36      10
39       9
42       9
26       9
23       8
34       8
44       8
50       8
24       7
38       7
48       7
32       7
45       6
28       6
Name: quantity, dtype: int64

In [241]:
# 보기좋게 정렬
purchase = purchase.reindex(columns=['date','day','time','cusno','quantity']) # 컬럼 정렬
purchase = purchase.sort_values(by='date', ascending=True) # 날짜순(오름차순) 정렬
purchase.head()

Unnamed: 0,date,day,time,cusno,quantity
7747,2020-01-01,수,13,1205,20
6169,2020-01-01,수,22,1673,1
820,2020-01-01,수,11,1601,3
2473,2020-01-01,수,13,1295,4
9768,2020-01-01,수,12,1117,2


In [242]:
# 컬럼 추가
order = []
np.random.seed(1)
for i in range(len(purchase.quantity)):
    order.append(np.random.choice(menu,purchase.quantity[i]).tolist())
    
purchase['후라이드-기본'] = pd.Series(np.zeros(10000))
purchase['후라이드-스파이시'] = pd.Series(np.zeros(10000))
purchase['양념-기본'] = pd.Series(np.zeros(10000))
purchase['양념-스파이시'] = pd.Series(np.zeros(10000))
purchase['반반'] = pd.Series(np.zeros(10000))

purchase

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반
7747,2020-01-01,수,13,1205,20,0.0,0.0,0.0,0.0,0.0
6169,2020-01-01,수,22,1673,1,0.0,0.0,0.0,0.0,0.0
820,2020-01-01,수,11,1601,3,0.0,0.0,0.0,0.0,0.0
2473,2020-01-01,수,13,1295,4,0.0,0.0,0.0,0.0,0.0
9768,2020-01-01,수,12,1117,2,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2767,2021-12-31,금,16,1988,1,0.0,0.0,0.0,0.0,0.0
8947,2021-12-31,금,17,1510,4,0.0,0.0,0.0,0.0,0.0
5463,2021-12-31,금,21,1177,3,0.0,0.0,0.0,0.0,0.0
9564,2021-12-31,금,16,1291,5,0.0,0.0,0.0,0.0,0.0


In [243]:
for i in range(len(order)):    
    for j in order[i]:
        if j == '후라이드-기본':
            purchase.loc[i,'후라이드-기본'] += 1
        elif j == '후라이드-스파이시':
            purchase.loc[i,'후라이드-스파이시'] += 1
        elif j == '양념-기본':
            purchase.loc[i,'양념-기본'] += 1
        elif j == '양념-스파이시':
            purchase.loc[i,'양념-스파이시'] += 1
        else :
            purchase.loc[i,'반반'] += 1

In [244]:
purchase

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반
7747,2020-01-01,수,13,1205,20,2.0,4.0,4.0,5.0,5.0
6169,2020-01-01,수,22,1673,1,0.0,1.0,0.0,0.0,0.0
820,2020-01-01,수,11,1601,3,0.0,0.0,1.0,0.0,2.0
2473,2020-01-01,수,13,1295,4,0.0,1.0,2.0,0.0,1.0
9768,2020-01-01,수,12,1117,2,0.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2767,2021-12-31,금,16,1988,1,0.0,1.0,0.0,0.0,0.0
8947,2021-12-31,금,17,1510,4,1.0,2.0,0.0,1.0,0.0
5463,2021-12-31,금,21,1177,3,0.0,1.0,1.0,1.0,0.0
9564,2021-12-31,금,16,1291,5,2.0,1.0,1.0,0.0,1.0


In [245]:
purchase['amount'] = purchase['후라이드-기본']*18000 + purchase['후라이드-스파이시']*19000 + purchase['양념-기본']*20000 + purchase['양념-스파이시']*20000 + purchase['반반']*19000

In [246]:
purchase

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount
7747,2020-01-01,수,13,1205,20,2.0,4.0,4.0,5.0,5.0,387000.0
6169,2020-01-01,수,22,1673,1,0.0,1.0,0.0,0.0,0.0,19000.0
820,2020-01-01,수,11,1601,3,0.0,0.0,1.0,0.0,2.0,58000.0
2473,2020-01-01,수,13,1295,4,0.0,1.0,2.0,0.0,1.0,78000.0
9768,2020-01-01,수,12,1117,2,0.0,1.0,1.0,0.0,0.0,39000.0
...,...,...,...,...,...,...,...,...,...,...,...
2767,2021-12-31,금,16,1988,1,0.0,1.0,0.0,0.0,0.0,19000.0
8947,2021-12-31,금,17,1510,4,1.0,2.0,0.0,1.0,0.0,76000.0
5463,2021-12-31,금,21,1177,3,0.0,1.0,1.0,1.0,0.0,59000.0
9564,2021-12-31,금,16,1291,5,2.0,1.0,1.0,0.0,1.0,94000.0


---
---

In [247]:
# 주말 가중치 부여
date_list=list(pd.date_range('2020-01-01','2021-12-31',freq='D'))     #랜덤 선택을 위한 date 리스트

fri1 = list(pd.date_range('2020-01-01','2021-12-31', freq='W-FRI'))
sat1 = list(pd.date_range('2020-01-01','2021-12-31', freq='W-SAT'))
sun1 = list(pd.date_range('2020-01-01','2021-12-31', freq='W-SUN'))

In [248]:
date_list.extend(fri1)
date_list.extend(sat1)
date_list.extend(sun1)

In [249]:
len(date_list)

1044

In [250]:
df2['date']=np.random.choice(date_list,size=10000)               # date열 생성. 위 범위에서 랜덤1만개
df2['day']=df2['date'].apply(lambda x: x.weekday())         #date별 요일을 숫자로 나타냄
df2['day']=df2['day'].replace(list(range(7)),['월','화','수','목','금','토','일'])
df2

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
0,2021-01-16,토,13,1205,20,2.0,4.0,4.0,5.0,5.0,387000.0,1,28,1.910030
1,2020-11-24,화,23,1205,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,28,1.910030
2,2020-03-16,월,16,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.910030
3,2020-12-17,목,11,1205,2,0.0,1.0,0.0,0.0,1.0,38000.0,1,28,1.910030
4,2021-05-09,일,17,1205,1,0.0,0.0,1.0,0.0,0.0,20000.0,1,28,1.910030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2021-09-17,금,20,1928,1,1.0,0.0,0.0,0.0,0.0,18000.0,1,60,0.885712
9996,2020-06-16,화,21,1928,1,0.0,0.0,0.0,0.0,1.0,19000.0,1,60,0.885712
9997,2021-06-25,금,21,1928,5,2.0,2.0,1.0,0.0,0.0,94000.0,1,60,0.885712
9998,2021-07-24,토,15,1928,6,2.0,2.0,0.0,1.0,1.0,113000.0,1,60,0.885712


In [251]:
df2.day.value_counts()

일    2051
토    2033
금    2008
목    1029
수    1013
월     937
화     929
Name: day, dtype: int64

In [152]:
df2 =df.copy()
df2

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
0,2020-01-01,수,13,1205,20,2.0,4.0,4.0,5.0,5.0,387000.0,1,28,1.910030
1,2020-04-28,화,23,1205,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,28,1.910030
2,2020-05-11,월,16,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.910030
3,2020-06-23,화,11,1205,2,0.0,1.0,0.0,0.0,1.0,38000.0,1,28,1.910030
4,2020-06-30,화,17,1205,1,0.0,0.0,1.0,0.0,0.0,20000.0,1,28,1.910030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2021-05-29,토,20,1928,1,1.0,0.0,0.0,0.0,0.0,18000.0,1,60,0.885712
9996,2021-06-07,월,21,1928,1,0.0,0.0,0.0,0.0,1.0,19000.0,1,60,0.885712
9997,2021-06-11,금,21,1928,5,2.0,2.0,1.0,0.0,0.0,94000.0,1,60,0.885712
9998,2021-07-16,금,15,1928,6,2.0,2.0,0.0,1.0,1.0,113000.0,1,60,0.885712


In [155]:
# df2.groupby('day')

aaa = purchase.groupby('day')

In [156]:
for key, group in aaa : 
    print('요일 :', key)  # 지역 숫자로 출력
    print('요일수:', len(group))  # 거래횟수 요약출력
    print(group.head())

요일 : 금
요일수: 1388
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
2473 2021-06-18   금    13   1295         4      0.0        2.0    4.0   
9768 2020-10-02   금    12   1117         2      0.0        2.0    3.0   
470  2021-07-30   금    18   1250         5      0.0        3.0    0.0   
759  2020-12-11   금    20   1543         1      0.0        0.0    0.0   
663  2021-04-09   금    13   1534         8      3.0        3.0    3.0   

      양념-스파이시   반반    amount  
2473      0.0  4.0   78000.0  
9768      0.0  0.0   39000.0  
470       2.0  5.0   96000.0  
759       2.0  0.0   20000.0  
663       2.0  6.0  153000.0  
요일 : 목
요일수: 1438
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
9500 2020-10-08   목    19   1117         3      5.0        0.0    0.0   
8476 2021-07-08   목    23   1125         4      4.0        0.0    0.0   
9427 2021-05-13   목    20   1613         1      0.0        0.0    3.0   
9670 2021-08-05   목    13   1728         2      0

In [103]:
customer.head()

Unnamed: 0,cusno,gender,age,distance
0,1001,1,66,2.614095
1,1002,1,64,2.646252
2,1003,0,14,1.289024
3,1004,0,19,0.821209
4,1005,1,70,2.544889


---

In [104]:
product # 가격표

Unnamed: 0,menu,price
0,후라이드-기본,18000
1,후라이드-스파이시,19000
2,양념-기본,20000
3,양념-스파이시,20000
4,반반,19000


In [105]:
#데이터프레임 통합: 공통 변수 cusno를 활용해 데이터 결합

df=purchase.merge(customer,on='cusno',how='outer')
df.head(10)

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
0,2020-01-01,수,13,1205,20,2.0,4.0,4.0,5.0,5.0,387000.0,1,28,1.91003
1,2020-04-28,화,23,1205,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,28,1.91003
2,2020-05-11,월,16,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.91003
3,2020-06-23,화,11,1205,2,0.0,1.0,0.0,0.0,1.0,38000.0,1,28,1.91003
4,2020-06-30,화,17,1205,1,0.0,0.0,1.0,0.0,0.0,20000.0,1,28,1.91003
5,2020-08-11,화,23,1205,2,0.0,0.0,0.0,1.0,1.0,39000.0,1,28,1.91003
6,2020-11-13,금,13,1205,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,28,1.91003
7,2021-05-31,월,16,1205,5,2.0,0.0,0.0,1.0,2.0,94000.0,1,28,1.91003
8,2021-06-26,토,16,1205,4,0.0,1.0,0.0,3.0,0.0,79000.0,1,28,1.91003
9,2021-08-07,토,13,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.91003


In [107]:
# 날짜순 나열
df.sort_values(by='date', ascending=True)

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
0,2020-01-01,수,13,1205,20,2.0,4.0,4.0,5.0,5.0,387000.0,1,28,1.910030
76,2020-01-01,수,23,1199,1,0.0,0.0,0.0,0.0,1.0,19000.0,1,18,0.288805
87,2020-01-01,수,17,1982,5,0.0,0.0,3.0,2.0,0.0,100000.0,0,11,1.573302
98,2020-01-01,수,14,1104,1,0.0,0.0,1.0,0.0,0.0,20000.0,0,48,2.559929
40,2020-01-01,수,18,1250,5,0.0,2.0,0.0,1.0,2.0,96000.0,1,44,2.603963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4412,2021-12-31,금,16,1978,3,0.0,2.0,1.0,0.0,0.0,58000.0,1,66,1.981629
7188,2021-12-31,금,18,1853,1,1.0,0.0,0.0,0.0,0.0,18000.0,0,55,1.119325
8017,2021-12-31,금,22,1322,3,0.0,1.0,0.0,1.0,1.0,58000.0,1,65,1.985203
518,2021-12-31,금,12,1119,1,0.0,0.0,0.0,0.0,1.0,19000.0,0,48,2.079442


In [108]:
# 고객 1000명 기준 나열  (고객번호 1001~2000)
df.sort_values(by='cusno', ascending=True)

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
5808,2020-03-01,일,23,1001,4,1.0,0.0,1.0,2.0,0.0,78000.0,1,66,2.614095
5810,2020-08-14,금,11,1001,3,0.0,1.0,0.0,2.0,0.0,59000.0,1,66,2.614095
5811,2020-08-20,목,17,1001,2,0.0,0.0,0.0,1.0,1.0,39000.0,1,66,2.614095
5812,2020-12-17,목,12,1001,2,0.0,0.0,2.0,0.0,0.0,40000.0,1,66,2.614095
5813,2021-06-01,화,14,1001,1,0.0,0.0,1.0,0.0,0.0,20000.0,1,66,2.614095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8577,2021-07-29,목,16,2000,1,0.0,0.0,1.0,0.0,0.0,20000.0,0,23,2.910026
8579,2021-08-22,일,20,2000,1,0.0,0.0,0.0,0.0,1.0,19000.0,0,23,2.910026
8580,2021-11-10,수,20,2000,2,1.0,0.0,0.0,1.0,0.0,38000.0,0,23,2.910026
8572,2021-06-12,토,12,2000,1,0.0,1.0,0.0,0.0,0.0,19000.0,0,23,2.910026


In [109]:
# 2020년 데이터
df[df['date'].dt.year == 2020]

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
0,2020-01-01,수,13,1205,20,2.0,4.0,4.0,5.0,5.0,387000.0,1,28,1.910030
1,2020-04-28,화,23,1205,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,28,1.910030
2,2020-05-11,월,16,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.910030
3,2020-06-23,화,11,1205,2,0.0,1.0,0.0,0.0,1.0,38000.0,1,28,1.910030
4,2020-06-30,화,17,1205,1,0.0,0.0,1.0,0.0,0.0,20000.0,1,28,1.910030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9938,2020-11-20,금,13,1409,3,1.0,0.0,0.0,2.0,0.0,58000.0,0,30,1.315616
9942,2020-12-14,월,15,1906,5,0.0,2.0,1.0,0.0,2.0,96000.0,0,49,0.967072
9948,2020-12-15,화,19,1670,2,0.0,0.0,0.0,1.0,1.0,39000.0,1,52,0.621244
9952,2020-12-24,목,16,1091,1,0.0,0.0,1.0,0.0,0.0,20000.0,0,65,1.577723


In [110]:
# 2021년 데이터 
thisyear = df[df['date'].dt.year == 2021]
thisyear

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
7,2021-05-31,월,16,1205,5,2.0,0.0,0.0,1.0,2.0,94000.0,1,28,1.910030
8,2021-06-26,토,16,1205,4,0.0,1.0,0.0,3.0,0.0,79000.0,1,28,1.910030
9,2021-08-07,토,13,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.910030
10,2021-12-28,화,22,1205,1,0.0,0.0,0.0,0.0,1.0,19000.0,1,28,1.910030
14,2021-05-06,목,12,1673,1,0.0,1.0,0.0,0.0,0.0,19000.0,0,22,1.101119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2021-05-29,토,20,1928,1,1.0,0.0,0.0,0.0,0.0,18000.0,1,60,0.885712
9996,2021-06-07,월,21,1928,1,0.0,0.0,0.0,0.0,1.0,19000.0,1,60,0.885712
9997,2021-06-11,금,21,1928,5,2.0,2.0,1.0,0.0,0.0,94000.0,1,60,0.885712
9998,2021-07-16,금,15,1928,6,2.0,2.0,0.0,1.0,1.0,113000.0,1,60,0.885712


In [111]:
len(set(thisyear.cusno))

992

In [112]:
## 혼동주의.
# 이 식을 데이터프레임 a에 지정해야함
# 그러면 작년 대비 올해 치킨주문량 상승
for i in range(len(order)):    
    for j in order[i]:
        if j == '후라이드-기본':
            purchase.loc[i,'후라이드-기본'] += 2
        elif j == '후라이드-스파이시':
            purchase.loc[i,'후라이드-스파이시'] += 1
        elif j == '양념-기본':
            purchase.loc[i,'양념-기본'] += 2
        elif j == '양념-스파이시':
            purchase.loc[i,'양념-스파이시'] += 1
        else :
            purchase.loc[i,'반반'] += 3

In [113]:
mon = df[df['day'] == '월']
mon

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
2,2020-05-11,월,16,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.910030
7,2021-05-31,월,16,1205,5,2.0,0.0,0.0,1.0,2.0,94000.0,1,28,1.910030
21,2020-05-25,월,21,1601,3,1.0,2.0,0.0,0.0,0.0,56000.0,0,37,2.152954
27,2020-03-09,월,17,1295,2,1.0,0.0,0.0,0.0,1.0,37000.0,0,36,1.255237
35,2020-05-04,월,11,1117,1,0.0,0.0,0.0,0.0,1.0,19000.0,0,11,0.926378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9969,2021-10-25,월,19,1946,2,2.0,0.0,0.0,0.0,0.0,36000.0,1,39,2.477090
9984,2021-03-22,월,11,1170,5,1.0,3.0,0.0,1.0,0.0,95000.0,0,14,2.543157
9990,2021-05-31,월,22,1715,2,1.0,1.0,0.0,0.0,0.0,37000.0,1,25,1.439544
9991,2021-06-21,월,19,1715,5,3.0,0.0,1.0,0.0,1.0,93000.0,1,25,1.439544


In [114]:
while not mon :
    for i in range(len(order)):    
    for j in order[i]:
        if j == '후라이드-기본':
            purchase.loc[i,'후라이드-기본'] += 2
        elif j == '후라이드-스파이시':
            purchase.loc[i,'후라이드-스파이시'] += 2
        elif j == '양념-기본':
            purchase.loc[i,'양념-기본'] += 2
        elif j == '양념-스파이시':
            purchase.loc[i,'양념-스파이시'] += 2
        else :
            purchase.loc[i,'반반'] += 2

IndentationError: expected an indented block (417009572.py, line 3)

In [115]:
a = df.pivot_table(index= ['cusno'], values = 'amount', aggfunc = 'sum')
a

Unnamed: 0_level_0,amount
cusno,Unnamed: 1_level_1
1001,618000.0
1002,829000.0
1003,1226000.0
1004,659000.0
1005,823000.0
...,...
1996,370000.0
1997,424000.0
1998,323000.0
1999,406000.0


In [116]:
a.max()

amount    3216000.0
dtype: float64

In [118]:
df2.set_index('cusno') # 고객넘버 인덱스로

Unnamed: 0_level_0,date,day,time,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
cusno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1205,2020-01-01,수,13,20,2.0,4.0,4.0,5.0,5.0,387000.0,1,28,1.910030
1205,2020-04-28,화,23,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,28,1.910030
1205,2020-05-11,월,16,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.910030
1205,2020-06-23,화,11,2,0.0,1.0,0.0,0.0,1.0,38000.0,1,28,1.910030
1205,2020-06-30,화,17,1,0.0,0.0,1.0,0.0,0.0,20000.0,1,28,1.910030
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1928,2021-05-29,토,20,1,1.0,0.0,0.0,0.0,0.0,18000.0,1,60,0.885712
1928,2021-06-07,월,21,1,0.0,0.0,0.0,0.0,1.0,19000.0,1,60,0.885712
1928,2021-06-11,금,21,5,2.0,2.0,1.0,0.0,0.0,94000.0,1,60,0.885712
1928,2021-07-16,금,15,6,2.0,2.0,0.0,1.0,1.0,113000.0,1,60,0.885712


In [129]:
# df2['fri']= 
# df2['sat']= 
# df2['sun']= 
# df2.head()
# df2.groupby('day')
df2.pivot_table(index= ['cusno'], values = 'amount', aggfunc = 'sum')


In [136]:
# 2년치 주말 데이터 
fri = df2[(df2['day']) == '금']
sat = df2[(df2['day']) == '토']
sun = df2[(df2['day']) == '일']


fri.set_index('day')
sat.set_index('day')
sun.set_index('day')

weekend = pd.concat([fri,sat,sun])
weekend

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
6,2020-11-13,금,13,1205,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,28,1.910030
13,2020-10-16,금,22,1673,5,0.0,2.0,0.0,2.0,1.0,97000.0,0,22,1.101119
22,2021-05-28,금,14,1601,1,0.0,0.0,1.0,0.0,0.0,20000.0,0,37,2.152954
23,2021-07-30,금,19,1601,31,6.0,7.0,6.0,9.0,3.0,598000.0,0,37,2.152954
32,2021-12-10,금,20,1295,1,1.0,0.0,0.0,0.0,0.0,18000.0,0,36,1.255237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9975,2021-02-07,일,21,1938,4,1.0,0.0,0.0,1.0,2.0,76000.0,0,48,0.013912
9976,2021-03-21,일,11,1938,22,1.0,5.0,6.0,3.0,7.0,426000.0,0,48,0.013912
9978,2021-10-17,일,14,1938,5,1.0,0.0,1.0,0.0,3.0,95000.0,0,48,0.013912
9979,2021-02-21,일,11,1430,1,0.0,0.0,0.0,0.0,1.0,19000.0,0,28,0.565942


In [137]:
# 2년치 평일 데이터
mon = df2[(df2['day']) == '월']
tue = df2[(df2['day']) == '화']
wed = df2[(df2['day']) == '수']
thu = df2[(df2['day']) == '목']


mon.set_index('day')
tue.set_index('day')
wed.set_index('day')
thu.set_index('day')

weekday = pd.concat([mon, tue, wed, thu])
weekday

Unnamed: 0,date,day,time,cusno,quantity,후라이드-기본,후라이드-스파이시,양념-기본,양념-스파이시,반반,amount,gender,age,distance
2,2020-05-11,월,16,1205,1,0.0,1.0,0.0,0.0,0.0,19000.0,1,28,1.910030
7,2021-05-31,월,16,1205,5,2.0,0.0,0.0,1.0,2.0,94000.0,1,28,1.910030
21,2020-05-25,월,21,1601,3,1.0,2.0,0.0,0.0,0.0,56000.0,0,37,2.152954
27,2020-03-09,월,17,1295,2,1.0,0.0,0.0,0.0,1.0,37000.0,0,36,1.255237
35,2020-05-04,월,11,1117,1,0.0,0.0,0.0,0.0,1.0,19000.0,0,11,0.926378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9952,2020-12-24,목,16,1091,1,0.0,0.0,1.0,0.0,0.0,20000.0,0,65,1.577723
9967,2021-10-21,목,12,1946,1,0.0,0.0,0.0,1.0,0.0,20000.0,1,39,2.477090
9970,2021-12-23,목,13,1946,2,0.0,0.0,1.0,1.0,0.0,40000.0,1,39,2.477090
9977,2021-08-12,목,13,1938,1,0.0,0.0,0.0,0.0,1.0,19000.0,0,48,0.013912


In [None]:
np.random.seed(1)
#기본 데이터 만들기

#2. 기본거래 데이터 만들기(주말 가중치 부여)

li = []
for i in range():
    if i == 0 :                            # 월요일
        for j in range(400): # 40.0%
            li.append(i)
    elif i== 1 :
        for j in range(200): # 20.0%
            li.append(i)
    elif i== 2 :
        for j in range(150): # 15.0%
            li.append(i)
    elif i== 3 :
        for j in range(75): # 15.0% 
            li.append(i)
    elif i== 4 :
        for j in range(10): #  5.0% 
            li.append(i)
    elif i== 5 :
        for j in range(2): # 2.0%
            li.append(i)
    elif i== 6 :
        for j in range(2): # 2.0%
            li.append(i)

    else :
        for j in range(1): # 3.0%
            li.append(i)

#len(order)            
quantity=np.random.choice(li,size=10000)


요일 : 2020-01-01 00:00:00
요일수: 11
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
9221 2020-01-01   수    18   1658         1      0.0        0.0    0.0   
6410 2020-01-01   수    21   1627         1      0.0        0.0    0.0   
1082 2020-01-01   수    17   1434         3      3.0        0.0    3.0   
3220 2020-01-01   수    13   1746         1      0.0        2.0    0.0   
7494 2020-01-01   수    19   1328         1      0.0        0.0    0.0   

      양념-스파이시   반반   amount  
9221      2.0  0.0  20000.0  
6410      0.0  4.0  19000.0  
1082      0.0  4.0  57000.0  
3220      0.0  0.0  19000.0  
7494      0.0  4.0  19000.0  
요일 : 2020-01-02 00:00:00
요일수: 14
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
5752 2020-01-02   목    15   1679         4      3.0        0.0    3.0   
7970 2020-01-02   목    23   1992         1      0.0        0.0    0.0   
8294 2020-01-02   목    17   1054         1      0.0        2.0    0.0   
8536 2020-01-02   목    

           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
2874 2020-03-03   화    12   1175         3      3.0        2.0    0.0   
8962 2020-03-03   화    15   1188         1      0.0        2.0    0.0   
8253 2020-03-03   화    17   1157         1      3.0        0.0    0.0   
834  2020-03-03   화    17   1945         1      0.0        0.0    0.0   
1261 2020-03-03   화    16   1137         4      0.0        2.0    0.0   

      양념-스파이시   반반   amount  
2874      0.0  4.0  56000.0  
8962      0.0  0.0  19000.0  
8253      0.0  0.0  18000.0  
834       0.0  4.0  19000.0  
1261      2.0  5.0  77000.0  
요일 : 2020-03-04 00:00:00
요일수: 9
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
7483 2020-03-04   수    15   1825         4      3.0        0.0    4.0   
4279 2020-03-04   수    13   1884         6      0.0        4.0    0.0   
1922 2020-03-04   수    16   1741        29      7.0        4.0    9.0   
273  2020-03-04   수    14   1254         5      3.0      

요일수: 13
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
2875 2020-07-02   목    19   1831        25      5.0        6.0    9.0   
7133 2020-07-02   목    19   1913         1      3.0        0.0    0.0   
1442 2020-07-02   목    19   1323         2      3.0        0.0    3.0   
7294 2020-07-02   목    12   1525         3      0.0        3.0    0.0   
3841 2020-07-02   목    17   1778         1      0.0        0.0    3.0   

      양념-스파이시   반반    amount  
2875      8.0  6.0  486000.0  
7133      0.0  0.0   18000.0  
1442      0.0  0.0   38000.0  
7294      2.0  0.0   58000.0  
3841      0.0  0.0   20000.0  
요일 : 2020-07-03 00:00:00
요일수: 19
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
9112 2020-07-03   금    11   1119         2      3.0        0.0    0.0   
1436 2020-07-03   금    15   1356         1      0.0        2.0    0.0   
3577 2020-07-03   금    23   1106         3      4.0        0.0    0.0   
6017 2020-07-03   금    14   1612         1

요일수: 15
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
9041 2020-10-12   월    11   1816         3      3.0        2.0    0.0   
3839 2020-10-12   월    19   1648         4      0.0        0.0    0.0   
2829 2020-10-12   월    14   1581         2      3.0        2.0    0.0   
2542 2020-10-12   월    19   1524         5      3.0        0.0    3.0   
7514 2020-10-12   월    11   1923         2      3.0        0.0    0.0   

      양념-스파이시   반반   amount  
9041      2.0  0.0  57000.0  
3839      3.0  5.0  78000.0  
2829      0.0  0.0  37000.0  
2542      3.0  4.0  97000.0  
7514      0.0  4.0  37000.0  
요일 : 2020-10-13 00:00:00
요일수: 17
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
5420 2020-10-13   화    15   1289         3      0.0        2.0    0.0   
7621 2020-10-13   화    11   1489         1      3.0        0.0    0.0   
3332 2020-10-13   화    13   1984         1      0.0        0.0    3.0   
8077 2020-10-13   화    16   1642         1      

1461      0.0  4.0  19000.0  
요일 : 2020-11-27 00:00:00
요일수: 14
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
2916 2020-11-27   금    20   1537         2      0.0        0.0    0.0   
5456 2020-11-27   금    18   1317         4      4.0        2.0    0.0   
9466 2020-11-27   금    13   1473         1      0.0        0.0    3.0   
8550 2020-11-27   금    17   1224         1      0.0        0.0    0.0   
2905 2020-11-27   금    14   1860         1      3.0        0.0    0.0   

      양념-스파이시   반반   amount  
2916      2.0  4.0  39000.0  
5456      2.0  0.0  75000.0  
9466      0.0  0.0  20000.0  
8550      2.0  0.0  20000.0  
2905      0.0  0.0  18000.0  
요일 : 2020-11-28 00:00:00
요일수: 10
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
7702 2020-11-28   토    16   1961         1      0.0        0.0    3.0   
784  2020-11-28   토    16   1748         1      0.0        0.0    0.0   
374  2020-11-28   토    14   1052         1      3.0        0.0    

           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
6324 2021-02-11   목    11   1197         1      0.0        0.0    3.0   
6170 2021-02-11   목    23   1653         4      0.0        0.0    4.0   
7450 2021-02-11   목    21   1411         3      3.0        0.0    3.0   
8551 2021-02-11   목    17   1569         1      0.0        0.0    0.0   
20   2021-02-11   목    18   1255         1      0.0        0.0    3.0   

      양념-스파이시   반반   amount  
6324      0.0  0.0  20000.0  
6170      2.0  4.0  79000.0  
7450      0.0  4.0  57000.0  
8551      2.0  0.0  20000.0  
20        0.0  0.0  20000.0  
요일 : 2021-02-12 00:00:00
요일수: 13
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
1992 2021-02-12   금    13   1143         9      4.0        3.0    3.0   
4276 2021-02-12   금    22   1003         1      0.0        0.0    0.0   
8460 2021-02-12   금    11   1205         2      0.0        2.0    0.0   
2527 2021-02-12   금    18   1776         2      0.0     

요일 : 2021-04-16 00:00:00
요일수: 10
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
694  2021-04-16   금    19   1806         3      3.0        0.0    4.0   
4322 2021-04-16   금    22   1221         1      0.0        0.0    3.0   
70   2021-04-16   금    17   1027         1      0.0        0.0    0.0   
1778 2021-04-16   금    15   1729         5      0.0        2.0    3.0   
7465 2021-04-16   금    18   1109         2      3.0        0.0    3.0   

      양념-스파이시   반반   amount  
694       0.0  0.0  58000.0  
4322      0.0  0.0  20000.0  
70        2.0  0.0  20000.0  
1778      2.0  5.0  97000.0  
7465      0.0  0.0  38000.0  
요일 : 2021-04-17 00:00:00
요일수: 10
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
6268 2021-04-17   토    13   1879         1      0.0        0.0    0.0   
5209 2021-04-17   토    20   1520         3      0.0        2.0    3.0   
2940 2021-04-17   토    18   1765         5      3.0        2.0    3.0   
8684 2021-04-17   토    

           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
7304 2021-06-20   일    22   1506         3      0.0        2.0    3.0   
7192 2021-06-20   일    13   1880         2      3.0        2.0    0.0   
6697 2021-06-20   일    13   1032         3      3.0        2.0    0.0   
3157 2021-06-20   일    21   1362         2      3.0        0.0    3.0   
6113 2021-06-20   일    17   1314         1      0.0        0.0    0.0   

      양념-스파이시   반반   amount  
7304      0.0  4.0  58000.0  
7192      0.0  0.0  37000.0  
6697      2.0  0.0  57000.0  
3157      0.0  0.0  38000.0  
6113      0.0  4.0  19000.0  
요일 : 2021-06-21 00:00:00
요일수: 14
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
4592 2021-06-21   월    17   1565         3      0.0        3.0    3.0   
3585 2021-06-21   월    19   1807         1      0.0        0.0    0.0   
9402 2021-06-21   월    21   1251         4      0.0        3.0    3.0   
405  2021-06-21   월    16   1926         3      3.0     

           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
1444 2021-08-17   화    11   1033         3      0.0        0.0    3.0   
8503 2021-08-17   화    14   1649         1      0.0        0.0    0.0   
1413 2021-08-17   화    16   1747         3      4.0        0.0    0.0   
3876 2021-08-17   화    21   1475         1      0.0        0.0    3.0   
7903 2021-08-17   화    20   1902         1      0.0        0.0    3.0   

      양념-스파이시   반반   amount  
1444      2.0  4.0  59000.0  
8503      2.0  0.0  20000.0  
1413      0.0  4.0  55000.0  
3876      0.0  0.0  20000.0  
7903      0.0  0.0  20000.0  
요일 : 2021-08-18 00:00:00
요일수: 10
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
3391 2021-08-18   수    13   1203         1      0.0        0.0    3.0   
3294 2021-08-18   수    16   1038         3      0.0        0.0    4.0   
7318 2021-08-18   수    16   1756         4      0.0        0.0    4.0   
5876 2021-08-18   수    18   1142         1      0.0     

           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
7045 2021-11-02   화    11   1566         5      4.0        2.0    0.0   
4486 2021-11-02   화    18   1196         1      0.0        0.0    0.0   
9871 2021-11-02   화    12   1808         1      3.0        0.0    0.0   
8347 2021-11-02   화    22   1438         2      3.0        0.0    0.0   
5547 2021-11-02   화    19   1203         1      0.0        0.0    0.0   

      양념-스파이시   반반   amount  
7045      2.0  4.0  94000.0  
4486      2.0  0.0  20000.0  
9871      0.0  0.0  18000.0  
8347      0.0  4.0  37000.0  
5547      0.0  4.0  19000.0  
요일 : 2021-11-03 00:00:00
요일수: 16
           date day  time  cusno  quantity  후라이드-기본  후라이드-스파이시  양념-기본  \
1130 2021-11-03   수    11   1051         2      0.0        2.0    3.0   
9830 2021-11-03   수    14   1167         1      0.0        0.0    0.0   
6395 2021-11-03   수    17   1693         1      3.0        0.0    0.0   
9899 2021-11-03   수    19   1970        16      3.0     