In [0]:
from datetime import datetime, timedelta

# Exercise 1

The file Christmas.txt contains some data about the visits of customers in an e-commerce website during December 2015. In this exercise we are going to save this information in a more Pythonic way which facilitates the analysis of the data. We will use a dictionary of `Customer` - a class we will create especially for this exercise.

**Part 1** - Create the class `Customer` based on the structure given below.


In [0]:
class Customer:
    def __init__(self, cid):
        self.cid = cid
        self.t_login = None
        self.t_logout = None
        self.duration = None
        self.revenue = 0
        
    def process_login_line(self, line, dt_fmt='%Y-%m-%d %H:%M:%S'):
        pass
    
    def process_logout_line(self, line, dt_fmt='%Y-%m-%d %H:%M:%S'):    
        pass
        

**Part 2** - Write a script which reads the content of the file and populates a dictionary `customers` with items of the form `{c_id: Customer}`.

> **Note:** Every customer appears exactly twice in the file. You don’t have to test that.

**Part 3** - Use the dictionary `customers` to answer the following questions:
1. How many customers do I have? How many buyers?
2. Which customer made the biggest purchase?
3.	What percent of the customers stayed more than 10 minutes?
4. What was the date with the highest number of logins?

## Solution

### Part 1

In [0]:
class Customer:
    def __init__(self, cid):
        self.cid = cid
        self.t_login = None
        self.t_logout = None
        self.duration = None
        self.revenue = 0
        
    def process_login_line(self, line, dt_fmt='%Y-%m-%d %H:%M:%S'):
        data = line.split()
        t_login_str = data[0] + ' ' + data[1]
        self.t_login = datetime.strptime(t_login_str, dt_fmt)
    
    def process_logout_line(self, line, dt_fmt='%Y-%m-%d %H:%M:%S'):    
        data = line.split()
        t_logout_str = data[0] + ' ' + data[1]
        self.t_logout = datetime.strptime(t_logout_str, dt_fmt)
        self.duration = self.t_logout - self.t_login
        self.revenue = float(data[-1])

### Part 2

In [3]:
import sys

if 'google.colab' in sys.modules:
    from google.colab import files
    uploaded = files.upload()

Saving christmas.txt to christmas.txt


In [0]:
fname = 'christmas.txt'
customers = {}
with open(fname) as f:
    for line in f:
        data = line.split()
        c_id = int(data[3][1:])
        if c_id not in customers:  # login data
            customers[c_id] = Customer(c_id)
            customers[c_id].process_login_line(line)
        else:  # logout data
            customers[c_id].process_logout_line(line)


### Part 3

#### Question 1

In [16]:
n_customers = len(customers)
n_buyers = len([c for c in customers.values() if c.revenue>0])
print(f'{n_buyers} buyers of {n_customers} customers')

2184 buyers of 9946 customers


#### Question 2

In [18]:
sorted(customers, key=lambda c: customers[c].revenue)[-1]

523239

In [19]:
customers[523239].revenue

611.34

#### Question 3

In [24]:
ten_min = timedelta(minutes=10)
len([c for c in customers.values() if c.duration>ten_min])

3526

#### Question 4

In [0]:
per_date = {}
for c in customers.values():
    c_date = c.t_login.date()
    if c_date not in per_date:
        per_date[c_date] = 1
    else:
        per_date[c_date] += 1

In [28]:
sorted(per_date, key=lambda d: per_date[d])[-1]

datetime.date(2015, 12, 5)