In [None]:
!pip install featuretools



In [None]:
import featuretools as ft
import pandas as pd

## Load Dataset

In [None]:
# Build the four toy e-commerce tables (customers, products, orders and
# order details) as plain dicts, then turn each one into a DataFrame.

customers = {
    "CustomerID": [101, 102, 103, 104, 105],
    "Name": ['John Doe', 'Jane Smith', 'Mike Jordan', 'Emma Stone', 'Chris Evans'],
    "Email": ['john.doe@example.com', 'jane.smith@example.com', 'mike.jordan@example.com', 'emma.stone@example.com', 'chris.evans@example.com'],
    "SignupDate": ['2023-01-10', '2023-01-15', '2023-01-20', '2023-02-01', '2023-02-05']
}

products = {
    'ProductID': [201, 202, 203, 204, 205],
    'Name': ['Laptop', 'Tablet', 'Smartphone', 'Headphones', 'Smartwatch'],
    'Category': ['Electronics', 'Electronics', 'Electronics', 'Accessories', 'Electronics'],
    'Price': [1000, 500, 800, 200, 300]
}

orders = {
    "OrderID": [301, 302, 303, 304, 305, 306, 307],
    "CustomerID": [101, 102, 103, 104, 105, 101, 102],
    "OrderDate": ['2023-02-01', '2023-02-05', '2023-02-10', '2023-02-12', '2023-02-15', '2023-02-20', '2023-02-22'],
    "ShipDate": ['2023-02-03', '2023-02-07', '2023-02-12', '2023-02-14', '2023-02-17', '2023-02-22', '2023-02-25']
}

order_details = {
    "OrderID": [301, 302, 303, 304, 305, 306, 307, 304, 306, 307],
    "ProductID": [201, 202, 203, 204, 205, 201, 202, 203, 204, 205],
    "Quantity": [1, 2, 1, 1, 3, 2, 1, 1, 2, 1],
    "Discount": [0, 0.1, 0, 0.05, 0.15, 0, 0.1, 0.05, 0, 0.1]
}

# Every date literal above is written as year-month-day; parse with an
# explicit format so the columns are real datetimes rather than strings.
DATE_FORMAT = "%Y-%m-%d"

customers_df = pd.DataFrame(customers).assign(
    SignupDate=lambda frame: pd.to_datetime(frame["SignupDate"], format=DATE_FORMAT)
)
products_df = pd.DataFrame(products)
orders_df = pd.DataFrame(orders).assign(
    OrderDate=lambda frame: pd.to_datetime(frame["OrderDate"], format=DATE_FORMAT),
    ShipDate=lambda frame: pd.to_datetime(frame["ShipDate"], format=DATE_FORMAT),
)

# An order can contain several products, so neither OrderID nor ProductID
# identifies an order-detail row on its own. Combine them into a single
# "<OrderID>_<ProductID>" key (appended as the last column).
order_details_df = pd.DataFrame(order_details).assign(
    OrderDetailsID=lambda frame: frame["OrderID"].astype(str) + "_" + frame["ProductID"].astype(str)
)

# The combined key is only usable as a row identifier if it never repeats.
assert order_details_df["OrderDetailsID"].is_unique, "duplicate (OrderID, ProductID) pair in order_details"


## Entity Set

In [None]:
# Create an entity set named "e-commerce_es" and register the four tables in it.
entitySet = ft.EntitySet(id="e-commerce_es")

# One entry per table, keyed by the unique name the entity set will know it by.
#   dataframe  - the DataFrame holding the data
#   index      - column used as the unique row identifier
#   time_index - (optional) column passed as the table's time index
dataframe_specs = {
    'Customers': dict(dataframe=customers_df, index='CustomerID', time_index='SignupDate'),
    'Products': dict(dataframe=products_df, index='ProductID'),
    'Orders': dict(dataframe=orders_df, index='OrderID', time_index='OrderDate'),
    'OrderDetails': dict(dataframe=order_details_df, index='OrderDetailsID'),
}

# Register the tables in the order listed above, keeping each call's return value.
registered = {}
for frame_name, frame_options in dataframe_specs.items():
    registered[frame_name] = entitySet.add_dataframe(dataframe_name=frame_name, **frame_options)

# Expose the return values under the names used by the rest of the notebook.
customers_entity = registered['Customers']
products_entity = registered['Products']
orders_entity = registered['Orders']
orderDetails_entity = registered['OrderDetails']

  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(
  pd.to_datetime(


#### Establish relationships

In [None]:
# Link the tables together. Each tuple reads:
# (parent dataframe, parent column, child dataframe, child column).
relationship_specs = [
    ('Customers', 'CustomerID', 'Orders', 'CustomerID'),
    ('Products', 'ProductID', 'OrderDetails', 'ProductID'),
    ('Orders', 'OrderID', 'OrderDetails', 'OrderID'),
]

for parent_name, parent_col, child_name, child_col in relationship_specs:
    entitySet.add_relationship(
        parent_dataframe_name=parent_name,
        parent_column_name=parent_col,
        child_dataframe_name=child_name,
        child_column_name=child_col,
    )

# Keep the entity set as the cell's final expression so its summary is displayed.
entitySet

Entityset: e-commerce_es
  DataFrames:
    Customers [Rows: 5, Columns: 4]
    Products [Rows: 5, Columns: 4]
    Orders [Rows: 7, Columns: 4]
    OrderDetails [Rows: 10, Columns: 5]
  Relationships:
    Orders.CustomerID -> Customers.CustomerID
    OrderDetails.ProductID -> Products.ProductID
    OrderDetails.OrderID -> Orders.OrderID

#### Check the entity set

In [None]:
# Print the entity set summary: its dataframes (with row/column counts)
# and the relationships defined between them.
print(entitySet)

Entityset: e-commerce_es
  DataFrames:
    Customers [Rows: 5, Columns: 4]
    Products [Rows: 5, Columns: 4]
    Orders [Rows: 7, Columns: 4]
    OrderDetails [Rows: 10, Columns: 5]
  Relationships:
    Orders.CustomerID -> Customers.CustomerID
    OrderDetails.ProductID -> Products.ProductID
    OrderDetails.OrderID -> Orders.OrderID


# Deep Feature Synthesis (DFS)

In [None]:
# Run Deep Feature Synthesis with "Orders" as the target dataframe.
# Primitive names are hoisted so they can be tuned in one place.
AGG_PRIMITIVES = ['sum', 'mean', 'count', 'max', 'min']
TRANS_PRIMITIVES = ['month', 'year', 'weekday']

# Returns the computed feature matrix and the list of feature definitions.
feature_matrix, feature_defs = ft.dfs(
    target_dataframe_name="Orders",
    entityset=entitySet,
    agg_primitives=AGG_PRIMITIVES,
    trans_primitives=TRANS_PRIMITIVES,
    max_depth=3,
    verbose=True,  # report feature count and progress while running
)


Built 95 features
Elapsed: 00:00 | Progress:  30%|██▉       

  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)


Elapsed: 00:00 | Progress: 100%|██████████


In [None]:
# Show the first rows of the generated feature matrix, followed by the
# list of feature definitions, each starting on its own line.
print(feature_matrix.head(), feature_defs, sep="\n")

         CustomerID  COUNT(OrderDetails)  MAX(OrderDetails.Discount)  \
OrderID                                                                
301             101                    1                        0.00   
302             102                    1                        0.10   
303             103                    1                        0.00   
304             104                    2                        0.05   
305             105                    1                        0.15   

         MAX(OrderDetails.Quantity)  MEAN(OrderDetails.Discount)  \
OrderID                                                            
301                             1.0                         0.00   
302                             2.0                         0.10   
303                             1.0                         0.00   
304                             1.0                         0.05   
305                             3.0                         0.15   

         MEAN(Orde