In [41]:
import pandas as pd
from numpy import isnan


In [42]:
# Column labels, in display order. New products have no "sales" figure yet.
stock_columns = ["id", "name", "wholesale price", "retail price", "sales"]

# Products already in stock, written one product per row.
stocked_rows = [
    (1, "black pen", 0.25, 1.75, 22351),
    (2, "stapler", 1.5, 5.99, 375),
    (3, "ruler", 1.15, 2.0, 520),
    (4, "tissues", 1.0, 2.5, 38902),
    (5, "eraser", 0.3, 1.5, 242),
    (6, "office chair", 43.6, 170, 93),
    (7, "foot rest", 12, 59, 41),
]
# zip(*rows) turns the rows back into one sequence per column.
stocked_items = pd.DataFrame(
    {
        label: list(values)
        for label, values in zip(stock_columns, zip(*stocked_rows))
    }
)

# Products to be added: same layout minus the trailing "sales" column.
new_rows = [
    (24, "Phone", 200, 500),
    (16, "Apple", 0.5, 1),
    (17, "Pear", 0.6, 1.2),
]
new_items = pd.DataFrame(
    {
        label: list(values)
        for label, values in zip(stock_columns[:-1], zip(*new_rows))
    }
)

# Stack the two tables. Because new_items has no "sales" column, those cells
# come out as NaN. ignore_index=True renumbers the rows 0..n-1; without it the
# new rows would keep their own labels 0..2 and duplicate existing ones.
inventory = pd.concat([stocked_items, new_items], ignore_index=True)
inventory

Unnamed: 0,id,name,wholesale price,retail price,sales
0,1,black pen,0.25,1.75,22351.0
1,2,stapler,1.5,5.99,375.0
2,3,ruler,1.15,2.0,520.0
3,4,tissues,1.0,2.5,38902.0
4,5,eraser,0.3,1.5,242.0
5,6,office chair,43.6,170.0,93.0
6,7,foot rest,12.0,59.0,41.0
7,24,Phone,200.0,500.0,
8,16,Apple,0.5,1.0,
9,17,Pear,0.6,1.2,


In [43]:
# Find the rows whose "sales" value is NaN. NaN never compares equal to
# anything (not even itself), so an `== float("nan")` comparison cannot find
# them. pandas' own Series.isna() builds the boolean mask and, unlike
# numpy.isnan, also copes with None and non-float columns.
missing_sales = inventory["sales"].isna()

# the NaN entries themselves, as a Series indexed by row label
invalid_sales = inventory["sales"][missing_sales]

# assign some sales values to the NaN rows (one value per selected row, in
# row order); the guard makes re-running this cell after the values have been
# filled in a no-op instead of trying to assign three values to zero rows
if missing_sales.any():
    inventory.loc[missing_sales, "sales"] = [100, 200, 75]

# the book recommended to specify the indices, but I don't like
# that, particularly since I got the index to be rebuilt in the
# last step, unlike the book's manual method
# inventory.loc[[7,8,9], "sales"] = [100, 200, 75]
inventory

Unnamed: 0,id,name,wholesale price,retail price,sales
0,1,black pen,0.25,1.75,22351.0
1,2,stapler,1.5,5.99,375.0
2,3,ruler,1.15,2.0,520.0
3,4,tissues,1.0,2.5,38902.0
4,5,eraser,0.3,1.5,242.0
5,6,office chair,43.6,170.0,93.0
6,7,foot rest,12.0,59.0,41.0
7,24,Phone,200.0,500.0,100.0
8,16,Apple,0.5,1.0,200.0
9,17,Pear,0.6,1.2,75.0


In [44]:
# total over all products of (retail price - wholesale price) * sales,
# i.e. the price difference weighted by the "sales" column
inventory["retail price"].sub(inventory["wholesale price"]).mul(inventory["sales"]).sum()

np.float64(138122.85)

# Extension questions
1. Add one new product to the dataframe without using `pd.concat` - what's the advantage of using `concat` and when should you use it?
2. Add a new column, `"department"`, to the dataframe and place each product in a department. Calculate `"current_net"` for each food product.
3. Use the `query` method to get descriptive statistics for food items.

In [None]:
# 1. Add a new product without using concat
# Assigning to a row label that does not exist yet makes .loc append a row.
# len(inventory) is used as the new label, which is the next free one only
# while the index is the default 0..n-1 range.
# The values are positional and must match the current columns:
# id, name, wholesale price, retail price, sales.
new_product = [10, "red pen", 0.25, 1.99, 16923]

# Guard on the product id so that re-running this cell does not append a
# duplicate row.
if not inventory["id"].eq(new_product[0]).any():
    inventory.loc[len(inventory)] = new_product
inventory

Unnamed: 0,id,name,wholesale price,retail price,sales
0,1,black pen,0.25,1.75,22351.0
1,2,stapler,1.5,5.99,375.0
2,3,ruler,1.15,2.0,520.0
3,4,tissues,1.0,2.5,38902.0
4,5,eraser,0.3,1.5,242.0
5,6,office chair,43.6,170.0,93.0
6,7,foot rest,12.0,59.0,41.0
7,24,Phone,200.0,500.0,100.0
8,16,Apple,0.5,1.0,200.0
9,17,Pear,0.6,1.2,75.0


The previous method modifies the dataframe in place rather than returning a new one, as `pd.concat` does (making a copy might be a problem if the dataframe is very big).

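However, it feels a bit fragile: using `len(inventory)` as the new row label only works when the index is the default 0..n-1 integer range, so it doesn't work with non-numeric indices.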

In [69]:
# 2. Add a new "department" column, then put each product into a department.
#    Calculate the net profit on food category.

# Departments are looked up by product name instead of being listed
# positionally, so the assignment does not depend on the current row order or
# on the frame having exactly eleven rows.
# NOTE(review): "stationary" is presumably meant to be "stationery"; the label
# is left unchanged here in case other cells filter on it.
department_by_name = {
    "black pen": "stationary",
    "stapler": "stationary",
    "ruler": "school",
    "tissues": "consumables",
    "eraser": "school",
    "office chair": "furniture",
    "foot rest": "furniture",
    "Phone": "technology",
    "Apple": "food",
    "Pear": "food",
    "red pen": "stationary",
}
inventory["department"] = inventory["name"].map(department_by_name)

# Series.map leaves NaN for any name missing from the dict; fail loudly rather
# than carrying products without a department into the statistics below.
unassigned = inventory.loc[inventory["department"].isna(), "name"].tolist()
assert not unassigned, f"no department assigned for: {unassigned}"

# net per product: (retail price - wholesale price) * sales
inventory["current_net"] = (
    inventory["retail price"] - inventory["wholesale price"]
) * inventory["sales"]

# descriptive statistics of current_net for the food rows only
foods = inventory[inventory["department"] == "food"]
foods["current_net"].describe()

count      2.000000
mean      72.500000
std       38.890873
min       45.000000
25%       58.750000
50%       72.500000
75%       86.250000
max      100.000000
Name: current_net, dtype: float64

In [70]:
# 3. Descriptive statistics of current_net for the food items, this time
#    selecting the rows with a query string instead of a boolean mask
food_rows = inventory.query('department == "food"')
food_rows.loc[:, "current_net"].describe()

count      2.000000
mean      72.500000
std       38.890873
min       45.000000
25%       58.750000
50%       72.500000
75%       86.250000
max      100.000000
Name: current_net, dtype: float64