https://towardsdatascience.com/3-python-operations-for-solving-specific-data-processing-tasks-efficiently-551c8ed41c02

# Explode a Sequence of Dates

In [1]:
import pandas as pd

lifecycle = pd.DataFrame({
    "store_id": [1130, 1130, 1130, 1460, 1460],
    "product_id": [103, 104, 112, 130, 160],
    "start_date": ["2022-10-01", "2022-09-14", "2022-07-20", "2022-06-30", "2022-12-10"],
    "end_date": ["2022-10-15", "2022-11-06", "2022-09-10", "2022-07-20", "2023-01-10"]
})

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
lifecycle.dtypes

store_id       int64
product_id     int64
start_date    object
end_date      object
dtype: object

In [3]:
lifecycle = lifecycle.astype(
   {"start_date": "datetime64[ns]", "end_date": "datetime64[ns]"}
)

lifecycle.dtypes

store_id               int64
product_id             int64
start_date    datetime64[ns]
end_date      datetime64[ns]
dtype: object

In [5]:
pd.date_range(lifecycle.start_date[0], lifecycle.end_date[0])


DatetimeIndex(['2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04',
               '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08',
               '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12',
               '2022-10-13', '2022-10-14', '2022-10-15'],
              dtype='datetime64[ns]', freq='D')

In [7]:
lifecycle.loc[:, "date"] = [
    pd.date_range(x, y) for x, y in zip(lifecycle["start_date"], lifecycle["end_date"])
]

  return asarray(a).ndim


In [8]:
lifecycle

Unnamed: 0,store_id,product_id,start_date,end_date,date
0,1130,103,2022-10-01,2022-10-15,"DatetimeIndex(['2022-10-01', '2022-10-02', '20..."
1,1130,104,2022-09-14,2022-11-06,"DatetimeIndex(['2022-09-14', '2022-09-15', '20..."
2,1130,112,2022-07-20,2022-09-10,"DatetimeIndex(['2022-07-20', '2022-07-21', '20..."
3,1460,130,2022-06-30,2022-07-20,"DatetimeIndex(['2022-06-30', '2022-07-01', '20..."
4,1460,160,2022-12-10,2023-01-10,"DatetimeIndex(['2022-12-10', '2022-12-11', '20..."


In [9]:
lifecycle = lifecycle.explode(column="date")

# drop the start and end date columns
lifecycle = lifecycle.drop(["start_date", "end_date"], axis=1)

lifecycle.head()

Unnamed: 0,store_id,product_id,date
0,1130,103,2022-10-01
0,1130,103,2022-10-02
0,1130,103,2022-10-03
0,1130,103,2022-10-04
0,1130,103,2022-10-05


## Alternative Approach

In [30]:
df.category.value_counts()

category
B    6
C    4
A    2
Name: count, dtype: int64

In [29]:
pd.merge(df, pd.DataFrame(df.category.value_counts().reset_index()))

Unnamed: 0,category,issue_level,month,year,frequency,count
0,B,1,Mar,2023,6,6
1,B,1,Feb,2021,6,6
2,B,1,June,2023,6,6
3,B,4,Dec,2021,6,6
4,B,3,Feb,2022,6,6
5,B,5,Apr,2023,6,6
6,C,5,Feb,2022,4,4
7,C,2,July,2020,4,4
8,C,2,Mar,2022,4,4
9,C,2,Feb,2022,4,4


# Extract Multiple Items from a List

In [10]:
names = ["Jane", "John", "Max", "Ashley", "Max"]

# get second item
names[1]

'John'

In [11]:
names = ["Jane", "John", "Max", "Ashley", "Max"]

# get the first three items
names[:3]

['Jane', 'John', 'Max']

In [12]:
scores = [1225, 598, 922, 1565, 225, 1173, 1658, 1112, 1339, 1521, 737, 
          1374, 1676, 1183, 1598, 1948, 1123, 1185, 560, 1335, 1867, 270, 
          1901, 1089, 905, 1282, 1205, 873, 1860, 456, 1645, 1499, 525, 
          1696, 1703, 1157, 1791, 1930, 1268, 1760]

In [13]:
items_to_extract = [0, 3, 4, 8, 12, 23, 30, 32, 36]

In [14]:
from operator import itemgetter

items_requested = itemgetter(*items_to_extract)(scores)

print(items_requested)

(1225, 1565, 225, 1339, 1676, 1089, 1645, 525, 1791)


## Alternative Approach

In [27]:
import numpy as np
s=np.array(scores)
s[items_to_extract]

array([1225, 1565,  225, 1339, 1676, 1089, 1645,  525, 1791])

# Sort DataFrame by Frequency

In [15]:
df = pd.DataFrame({
    "category": ["A", "A", "C", "B", "B", "B", "C", "B", "C", "B", "B", "C"],
    "issue_level": [5, 3, 5, 1, 1, 1, 2, 4, 2, 3, 5, 2],
    "month": ["Jan", "Mar", "Feb", "Mar", "Feb", "June", "July", "Dec", "Mar", "Feb", "Apr", "Feb"],
    "year": [2023, 2022, 2022, 2023, 2021, 2023, 2020, 2021, 2022, 2022, 2023, 2022]
})

In [16]:
display(df)

Unnamed: 0,category,issue_level,month,year
0,A,5,Jan,2023
1,A,3,Mar,2022
2,C,5,Feb,2022
3,B,1,Mar,2023
4,B,1,Feb,2021
5,B,1,June,2023
6,C,2,July,2020
7,B,4,Dec,2021
8,C,2,Mar,2022
9,B,3,Feb,2022


In [17]:
df["category"].value_counts()

category
B    6
C    4
A    2
Name: count, dtype: int64

In [18]:
df.loc[:, "frequency"] = df.groupby("category")["category"].transform(pd.Series.count)

In [25]:
print(list(df.groupby("category")))

[('A',    category  issue_level month  year  frequency
10        A            5   Jan  2023          2
11        A            3   Mar  2022          2), ('B',   category  issue_level month  year  frequency
0        B            1   Mar  2023          6
1        B            1   Feb  2021          6
2        B            1  June  2023          6
3        B            4   Dec  2021          6
4        B            3   Feb  2022          6
5        B            5   Apr  2023          6), ('C',   category  issue_level month  year  frequency
6        C            5   Feb  2022          4
7        C            2  July  2020          4
8        C            2   Mar  2022          4
9        C            2   Feb  2022          4)]


In [31]:
print(list(df.groupby("category")['category']))

[('A', 10    A
11    A
Name: category, dtype: object), ('B', 0    B
1    B
2    B
3    B
4    B
5    B
Name: category, dtype: object), ('C', 6    C
7    C
8    C
9    C
Name: category, dtype: object)]


In [19]:
display(df)

Unnamed: 0,category,issue_level,month,year,frequency
0,A,5,Jan,2023,2
1,A,3,Mar,2022,2
2,C,5,Feb,2022,4
3,B,1,Mar,2023,6
4,B,1,Feb,2021,6
5,B,1,June,2023,6
6,C,2,July,2020,4
7,B,4,Dec,2021,6
8,C,2,Mar,2022,4
9,B,3,Feb,2022,6


In [20]:
df = df.sort_values(by="frequency", ascending=False).reset_index(drop=True)

In [21]:
display(df)

Unnamed: 0,category,issue_level,month,year,frequency
0,B,1,Mar,2023,6
1,B,1,Feb,2021,6
2,B,1,June,2023,6
3,B,4,Dec,2021,6
4,B,3,Feb,2022,6
5,B,5,Apr,2023,6
6,C,5,Feb,2022,4
7,C,2,July,2020,4
8,C,2,Mar,2022,4
9,C,2,Feb,2022,4
