# Import Libraries

In [1]:
# Imports

import pandas as pd
import numpy as np
import csv

# Load Datasets

In [2]:
# Load NBER categories

f_name = "nber.tsv"
dtypes = {'category_id': np.int8, 'subcategory_id': np.int8}
nber   = pd.read_csv(f_name, delimiter="\t", dtype=dtypes, quoting=csv.QUOTE_NONNUMERIC)

In [3]:
# Load patent data
f_name = "patent.tsv"
dtypes = {'num_claims': np.int16, 'withdrawn': np.float32}
patent = pd.read_csv(f_name, delimiter="\t", dtype=dtypes, parse_dates = ['date'])

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


# Clean Datasets

In [4]:
# Get relevant columns
nber = nber[['patent_id','subcategory_id']]
patent = patent[['id', 'type', 'date']]

# Rename patent 'id' column to 'patent_id'
patent = patent.rename(columns={"id":"patent_id"})

In [5]:
# Filter for only utility patents
patent = patent[patent['type'] == 'utility']

In [6]:
patent

Unnamed: 0,patent_id,type,date
0,10000000,utility,2018-06-19
1,10000001,utility,2018-06-19
2,10000002,utility,2018-06-19
3,10000003,utility,2018-06-19
4,10000004,utility,2018-06-19
...,...,...,...
7414146,RE31700,utility,1984-10-09
7414147,RE31701,utility,1984-10-09
7414150,RE31704,utility,1984-10-09
7416761,RE34357,utility,1993-08-24


In [7]:
# Create year column based on date
patent['year'] = patent['date'].astype(str).str[:4]

# Merge Datasets

In [8]:
# Merge patent and nber on patent_id
patentNber = patent.merge(nber, on="patent_id", how='inner')\
        .rename(columns={"subcategory_id": "patent_subcategory_id"})

In [9]:
patentNber.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 29 entries, 0 to 28
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   patent_id              29 non-null     object        
 1   type                   29 non-null     object        
 2   date                   29 non-null     datetime64[ns]
 3   year                   29 non-null     object        
 4   patent_subcategory_id  29 non-null     int8          
dtypes: datetime64[ns](1), int8(1), object(3)
memory usage: 1.2+ KB


In [10]:
patentNber

Unnamed: 0,patent_id,type,date,year,patent_subcategory_id
0,RE28707,utility,1976-02-10,1976,64
1,RE28712,utility,1976-02-17,1976,69
2,RE28714,utility,1976-02-17,1976,53
3,RE28715,utility,1976-02-17,1976,15
4,RE28840,utility,1976-06-08,1976,51
5,RE29000,utility,1976-10-12,1976,53
6,RE29024,utility,1976-11-02,1976,55
7,RE29103,utility,1977-01-04,1977,42
8,RE29219,utility,1977-05-17,1977,64
9,RE29412,utility,1977-09-27,1977,69


# Aggregate

In [11]:
# Get subcategories
list_subcat = sorted(patentNber['patent_subcategory_id'].dropna().unique())

In [12]:
# Dataframe for storing aggregate data
df_patentNber = pd.DataFrame()

# Iterate through the subcategories
for patent_subcategory_id in list_subcat:
    # Generate one column for each year
    temp = patentNber[patentNber['patent_subcategory_id'] == patent_subcategory_id].groupby("year").count()\
        [['patent_subcategory_id']].rename(columns={'patent_subcategory_id': patent_subcategory_id})

    # Merge the temp column into the result matrix
    df_patentNber = df_patentNber.merge(temp, left_index=True, right_index=True, how = "outer")


In [13]:

df_patentNber = df_patentNber.replace(np.nan,0)

df_patentNber.style.applymap(lambda x: 'background-color : yellow' if x > 0 else '').format('{:2}')

Unnamed: 0_level_0,15,19,21,31,42,43,44,45,51,52,53,55,59,61,64,65,66,67,69
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1976,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1977,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0
1978,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1979,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1982,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1983,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,0.0,1.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1993,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1995,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
