# Import Libraries

In [1]:
# Imports

import pandas as pd
import numpy as np
import csv

# Load Datasets

In [2]:
# Load NBER categories

f_name = "nber.tsv"
dtypes = {'category_id': np.int8, 'subcategory_id': np.int8}
nber   = pd.read_csv(f_name, delimiter="\t", dtype=dtypes, quoting=csv.QUOTE_NONNUMERIC)

In [3]:
# Load patent data
f_name = "patent.tsv"
dtypes = {'num_claims': np.int16, 'withdrawn': np.float32}
patent = pd.read_csv(f_name, delimiter="\t", dtype=dtypes, parse_dates = ['date'])

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


# Clean Datasets

In [4]:
# Get relevant columns
nber = nber[['patent_id','subcategory_id']]
patent = patent[['id', 'type', 'date']]

# Rename patent 'id' column to 'patent_id'
patent = patent.rename(columns={"id":"patent_id"})

In [5]:
patent

Unnamed: 0,patent_id,type,date
0,10000000,utility,2018-06-19
1,10000001,utility,2018-06-19
2,10000002,utility,2018-06-19
3,10000003,utility,2018-06-19
4,10000004,utility,2018-06-19
...,...,...,...
7430868,T998013,defensive publication,1980-09-02
7430869,T998014,defensive publication,1980-09-02
7430870,T999001,defensive publication,1980-10-07
7430871,T999002,defensive publication,1980-10-07


In [6]:
# Create year column based on date
patent['year'] = patent['date'].astype(str).str[:4]

# Merge Datasets

In [7]:
# Merge patent and nber on patent_id
patentNber = patent.merge(nber, on="patent_id", how='inner')\
        .rename(columns={"subcategory_id": "patent_subcategory_id"})

In [8]:
patentNber.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17342 entries, 0 to 17341
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   patent_id              17342 non-null  object        
 1   type                   17342 non-null  object        
 2   date                   17342 non-null  datetime64[ns]
 3   year                   17342 non-null  object        
 4   patent_subcategory_id  17342 non-null  int8          
dtypes: datetime64[ns](1), int8(1), object(3)
memory usage: 694.4+ KB


In [19]:
patentNber

Unnamed: 0,patent_id,type,date,year,patent_subcategory_id
0,RE28671,reissue,1976-01-06,1976,69
1,RE28672,reissue,1976-01-06,1976,59
2,RE28673,reissue,1976-01-06,1976,68
3,RE28674,reissue,1976-01-06,1976,32
4,RE28675,reissue,1976-01-06,1976,55
...,...,...,...,...,...
17337,RE46257,reissue,2016-12-27,2016,24
17338,RE46258,reissue,2016-12-27,2016,24
17339,RE46267,reissue,2017-01-10,2017,70
17340,RE46289,reissue,2017-01-31,2017,70


# Aggregate

In [10]:
# Get subcategories
list_subcat = sorted(patentNber['patent_subcategory_id'].dropna().unique())

In [14]:
# Dataframe for storing aggregate data
df_patentNber = pd.DataFrame()

# Iterate through the subcategories
for patent_subcategory_id in list_subcat:
    # Generate one row for each year
    temp = patentNber[patentNber['patent_subcategory_id'] == patent_subcategory_id].groupby("year").count()\
        [['patent_subcategory_id']].rename(columns={'patent_subcategory_id': patent_subcategory_id})

    # Merge the temp column into the result matrix
    df_patentNber = df_patentNber.merge(temp, left_index=True, right_index=True, how = "outer")


In [12]:

df_patentNber = df_patentNber.replace(np.nan,0)

df_patentNber.style.applymap(lambda x: 'background-color : yellow' if x > 0 else '').format('{:2}')

Unnamed: 0_level_0,11,12,13,14,15,19,21,22,23,24,25,31,32,33,39,41,42,43,44,45,46,49,51,52,53,54,55,59,61,62,63,64,65,66,67,68,69,70
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
1976,1.0,3.0,0.0,15.0,14.0,34.0,21.0,8.0,4.0,4.0,0.0,12.0,9.0,0.0,5.0,8.0,8.0,16.0,3.0,18.0,3.0,14.0,26.0,30.0,12.0,4.0,20.0,27.0,9.0,1.0,15.0,13.0,5.0,2.0,4.0,13.0,38.0,0.0
1977,4.0,5.0,2.0,25.0,11.0,55.0,10.0,7.0,1.0,9.0,0.0,13.0,6.0,0.0,1.0,13.0,8.0,12.0,3.0,12.0,4.0,16.0,26.0,9.0,18.0,5.0,12.0,18.0,7.0,2.0,13.0,9.0,6.0,5.0,4.0,10.0,46.0,0.0
1978,3.0,2.0,1.0,27.0,20.0,33.0,10.0,9.0,3.0,3.0,0.0,14.0,6.0,0.0,5.0,12.0,4.0,4.0,2.0,17.0,3.0,19.0,26.0,19.0,16.0,4.0,9.0,18.0,4.0,2.0,8.0,9.0,12.0,4.0,2.0,5.0,27.0,0.0
1979,0.0,5.0,1.0,13.0,7.0,34.0,14.0,8.0,1.0,4.0,0.0,6.0,5.0,0.0,1.0,4.0,10.0,9.0,12.0,9.0,4.0,6.0,18.0,14.0,12.0,2.0,13.0,12.0,6.0,1.0,9.0,8.0,4.0,8.0,4.0,11.0,32.0,1.0
1980,2.0,6.0,2.0,20.0,24.0,25.0,7.0,4.0,0.0,3.0,0.0,8.0,10.0,0.0,4.0,12.0,4.0,5.0,2.0,14.0,2.0,5.0,16.0,5.0,23.0,4.0,10.0,14.0,9.0,2.0,3.0,6.0,3.0,4.0,3.0,4.0,20.0,0.0
1981,2.0,14.0,3.0,19.0,18.0,34.0,6.0,3.0,3.0,6.0,0.0,14.0,7.0,0.0,4.0,10.0,6.0,7.0,2.0,20.0,2.0,10.0,17.0,10.0,15.0,9.0,12.0,16.0,11.0,1.0,12.0,11.0,8.0,6.0,2.0,9.0,36.0,0.0
1982,4.0,2.0,1.0,8.0,5.0,22.0,8.0,5.0,0.0,2.0,0.0,9.0,3.0,0.0,2.0,8.0,4.0,7.0,6.0,7.0,3.0,7.0,19.0,11.0,14.0,3.0,8.0,16.0,3.0,1.0,6.0,8.0,8.0,7.0,8.0,9.0,36.0,0.0
1983,4.0,3.0,2.0,18.0,20.0,41.0,16.0,10.0,7.0,7.0,0.0,6.0,9.0,0.0,1.0,16.0,2.0,13.0,4.0,18.0,0.0,11.0,21.0,14.0,17.0,1.0,5.0,21.0,5.0,3.0,4.0,9.0,4.0,7.0,7.0,1.0,35.0,0.0
1984,2.0,8.0,0.0,9.0,14.0,26.0,16.0,12.0,2.0,0.0,1.0,7.0,6.0,0.0,8.0,14.0,4.0,9.0,6.0,6.0,5.0,17.0,19.0,14.0,11.0,2.0,7.0,16.0,10.0,1.0,6.0,9.0,2.0,2.0,1.0,5.0,24.0,0.0
1985,2.0,2.0,0.0,6.0,14.0,29.0,8.0,11.0,2.0,3.0,1.0,7.0,14.0,0.0,5.0,3.0,10.0,10.0,1.0,6.0,1.0,8.0,19.0,10.0,13.0,1.0,9.0,9.0,17.0,2.0,3.0,7.0,3.0,4.0,0.0,5.0,27.0,0.0


In [22]:
# Get patents created in the US
f_name   = "location.tsv"
location = pd.read_csv(f_name, delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
f_name   = "patent_inventor.tsv"
inventors = pd.read_csv(f_name, delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)

In [23]:
location

Unnamed: 0,id,city,state,country,latitude,longitude,county,state_fips,county_fips
0,0000472c-5ed2-49cf-be3a-d92f39cd8c44,Lavrensberg-by-Aachen,,DT,50.7966,6.0639,,,
1,0000c02c-2ddb-4dda-8cd3-67beedef82a2,Astoria,IL,US,40.2275,-90.3594,Fulton,17.0,17057.0
2,00019416-c951-49ea-8c1a-5e4021b7f907,Zuidlaarderveen,,NL,53.1039,6.7494,,,
3,0001e836-cea8-4038-a00f-b16dc3250854,Allenmarkt,,AT,47.3784,13.4232,,,
4,0002426c-54e8-4ec9-bf71-c56ddda28a8f,Croom,MD,US,38.7525,-76.7642,Prince George's,24.0,24033.0
...,...,...,...,...,...,...,...,...,...
144264,fffe673a-4542-47be-8b45-be190a4dbf49,,,FR,49.0188,2.3789,,,
144265,ffff0f82-b6c9-4f7a-b61d-22ce49a70be9,Tsuru,,JP,32.6333,130.9830,,,
144266,ffff4345-ff97-4a1f-827a-d04e1c020e7c,Guagnano,,IT,40.4000,17.9500,,,
144267,ffff4aad-6556-432c-bf22-b8e63c55490e,Edison Township,NJ,US,40.5187,-74.4121,,34.0,


In [24]:
inventors

Unnamed: 0,patent_id,inventor_id,location_id
0,6584128,6584128-1,
1,4789863,4789863-1,
2,6795487,6795487-2,
3,D474886,4193524-1,201be16e-d9bf-45cc-a6c9-262245baaf94
4,7646155,4341225-2,d24ae3d3-798c-4d7f-8b9e-dcbec2cdf476
...,...,...,...
17991893,10261193,6952005-1,b2c701fe-cbde-4210-ba84-598c1e8e9280
17991894,5441952,4260806-3,af8ed238-a350-40f5-b393-2fd21f922658
17991895,4828281,4828281-1,85a57f48-c08b-49ca-92a0-3ed47634f576
17991896,10541390,10541390-1,0d28b11b-386a-43d6-9eeb-4585dfdc1e1e


In [26]:
inventors_patents = patentNber.merge(inventors, on='patent_id')
inventors_patents

Unnamed: 0,patent_id,type,date,year,patent_subcategory_id,inventor_id,location_id
0,RE28671,reissue,1976-01-06,1976,69,RE28671-1,e6e4c99f-06ce-47bf-b9ce-be42691ce588
1,RE28672,reissue,1976-01-06,1976,59,RE28672-1,45aceb71-670b-4955-a192-e167d4377011
2,RE28673,reissue,1976-01-06,1976,68,RE28673-1,201be16e-d9bf-45cc-a6c9-262245baaf94
3,RE28674,reissue,1976-01-06,1976,32,RE28674-1,cfd46a23-20da-4f88-bb38-1bf6069c1f94
4,RE28675,reissue,1976-01-06,1976,55,RE28675-1,4a79c129-1356-4e3c-ac9b-3ef663c9686f
...,...,...,...,...,...,...,...
38613,RE46258,reissue,2016-12-27,2016,24,7062297-1,29b44cd4-3e3f-49ce-9a14-f1493809aa53
38614,RE46267,reissue,2017-01-10,2017,70,5903256-1,dc99cbee-a0c4-4352-ac1c-075080b66011
38615,RE46289,reissue,2017-01-31,2017,70,D679819-1,460a624f-2bd4-48a8-ba3e-bf2ac0963988
38616,RE46320,reissue,2017-02-28,2017,70,D349271-2,b2595cc6-f223-4109-9ab4-f7610d5f04fd


In [28]:
location_patents = inventors_patents.merge(location, left_on='location_id', right_on='id')
location_patents

Unnamed: 0,patent_id,type,date,year,patent_subcategory_id,inventor_id,location_id,id,city,state,country,latitude,longitude,county,state_fips,county_fips
0,RE28671,reissue,1976-01-06,1976,69,RE28671-1,e6e4c99f-06ce-47bf-b9ce-be42691ce588,e6e4c99f-06ce-47bf-b9ce-be42691ce588,Jackson,MI,US,42.2458,-84.4014,Jackson,26.0,26075.0
1,RE29266,reissue,1977-06-14,1977,42,RE29266-2,e6e4c99f-06ce-47bf-b9ce-be42691ce588,e6e4c99f-06ce-47bf-b9ce-be42691ce588,Jackson,MI,US,42.2458,-84.4014,Jackson,26.0,26075.0
2,RE29266,reissue,1977-06-14,1977,42,RE29266-3,e6e4c99f-06ce-47bf-b9ce-be42691ce588,e6e4c99f-06ce-47bf-b9ce-be42691ce588,Jackson,MI,US,42.2458,-84.4014,Jackson,26.0,26075.0
3,RE29266,reissue,1977-06-14,1977,42,RE29266-4,e6e4c99f-06ce-47bf-b9ce-be42691ce588,e6e4c99f-06ce-47bf-b9ce-be42691ce588,Jackson,MI,US,42.2458,-84.4014,Jackson,26.0,26075.0
4,RE29266,reissue,1977-06-14,1977,42,RE29266-1,e6e4c99f-06ce-47bf-b9ce-be42691ce588,e6e4c99f-06ce-47bf-b9ce-be42691ce588,Jackson,MI,US,42.2458,-84.4014,Jackson,26.0,26075.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38587,RE46250,reissue,2016-12-27,2016,24,7371582-10,7559a2df-62b9-4212-8f3a-63b61b89a9fc,7559a2df-62b9-4212-8f3a-63b61b89a9fc,Gangwon-do,,KR,38.1466,127.3130,,,
38588,RE46250,reissue,2016-12-27,2016,24,5473915-2,7559a2df-62b9-4212-8f3a-63b61b89a9fc,7559a2df-62b9-4212-8f3a-63b61b89a9fc,Gangwon-do,,KR,38.1466,127.3130,,,
38589,RE46250,reissue,2016-12-27,2016,24,8093057-11,7559a2df-62b9-4212-8f3a-63b61b89a9fc,7559a2df-62b9-4212-8f3a-63b61b89a9fc,Gangwon-do,,KR,38.1466,127.3130,,,
38590,RE46250,reissue,2016-12-27,2016,24,8093057-1,7559a2df-62b9-4212-8f3a-63b61b89a9fc,7559a2df-62b9-4212-8f3a-63b61b89a9fc,Gangwon-do,,KR,38.1466,127.3130,,,


In [29]:
# Dataframe for storing aggregate data
usonly_df_patentNber = pd.DataFrame()

# Iterate through the subcategories
for patent_subcategory_id in list_subcat:
    # Generate one row for each year
    temp = location_patents[location_patents['patent_subcategory_id'] == patent_subcategory_id].groupby("year").count()\
        [['patent_subcategory_id']].rename(columns={'patent_subcategory_id': patent_subcategory_id})

    # Merge the temp column into the result matrix
    usonly_df_patentNber = usonly_df_patentNber.merge(temp, left_index=True, right_index=True, how = "outer")


In [30]:
usonly_df_patentNber = usonly_df_patentNber.replace(np.nan,0)
usonly_df_patentNber.style.applymap(lambda x: 'background-color : yellow' if x > 0 else '').format('{:2}')

Unnamed: 0_level_0,11,12,13,14,15,19,21,22,23,24,25,31,32,33,39,41,42,43,44,45,46,49,51,52,53,54,55,59,61,62,63,64,65,66,67,68,69,70
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
1976,1.0,5.0,0.0,41.0,28.0,69.0,35.0,13.0,6.0,9.0,0.0,26.0,13.0,0.0,7.0,11.0,9.0,26.0,5.0,25.0,8.0,23.0,42.0,53.0,18.0,9.0,23.0,37.0,11.0,1.0,25.0,21.0,7.0,2.0,5.0,17.0,56.0,0.0
1977,9.0,7.0,3.0,53.0,21.0,106.0,19.0,13.0,1.0,20.0,0.0,25.0,9.0,0.0,3.0,20.0,14.0,22.0,8.0,17.0,7.0,29.0,35.0,18.0,26.0,12.0,15.0,22.0,9.0,4.0,18.0,14.0,7.0,8.0,5.0,12.0,67.0,0.0
1978,16.0,5.0,1.0,55.0,38.0,51.0,16.0,13.0,3.0,5.0,0.0,21.0,8.0,0.0,7.0,16.0,7.0,6.0,2.0,24.0,7.0,26.0,38.0,34.0,29.0,4.0,11.0,30.0,8.0,4.0,12.0,14.0,15.0,7.0,5.0,6.0,35.0,0.0
1979,0.0,15.0,1.0,29.0,12.0,54.0,19.0,10.0,1.0,6.0,0.0,15.0,11.0,0.0,1.0,5.0,17.0,13.0,18.0,11.0,6.0,11.0,28.0,29.0,23.0,5.0,24.0,14.0,9.0,1.0,23.0,11.0,8.0,11.0,7.0,19.0,46.0,1.0
1980,4.0,12.0,3.0,34.0,51.0,43.0,8.0,8.0,0.0,6.0,0.0,24.0,16.0,0.0,6.0,19.0,5.0,7.0,3.0,32.0,4.0,7.0,23.0,7.0,35.0,6.0,11.0,21.0,21.0,2.0,5.0,8.0,6.0,4.0,6.0,4.0,26.0,0.0
1981,7.0,27.0,8.0,43.0,33.0,63.0,9.0,4.0,8.0,9.0,0.0,28.0,14.0,0.0,4.0,10.0,10.0,13.0,3.0,39.0,4.0,18.0,27.0,17.0,17.0,17.0,13.0,23.0,14.0,1.0,27.0,23.0,14.0,11.0,3.0,15.0,63.0,0.0
1982,7.0,4.0,1.0,20.0,11.0,46.0,9.0,11.0,0.0,3.0,0.0,21.0,6.0,0.0,2.0,14.0,5.0,15.0,7.0,9.0,8.0,16.0,26.0,42.0,18.0,9.0,12.0,22.0,6.0,2.0,7.0,13.0,15.0,10.0,11.0,12.0,63.0,0.0
1983,9.0,4.0,5.0,42.0,45.0,76.0,31.0,19.0,22.0,10.0,0.0,9.0,23.0,0.0,1.0,20.0,3.0,19.0,7.0,30.0,0.0,22.0,29.0,28.0,29.0,1.0,6.0,28.0,18.0,4.0,9.0,25.0,8.0,10.0,8.0,1.0,48.0,0.0
1984,8.0,13.0,0.0,22.0,20.0,49.0,24.0,21.0,7.0,0.0,1.0,17.0,11.0,0.0,10.0,18.0,5.0,15.0,10.0,11.0,12.0,27.0,26.0,22.0,13.0,4.0,10.0,26.0,13.0,1.0,7.0,14.0,3.0,4.0,3.0,5.0,34.0,0.0
1985,7.0,2.0,0.0,11.0,32.0,55.0,14.0,20.0,2.0,6.0,6.0,19.0,23.0,0.0,6.0,4.0,20.0,15.0,1.0,12.0,3.0,16.0,31.0,14.0,19.0,2.0,13.0,15.0,34.0,2.0,5.0,11.0,4.0,7.0,0.0,6.0,35.0,0.0
