![image.png](attachment:image.png)

**Rebrickable** (https://rebrickable.com/) is a website that shows which other LEGO sets you can build from the sets and parts you already own. It has established a database with the LEGO Parts, Sets, Colors, and a lot more design-related information of every official LEGO set. The information is updated in real time and can be accessed through an API.

The following tables were gathered and are up to date as of January 3, 2021:

lego_sets, lego_colors, lego_part_categories, lego_parts, lego_themes, inventories, inventory_minifigs, inventory_parts, inventory_sets.

In [1]:
# Import required packages
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# For adding delays so that I don't spam requests
import time

#### Sets

In [4]:
# Endpoint of the last (17th) page of sets, used to inspect the response structure.
# Note that the actual Activation Key is replaced with 'xxxx' below
curl = (
    'https://rebrickable.com/api/v3/lego/sets/'
    '?key=xxxx&page_size=1000&page=17'
)

In [5]:
# Fetch the page. The timeout keeps the cell from hanging indefinitely, and
# raise_for_status() surfaces HTTP errors here instead of when 'results' is read.
r = requests.get(curl, timeout=30)
r.raise_for_status()

In [6]:
# Show the records stored under the 'results' key of the response as a table
pd.DataFrame(data=r.json()['results'])

Unnamed: 0,set_num,name,year,theme_id,num_parts,set_img_url,set_url,last_modified_dt
0,NINJATRU-1,Kai's Mask,2015,435,97,https://cdn.rebrickable.com/media/sets/ninjatr...,https://rebrickable.com/sets/NINJATRU-1/kais-m...,2020-01-05T12:07:36.194608Z
1,NINJATRU-2,Lloyd's Mask,2015,435,97,https://cdn.rebrickable.com/media/sets/ninjatr...,https://rebrickable.com/sets/NINJATRU-2/lloyds...,2020-09-10T00:01:45.501853Z
2,NINJATRU-3,Cole's Mask,2015,435,97,https://cdn.rebrickable.com/media/sets/ninjatr...,https://rebrickable.com/sets/NINJATRU-3/coles-...,2020-01-05T12:05:01.964927Z
3,NINJATRU-4,Jay's Mask,2015,435,97,https://cdn.rebrickable.com/media/sets/ninjatr...,https://rebrickable.com/sets/NINJATRU-4/jays-m...,2020-01-05T12:05:54.019424Z
4,NINJATRU-5,Zane's Mask,2015,435,97,https://cdn.rebrickable.com/media/sets/ninjatr...,https://rebrickable.com/sets/NINJATRU-5/zanes-...,2020-01-05T12:07:09.555909Z
...,...,...,...,...,...,...,...,...
190,XMASTREE-1,Christmas Tree,2019,410,26,https://cdn.rebrickable.com/media/sets/xmastre...,https://rebrickable.com/sets/XMASTREE-1/christ...,2019-11-29T07:42:16.643436Z
191,XWING-1,Mini X-Wing Fighter,2019,158,60,https://cdn.rebrickable.com/media/sets/xwing-1...,https://rebrickable.com/sets/XWING-1/mini-x-wi...,2019-11-03T15:08:31.000303Z
192,XWING-2,X-Wing Trench Run,2019,158,52,https://cdn.rebrickable.com/media/sets/xwing-2...,https://rebrickable.com/sets/XWING-2/x-wing-tr...,2019-11-03T15:15:29.879154Z
193,YODACHRON-1,Yoda Chronicles Promotional Set,2013,158,413,https://cdn.rebrickable.com/media/sets/yodachr...,https://rebrickable.com/sets/YODACHRON-1/yoda-...,2019-06-01T22:24:48.768032Z


In [7]:
# Loop through every page of sets and collect the contents associated with key 'results'
# Note that the actual Activation Key is replaced with 'xxxx' below
set_list = []
page = 1
while True:
    url = f'https://rebrickable.com/api/v3/lego/sets/?key=xxxx&page_size=1000&page={page}'
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of failing later when 'results' is read
    response.raise_for_status()
    payload = response.json()
    set_list.extend(payload['results'])
    # NOTE(review): relies on the paginated response exposing a 'next' link that is
    # empty on the last page, so the page count (17 on January 3, 2021) is not hard-coded.
    if not payload.get('next'):
        break
    page += 1
    # Short pause between requests so the API is not spammed
    time.sleep(1)

In [8]:
# Keep the five descriptive columns of each set. Selecting them by name (rather than
# by position) keeps the table correct even if the API returns its fields in another order.
lego_sets = pd.DataFrame(set_list)[['set_num', 'name', 'year', 'theme_id', 'num_parts']]
lego_sets.head()

Unnamed: 0,set_num,name,year,theme_id,num_parts
0,001-1,Gears,1965,1,43
1,0011-2,Town Mini-Figures,1978,84,12
2,0011-3,Castle 2 for 1 Bonus Offer,1987,199,0
3,0012-1,Space Mini-Figures,1979,143,12
4,0013-1,Space Mini-Figures,1979,143,12


In [11]:
# Save the sets table as CSV, without the index column
lego_sets.to_csv(path_or_buf='data/lego_sets_01_03_2021.csv', index=False)

#### Theme

In [13]:
# Fetch the themes and show them as a table.
# Note that the actual Activation Key is replaced with 'xxxx' below
turl = 'https://rebrickable.com/api/v3/lego/themes/?key=xxxx&page_size=1000&page=1'
# The timeout keeps the cell from hanging indefinitely; raise_for_status()
# surfaces HTTP errors here instead of when 'results' is read.
t = requests.get(turl, timeout=30)
t.raise_for_status()
pd.DataFrame(t.json()['results'])

Unnamed: 0,id,parent_id,name
0,1,,Technic
1,2,1.0,Arctic Technic
2,3,1.0,Competition
3,4,1.0,Expert Builder
4,5,1.0,Model
...,...,...,...
592,708,697.0,The LEGO Batman Movie
593,709,,LEGO Art
594,710,207.0,Harry Potter
595,711,535.0,The LEGO Batman Movie Series 2


In [14]:
# Build the themes table from the 'results' entry of the response and save it as CSV
lego_themes = pd.DataFrame(data=t.json()['results'])
lego_themes.to_csv(path_or_buf='data/lego_themes_01_03_2021.csv', index=False)

#### Colors

In [15]:
# Fetch the colors and show them as a table.
# Note that the actual Activation Key is replaced with 'xxxx' below
curl = 'https://rebrickable.com/api/v3/lego/colors/?key=xxxx&page_size=1000&page=1'
# The timeout keeps the cell from hanging indefinitely; raise_for_status()
# surfaces HTTP errors here instead of when 'results' is read.
c = requests.get(curl, timeout=30)
c.raise_for_status()
pd.DataFrame(c.json()['results'])

Unnamed: 0,id,name,rgb,is_trans,external_ids
0,-1,[Unknown],0033B2,False,"{'BrickOwl': {'ext_ids': [0, 28, 29, 30, 31, 3..."
1,0,Black,05131D,False,"{'LDraw': {'ext_ids': [256, 0], 'ext_descrs': ..."
2,1,Blue,0055BF,False,"{'BrickLink': {'ext_ids': [7], 'ext_descrs': [..."
3,2,Green,237841,False,"{'BrickLink': {'ext_ids': [6], 'ext_descrs': [..."
4,3,Dark Turquoise,008F9B,False,"{'BrickLink': {'ext_ids': [39], 'ext_descrs': ..."
...,...,...,...,...,...
181,1055,Trans-Clear Opal,FCFCFC,True,"{'LEGO': {'ext_ids': [360], 'ext_descrs': [['T..."
182,1056,Trans-Brown Opal,583927,True,"{'LEGO': {'ext_ids': [360], 'ext_descrs': [['T..."
183,1057,Trans-Light Bright Green,C9E788,True,"{'LEGO': {'ext_ids': [227], 'ext_descrs': [['T..."
184,1058,Trans-Light Green,94E5AB,True,"{'LEGO': {'ext_ids': [285], 'ext_descrs': [['T..."


In [16]:
# Build the colors table from the 'results' entry of the response and save it as CSV
lego_colors = pd.DataFrame(data=c.json()['results'])
lego_colors.to_csv(path_or_buf='data/lego_colors_01_03_2021.csv', index=False)

#### Parts

In [17]:
# Load in one of the pages and inspect data structure
# Note that the actual Activation Key is replaced with 'xxxx' below
purl = 'https://rebrickable.com/api/v3/lego/parts/?key=xxxx&page_size=1000&page=38'
# The timeout keeps the cell from hanging indefinitely; raise_for_status()
# surfaces HTTP errors here instead of when 'results' is read.
p = requests.get(purl, timeout=30)
p.raise_for_status()
pd.DataFrame(p.json()['results'])

Unnamed: 0,part_num,name,part_cat_id
0,upn0040c02,"Primo Figure Boy with Red Base, Red Suspenders...",57
1,upn0040c03,"Primo Figure Boy with Red Base, Glasses and Gr...",57
2,upn0041,"Body Giant, Road Hog Black Vest, Sand Blue Legs",13
3,upn0042,"Pencil Topper, Integrated 2x2 Plate",9
4,upn0043,"Animal Body Part, Dinosaur, Tyrannosaurus Rex ...",28
...,...,...,...
511,znapupn0002,Znap Connector 3 x 3 - 4 way B (Beam),43
512,znapupn0003,Znap Connector 1 x 3 - 2 way A,43
513,znapupn0004,"Znap Beam 3, 1 Hole",43
514,znapupn0005,Znap Connector 3 x 3 - 4 way C (Closed),43


In [29]:
# Show only the first three columns of the fetched parts page
pd.DataFrame(data=p.json()['results']).iloc[:, :3]

Unnamed: 0,part_num,name,part_cat_id
0,003434,Sticker Sheet for Set 653-1,58
1,004219,"Sticker Sheet for Set 939-1 with flags for AU, IE",58
2,004229,Sticker Sheet for Set 295-1,58
3,004284,Sticker Sheet for Set 723-2,58
4,004285,Sticker Sheet for Set 725-2,58
...,...,...,...
995,120070,"Poster, U.S.S. Constellation",17
996,12017,Duplo Train Locomotive Front with Silver Headl...,4
997,120317,"Instruction Manual, Blacktron Super Model",17
998,12044,Duplo Animal Lion Adult Male,4


In [18]:
# Loop through every page of parts and collect the contents associated with key 'results'
# Note that the actual Activation Key is replaced with 'xxxx' below
parts_list = []
page = 1
while True:
    url = f'https://rebrickable.com/api/v3/lego/parts/?key=xxxx&page_size=1000&page={page}'
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of failing later when 'results' is read
    response.raise_for_status()
    payload = response.json()
    parts_list.extend(payload['results'])
    # NOTE(review): relies on the paginated response exposing a 'next' link that is
    # empty on the last page, so the page count (38 on January 3, 2021) is not hard-coded.
    if not payload.get('next'):
        break
    page += 1
    # Short pause between requests so the API is not spammed
    time.sleep(1)

In [19]:
# Keep the three descriptive columns of each part. Selecting them by name (rather than
# by position) keeps the table correct even if the API returns its fields in another order.
lego_parts = pd.DataFrame(parts_list)[['part_num', 'name', 'part_cat_id']]
print(lego_parts.shape)
lego_parts.head()

(37516, 3)


Unnamed: 0,part_num,name,part_cat_id
0,3434,Sticker Sheet for Set 653-1,58
1,4219,"Sticker Sheet for Set 939-1 with flags for AU, IE",58
2,4229,Sticker Sheet for Set 295-1,58
3,4284,Sticker Sheet for Set 723-2,58
4,4285,Sticker Sheet for Set 725-2,58


In [20]:
# Save the parts table as CSV, without the index column
lego_parts.to_csv(path_or_buf='data/lego_parts_01_03_2021.csv', index=False)

#### Parts Categories

In [21]:
# Fetch the part categories and show them as a table.
# Note that the actual Activation Key is replaced with 'xxxx' below
pcurl = 'https://rebrickable.com/api/v3/lego/part_categories/?key=xxxx&page_size=1000&page=1'
# The timeout keeps the cell from hanging indefinitely; raise_for_status()
# surfaces HTTP errors here instead of when 'results' is read.
pc = requests.get(pcurl, timeout=30)
pc.raise_for_status()
pd.DataFrame(pc.json()['results'])

Unnamed: 0,id,name,part_count
0,1,Baseplates,234
1,3,Bricks Sloped,506
2,4,"Duplo, Quatro and Primo",3425
3,5,Bricks Special,145
4,6,Bricks Wedged,253
...,...,...,...
60,63,Minidoll Upper Body,346
61,64,Minidoll Lower Body,237
62,65,Minifig Headwear,1523
63,66,Modulex,219


In [22]:
# Build the part-categories table from the 'results' entry of the response and save it as CSV
lego_part_categories = pd.DataFrame(data=pc.json()['results'])
lego_part_categories.to_csv(path_or_buf='data/lego_part_categories_01_03_2021.csv', index=False)

#### minifigs

In [45]:
# Fetch the first page of minifigs and show it as a table.
# Note that the actual Activation Key is replaced with 'xxxx' below
imurl = 'https://rebrickable.com/api/v3/lego/minifigs/?key=xxxx&page_size=1000&page=1'
# The timeout keeps the cell from hanging indefinitely; raise_for_status()
# surfaces HTTP errors here instead of when 'results' is read.
im = requests.get(imurl, timeout=30)
im.raise_for_status()
pd.DataFrame(im.json()['results'])

Unnamed: 0,set_num,name,num_parts,set_img_url,set_url,last_modified_dt
0,fig-000001,Toy Store Employee,4,https://cdn.rebrickable.com/media/sets/fig-000...,https://rebrickable.com/minifigs/fig-000001/to...,2020-05-27T21:47:00.694941Z
1,fig-000002,Customer Kid,4,https://cdn.rebrickable.com/media/sets/fig-000...,https://rebrickable.com/minifigs/fig-000002/cu...,2020-05-27T21:47:11.750470Z
2,fig-000003,"Assassin Droid, White",8,https://cdn.rebrickable.com/media/sets/fig-000...,https://rebrickable.com/minifigs/fig-000003/as...,2020-06-20T00:49:06.811000Z
3,fig-000004,Basic Figure,4,https://cdn.rebrickable.com/media/sets/fig-000...,https://rebrickable.com/minifigs/fig-000004/ba...,2020-04-09T06:06:08.127005Z
4,fig-000005,Captain America with Short Legs,3,https://cdn.rebrickable.com/media/sets/fig-000...,https://rebrickable.com/minifigs/fig-000005/ca...,2020-04-23T00:09:38.460466Z
...,...,...,...,...,...,...
995,fig-001011,"Shogun, Armor and Helmet with Horn, Blue and B...",6,https://cdn.rebrickable.com/media/sets/fig-001...,https://rebrickable.com/minifigs/fig-001011/sh...,2020-05-02T19:26:25.437850Z
996,fig-001012,Farmer (CMF),4,,https://rebrickable.com/minifigs/fig-001012/fa...,2020-09-10T15:10:13.935333Z
997,fig-001013,Red Skull with Short Legs,3,https://cdn.rebrickable.com/media/sets/fig-001...,https://rebrickable.com/minifigs/fig-001013/re...,2020-04-23T00:09:00.583930Z
998,fig-001014,Astronaut,6,https://cdn.rebrickable.com/media/sets/fig-001...,https://rebrickable.com/minifigs/fig-001014/as...,2020-05-31T17:59:15.299295Z


All the information above is linked through the "Inventory" tables, which can only be downloaded from Rebrickable's website (https://rebrickable.com/downloads/). There is no explicit documentation about the "Inventory", but it has been interpreted as the smaller bags of parts that come with the LEGO sets.

In [4]:
# Perform some basic cleaning
# Reload every table from the data/ folder before filtering.
# (The lego_sets path previously lacked the '/' after 'data' and pointed to a
# file that the earlier to_csv call never wrote.)
lego_sets = pd.read_csv('data/lego_sets_01_03_2021.csv')
inventories = pd.read_csv('data/inventories_01_03_2021.csv')
inventory_minifigs = pd.read_csv('data/inventory_minifigs_01_03_2021.csv')
inventory_parts = pd.read_csv('data/inventory_parts_01_03_2021.csv')
inventory_sets = pd.read_csv('data/inventory_sets_01_03_2021.csv')
lego_colors = pd.read_csv('data/lego_colors_01_03_2021.csv')
lego_part_categories = pd.read_csv('data/lego_part_categories_01_03_2021.csv')
# part_num values such as '003434' carry leading zeros; reading the column as text
# keeps them from being parsed as numbers.
lego_parts = pd.read_csv('data/lego_parts_01_03_2021.csv', dtype={'part_num': str})
lego_themes = pd.read_csv('data/lego_themes_01_03_2021.csv')

In [5]:
# Inspect data structure: first two rows and shape of the sets and inventories tables
for frame in (lego_sets, inventories):
    display(frame.head(2))
    display(frame.shape)

Unnamed: 0,set_num,name,year,theme_id,num_parts
0,001-1,Gears,1965,1,43
1,0011-2,Town Mini-Figures,1978,84,12


(16195, 5)

Unnamed: 0,id,version,set_num
0,1,1,7922-1
1,3,1,3931-1


(27377, 3)

In [6]:
# Filter inventories to only contain sets that are in the lego_sets dataframe.
# Series.isin tests membership in one vectorised pass, instead of rebuilding
# list(lego_sets['set_num']) and scanning it for every inventory row.
inventories_filtered = (
    inventories[inventories['set_num'].isin(lego_sets['set_num'])]
    .reset_index(drop=True)
)

inventories_filtered.shape

(16721, 3)

In [7]:
# Save the filtered inventories as CSV, without the index column
inventories_filtered.to_csv(path_or_buf='data/inventories_filtered_01_03_2021.csv', index=False)

In [10]:
# Keep only the inventory_minifigs rows whose inventory_id is one of the filtered inventories.
# Series.isin tests membership in one vectorised pass, instead of rebuilding
# list(inventories_filtered['id']) and scanning it for every row.
inventory_minifigs_filtered = (
    inventory_minifigs[inventory_minifigs['inventory_id'].isin(inventories_filtered['id'])]
    .reset_index(drop=True)
)

inventory_minifigs_filtered.shape

(16280, 3)

In [11]:
# Save the filtered inventory_minifigs as CSV, without the index column
inventory_minifigs_filtered.to_csv(
    path_or_buf='data/inventory_minifigs_filtered_01_03_2021.csv',
    index=False,
)

In [12]:
# Keep only the inventory_parts rows whose inventory_id is one of the filtered inventories.
# Series.isin tests membership in one vectorised pass, instead of rebuilding
# list(inventories_filtered['id']) and scanning it for every row.
inventory_parts_filtered = (
    inventory_parts[inventory_parts['inventory_id'].isin(inventories_filtered['id'])]
    .reset_index(drop=True)
)

inventory_parts_filtered.shape

(827553, 5)

In [13]:
# Save the filtered inventory_parts as CSV, without the index column
inventory_parts_filtered.to_csv(
    path_or_buf='data/inventory_parts_filtered_01_03_2021.csv',
    index=False,
)