In [73]:
import requests
import time
import json
import os

import pandas as pd
import boto3
import awswrangler as wr

from bs4 import BeautifulSoup

# Get the list of IMO numbers from the database

In [2]:
DATABASE = ""
TABLE = ""
OUTPUT_LOCATION = ""

In [3]:
my_session = boto3.session.Session(
    region_name=os.environ['REGION'], 
    aws_access_key_id=os.environ['ACCESS_KEY'], 
    aws_secret_access_key=os.environ['SECRET']
)

In [4]:
query = f"""
    WITH latest_versions AS (
        SELECT CAST(year AS INTEGER) AS year, MAX(CAST(version AS INTEGER)) AS latest_version
        FROM "{DATABASE}"."{TABLE}"
        GROUP BY CAST(year AS INTEGER)
    ),

    latest_data AS (
        SELECT *
        FROM "{DATABASE}"."{TABLE}" se
        JOIN latest_versions lv
        ON CAST(se.year AS INT) = lv.year
        AND CAST(se.version AS INT) = lv.latest_version
    )
    
    SELECT DISTINCT(imo_number) FROM latest_data;
"""

In [5]:
distinc_imo_numbers = wr.athena.read_sql_query(query, database=DATABASE, boto3_session=my_session)

In [6]:
distinc_imo_numbers.head()

Unnamed: 0,imo_number
0,7037806
1,7128332
2,7305253
3,7310507
4,7349039


In [7]:
distinc_imo_numbers.shape

(21014, 1)

# Load the AIS ship data from Kaggle

In [8]:
df1 = pd.read_csv('../data/raw/ais_data.csv')

In [9]:
df1.head()

Unnamed: 0.1,Unnamed: 0,mmsi,navigationalstatus,sog,cog,heading,shiptype,width,length,draught
0,0,219019621,Unknown value,0.0,86.0,86.0,Fishing,4.0,9.0,
1,1,265628170,Unknown value,0.0,334.5,,Port tender,8.0,27.0,
2,2,219005719,Unknown value,0.0,208.7,,Fishing,4.0,11.0,
3,3,219028066,Unknown value,0.0,,,Pleasure,3.0,12.0,
4,4,212584000,Moored,0.0,153.0,106.0,Cargo,13.0,99.0,6.3


In [10]:
df1.shape

(358351, 10)

In [11]:
df1['mmsi'].nunique()

3894

In [12]:
df2 = pd.read_csv('../data/raw/AIS_UNACORN_Seatracks_past12-hours.csv')

In [13]:
df2.head()

Unnamed: 0.1,Unnamed: 0,cog,sog,beam,callsign,cargo,heading,imonumber,latitude,length,longitude,mmsi,name,navstatus,timeoffix,vesseltype,date
0,0,64.0,11.0,65.0,9V9123,0-AllShips,64.0,9532599.0,-25.42,360.0,84.633333,563950000,Yuan Zhuo Hai,0-Underway(Engine),1607606344,7-Cargo,2020-12-10
1,1,65.4,12.1,65.0,9V9123,0-AllShips,64.0,9532599.0,-25.294915,360.0,84.933403,563950000,Yuan Zhuo Hai,0-Underway(Engine),1607611680,7-Cargo,2020-12-10
2,2,323.0,14.1,35.0,V7QX3,0-AllShips,320.0,9813113.0,-24.770147,228.0,106.33189,538008009,Barramundi,0-Underway(Engine),1607611196,7-Cargo,2020-12-10
3,3,339.8,19.8,32.0,HOMQ,0-AllShips,342.0,9536818.0,-24.511267,199.0,111.501532,371158000,Gaia Leader,0-Underway(Engine),1607611210,7-Cargo,2020-12-10
4,4,337.8,10.7,32.0,V7JI6,0-AllShips,338.0,9712709.0,-25.042857,199.0,111.283643,538006056,Star Lutas,0-Underway(Engine),1607611211,7-Cargo,2020-12-10


In [14]:
df2.shape

(103997, 17)

In [15]:
df2['imonumber'].nunique()

13577

In [16]:
df2['imonumber'] = df2['imonumber'].fillna(0)
df2['imonumber'] = df2['imonumber'].astype(int)

# Filter Kaggle dataset for the ships I have in database

In [20]:
thetis_ships = df2[df2['imonumber'].isin(distinc_imo_numbers['imo_number'].to_list())].copy()

In [21]:
thetis_ships.drop(['Unnamed: 0', 'latitude', 'longitude', 'navstatus', 'timeoffix', 'date', 'cog', 'sog', 'heading'], axis=1, inplace=True)

In [22]:
thetis_ships.head()

Unnamed: 0,beam,callsign,cargo,imonumber,length,mmsi,name,vesseltype
3,32.0,HOMQ,0-AllShips,9536818,199.0,371158000,Gaia Leader,7-Cargo
4,32.0,V7JI6,0-AllShips,9712709,199.0,538006056,Star Lutas,7-Cargo
5,25.0,A8YA2,2-HazCatB,9459539,155.0,636015005,M/t Stolt Ocelot,8-Tanker
7,32.0,V7MD5,0-AllShips,9280770,225.0,538002838,Anna Smile,7-Cargo
10,32.0,3ELT6,0-AllShips,9738820,200.0,374875000,Nord Bering,7-Cargo


In [23]:
thetis_ships.shape

(8646, 8)

In [24]:
thetis_ships.duplicated().value_counts()

True     4934
False    3712
dtype: int64

In [25]:
thetis_ships.drop_duplicates(subset=['imonumber'], inplace=True)

In [26]:
thetis_ships.shape

(3704, 8)

In [27]:
thetis_ships['imonumber'].nunique()

3704

In [28]:
thetis_ships.head()

Unnamed: 0,beam,callsign,cargo,imonumber,length,mmsi,name,vesseltype
3,32.0,HOMQ,0-AllShips,9536818,199.0,371158000,Gaia Leader,7-Cargo
4,32.0,V7JI6,0-AllShips,9712709,199.0,538006056,Star Lutas,7-Cargo
5,25.0,A8YA2,2-HazCatB,9459539,155.0,636015005,M/t Stolt Ocelot,8-Tanker
7,32.0,V7MD5,0-AllShips,9280770,225.0,538002838,Anna Smile,7-Cargo
10,32.0,3ELT6,0-AllShips,9738820,200.0,374875000,Nord Bering,7-Cargo


In [29]:
thetis_ships.set_index('imonumber', inplace=True)

In [30]:
thetis_ships.to_json('../data/raw/kaggle_ship_data.json', orient="index", indent=4)

In [39]:
thetis_ships = json.loads(thetis_ships.to_json(orient="index"))

In [40]:
thetis_ships

{'9536818': {'beam': 32.0,
  'callsign': 'HOMQ',
  'cargo': '0-AllShips',
  'length': 199.0,
  'mmsi': 371158000,
  'name': 'Gaia Leader',
  'vesseltype': '7-Cargo'},
 '9712709': {'beam': 32.0,
  'callsign': 'V7JI6',
  'cargo': '0-AllShips',
  'length': 199.0,
  'mmsi': 538006056,
  'name': 'Star Lutas',
  'vesseltype': '7-Cargo'},
 '9459539': {'beam': 25.0,
  'callsign': 'A8YA2',
  'cargo': '2-HazCatB',
  'length': 155.0,
  'mmsi': 636015005,
  'name': 'M/t Stolt Ocelot',
  'vesseltype': '8-Tanker'},
 '9280770': {'beam': 32.0,
  'callsign': 'V7MD5',
  'cargo': '0-AllShips',
  'length': 225.0,
  'mmsi': 538002838,
  'name': 'Anna Smile',
  'vesseltype': '7-Cargo'},
 '9738820': {'beam': 32.0,
  'callsign': '3ELT6',
  'cargo': '0-AllShips',
  'length': 200.0,
  'mmsi': 374875000,
  'name': 'Nord Bering',
  'vesseltype': '7-Cargo'},
 '9188154': {'beam': 40.0,
  'callsign': '3FZA9',
  'cargo': '0-AllShips',
  'length': 285.0,
  'mmsi': 351934000,
  'name': 'Ever Utile',
  'vesseltype': '7-

# Bring in the wikipedia data and check the IMO numbers if they are unique

In [32]:
with open('../data/raw/wikipedia_ship_data.json') as f:
    wikipedia_data = json.load(f)

In [41]:
wikipedia_imo_numbers = list((wikipedia_data.keys()))

In [43]:
kaggle_imo_numbers = list(thetis_ships.keys()) 

In [44]:
len(wikipedia_imo_numbers)

488

In [45]:
len(kaggle_imo_numbers)

3704

In [46]:
wikipedia_imo_numbers in kaggle_imo_numbers

False

In [47]:
ship_technical_specs = wikipedia_data | thetis_ships

In [48]:
with open('../data/raw/ship_technical_specs.json', 'w') as f:
    json.dump(ship_technical_specs, f, indent=4)

# Add another dataset to the technical specifications data

In [49]:
with open('../data/raw/sflcorp_ship_data.json') as f:
    sflcorp_data = json.load(f)

In [50]:
sflcorp_data

{'SFL Yangtze': {'Type': 'Bulk',
  'Flag': 'Hong Kong',
  'Built year': '2012',
  'Builder': 'LONGXUE SHIPBUILDING - GUANGZHOU',
  'IMO Number': '9617947',
  'Call sign': 'VRJN5',
  'Class society': 'AMERICAN BUREAU OF SHIPPING',
  'Capacity': '82.000 DWT',
  'Length': '229m',
  'Breadth': '32m',
  'Charterer': 'Short term',
  'url': 'https://www.sflcorp.com/fleet/sfl-yangtze'},
 'SFL Yukon': {'Type': 'Bulk',
  'Flag': 'Hong Kong',
  'Built year': '2010',
  'Builder': 'XIAMEN SHIPBUILDING INDUSTRY - XIAMEN',
  'IMO Number': '9600839',
  'Call sign': 'VRHQ3',
  'Class society': 'BUREAU VERITAS',
  'Capacity': '57.000 DWT',
  'Length': '190m',
  'Breadth': '33m',
  'Charterer': 'Short term',
  'url': 'https://www.sflcorp.com/fleet/sfl-yukon'},
 'SFL Sara': {'Type': 'Bulk',
  'Flag': 'Hong Kong',
  'Built year': '2011',
  'Builder': 'XIAMEN SHIPBUILDING INDUSTRY - XIAMEN',
  'IMO Number': '9539834',
  'Call sign': 'VRHZ6',
  'Class society': 'BUREAU VERITAS',
  'Capacity': '57.000 DWT',
 

In [51]:
new_dict = {}
for item in sflcorp_data.items():
    print(item)
    new_dict[item[1]['IMO Number']] = item[1] | {"Name": item[0]}

('SFL Yangtze', {'Type': 'Bulk', 'Flag': 'Hong Kong', 'Built year': '2012', 'Builder': 'LONGXUE SHIPBUILDING - GUANGZHOU', 'IMO Number': '9617947', 'Call sign': 'VRJN5', 'Class society': 'AMERICAN BUREAU OF SHIPPING', 'Capacity': '82.000 DWT', 'Length': '229m', 'Breadth': '32m', 'Charterer': 'Short term', 'url': 'https://www.sflcorp.com/fleet/sfl-yangtze'})
('SFL Yukon', {'Type': 'Bulk', 'Flag': 'Hong Kong', 'Built year': '2010', 'Builder': 'XIAMEN SHIPBUILDING INDUSTRY - XIAMEN', 'IMO Number': '9600839', 'Call sign': 'VRHQ3', 'Class society': 'BUREAU VERITAS', 'Capacity': '57.000 DWT', 'Length': '190m', 'Breadth': '33m', 'Charterer': 'Short term', 'url': 'https://www.sflcorp.com/fleet/sfl-yukon'})
('SFL Sara', {'Type': 'Bulk', 'Flag': 'Hong Kong', 'Built year': '2011', 'Builder': 'XIAMEN SHIPBUILDING INDUSTRY - XIAMEN', 'IMO Number': '9539834', 'Call sign': 'VRHZ6', 'Class society': 'BUREAU VERITAS', 'Capacity': '57.000 DWT', 'Length': '190m', 'Breadth': '32m', 'Charterer': 'Short ter

In [52]:
new_dict

{'9617947': {'Type': 'Bulk',
  'Flag': 'Hong Kong',
  'Built year': '2012',
  'Builder': 'LONGXUE SHIPBUILDING - GUANGZHOU',
  'IMO Number': '9617947',
  'Call sign': 'VRJN5',
  'Class society': 'AMERICAN BUREAU OF SHIPPING',
  'Capacity': '82.000 DWT',
  'Length': '229m',
  'Breadth': '32m',
  'Charterer': 'Short term',
  'url': 'https://www.sflcorp.com/fleet/sfl-yangtze',
  'Name': 'SFL Yangtze'},
 '9600839': {'Type': 'Bulk',
  'Flag': 'Hong Kong',
  'Built year': '2010',
  'Builder': 'XIAMEN SHIPBUILDING INDUSTRY - XIAMEN',
  'IMO Number': '9600839',
  'Call sign': 'VRHQ3',
  'Class society': 'BUREAU VERITAS',
  'Capacity': '57.000 DWT',
  'Length': '190m',
  'Breadth': '33m',
  'Charterer': 'Short term',
  'url': 'https://www.sflcorp.com/fleet/sfl-yukon',
  'Name': 'SFL Yukon'},
 '9539834': {'Type': 'Bulk',
  'Flag': 'Hong Kong',
  'Built year': '2011',
  'Builder': 'XIAMEN SHIPBUILDING INDUSTRY - XIAMEN',
  'IMO Number': '9539834',
  'Call sign': 'VRHZ6',
  'Class society': 'BUREA

In [53]:
with open('../data/raw/sflcorp_ship_data_updated.json', 'w') as f:
    json.dump(new_dict, f, indent=4)

In [56]:
ship_technical_specs = ship_technical_specs | new_dict

In [57]:
len(ship_technical_specs.keys())

4228

In [58]:
with open('../data/raw/ship_technical_specs.json', 'w') as f:
    json.dump(ship_technical_specs, f, indent=4)

# Add another dataset

In [59]:
with open('../data/raw/pleiadesgr_fleet.json') as f:
    pleiadesgr_data = json.load(f)

In [60]:
pleiades_new_dict = {}
for item in pleiadesgr_data.items():
    print(item)
    pleiades_new_dict[item[1]['IMO Number']] = item[1] | {"Name": item[0]}

('Ladon', {'url': 'https://pleiades.gr/ships/ladon/', 'dateDelivered': '2018-04-30', 'Built year': 2018, 'deadweight': {'value': 50028, 'unit': 'tonnes'}, 'Builder': 'Hyundai Mipo Dockyard Co., Ltd.', 'Flag': 'Liberian', 'portOfRegistry': 'Monrovia', 'callSign': 'D5OD2', 'IMO Number': '9798961', 'Type': 'Oil Tanker (Scrubber Fitted)', 'typeOfHull': '6 Grades (7 with Slop)', 'Length': {'value': 183.0, 'unit': 'm'}, 'Breadth': {'value': 32.2, 'unit': 'm'}, 'mouldedDepth': {'value': 19.1, 'unit': 'm'}, 'lengthBetweenPerpendiculars': {'value': 175.15, 'unit': 'm'}, 'grossTonnage': 29878, 'netTonnage': 13917})
('Strimon', {'url': 'https://pleiades.gr/ships/strimon/', 'dateDelivered': '2018-05-31', 'Built year': 2018, 'deadweight': {'value': 50028, 'unit': 'tonnes'}, 'Builder': 'Hyundai Mipo Dockyard Co., Ltd.', 'Flag': 'Liberian', 'portOfRegistry': 'Monrovia', 'callSign': 'D5OD3', 'IMO Number': '9798973', 'Type': 'Oil Tanker (Scrubber Fitted)', 'Length': {'value': 183.0, 'unit': 'm'}, 'Brea

In [61]:
ship_technical_specs = ship_technical_specs | pleiades_new_dict

In [62]:
len(ship_technical_specs.keys())

4239

In [64]:
with open('../data/raw/ship_technical_specs.json', 'w') as f:
    json.dump(ship_technical_specs, f, indent=4)

# Add another dataset to the technical specs

In [66]:
cmb_data = pd.read_csv('../data/raw/cmb-tech-fleet-export-2025-01-09_134234.csv')

In [67]:
cmb_data

Unnamed: 0,Brand,Name,Type,Flag,Built,Owned,Interest,Spot TC,Draft (m),Length (m),IMO Number,Shipyard
0,Euronav,Aegean,VLCC,BE,2016,Yes,100.0,SPOT,21.62,332.97,9732553,"Hyundai Samho Heavy Industries Co., Ltd."
1,Euronav,Alsace,VLCC,FR,2012,Yes,100.0,SPOT,22.50,330.00,9530905,Samsung
2,Euronav,Antigone,VLCC,FR,2015,Yes,100.0,SPOT,21.60,333.00,9709075,"Hyundai Samho Heavy Industries Co., Ltd."
3,Euronav,Brest,Suezmax,GR,2023,Yes,100.0,SPOT,17.65,270.00,9941867,"Hyundai Samho Heavy Industries Co., Ltd."
4,Euronav,Bristol,Suezmax,GR,2024,Yes,100.0,SPOT,17.65,270.00,9941879,"Hyundai Samho Heavy Industries Co., Ltd."
...,...,...,...,...,...,...,...,...,...,...,...,...
153,Windcat,Windcat 63,CTV,,2025,Yes,,TBA,1.80,27.00,,Neptune
154,Windcat,Windcat 7,CTV,UK,2007,Yes,100.0,TC Out,1.80,15.86,913037,Island Boats Inc
155,Tugboats & ferries,HydroBingo,Ferry,JP,2021,Yes,50.0,,1.75,19.40,,"Tsuneishi Facilities and Craft, Urasaki factory"
156,Tugboats & ferries,Hydrotug,Tugboat,BE,2022,Yes,,Bareboat,,30.00,,Armon


In [68]:
cmb_data.dropna(subset=['IMO Number'], inplace=True)

In [69]:
cmb_data.shape

(122, 12)

In [70]:
cmb_data.set_index('IMO Number', inplace=True)

In [72]:
cmb_data_json = json.loads(cmb_data.to_json(orient='index'))

In [90]:
for index, ship in enumerate(cmb_data_json.items()):
    ship_name = ship[1]['Name']
    
    html_page = requests.get(f"https://cmb.tech/fleet/{ship_name}")
    soup = BeautifulSoup(html_page.text, "html.parser")
    rows = soup.find_all('div', attrs={'class': 'Specs-item Specs-item--dwt'})
    
    for row in rows:
        label = row.find('div', class_='Specs-item-label').text.strip()
        value = row.find('div', class_='Specs-item-value').text.strip()
        ship[1][label] = value

    print(f"{index} -> {ship_name}")
    time.sleep(8)

0 -> Aegean
1 -> Alsace
2 -> Antigone
3 -> Brest
4 -> Bristol
5 -> Brugge
6 -> Cap Corpus Christi
7 -> Cap Lara
8 -> Cap Pembroke
9 -> Cap Port Arthur
10 -> Cap Quebec
11 -> Cap Theodora
12 -> Captain Michael
13 -> Cedar
14 -> Cypres
15 -> Daishan
16 -> Dalma
17 -> Dia
18 -> Donoussa
19 -> Fraternity
20 -> FSO Africa
21 -> FSO Asia
22 -> H5105
23 -> H5106
24 -> Hakata
25 -> Hakone
26 -> Helios
27 -> Hirado
28 -> Hojo
29 -> Ilma
30 -> Ingrid
31 -> Iris
32 -> Maria
33 -> Orion
34 -> Sienna
35 -> Sofia
36 -> Stella
37 -> TK300K-3
38 -> BC210K-47
39 -> BC210K-48
40 -> BC210K-49
41 -> BC210K-50
42 -> BC210K-51
43 -> BC210K-52
44 -> BC210K-53
45 -> BC210K-54
46 -> BC210K-55
47 -> Mineral Belgie
48 -> Mineral Denmark
49 -> Mineral Deutschland
50 -> Mineral Eire
51 -> Mineral Espana
52 -> Mineral France
53 -> Mineral Hellas
54 -> Mineral Italia
55 -> Mineral Luxembourg
56 -> Mineral Nederland
57 -> CMA CGM Dolomites
58 -> CMA CGM Etosha
59 -> CMA CGM Masai Mara
60 -> CMA CGM Zingaro
61 -> Boch

In [92]:
with open('../data/raw/cmb-tech-fleet.json', 'w') as f:
    json.dump(cmb_data_json, f, indent=4)

In [93]:
len(ship_technical_specs.keys())

4239

In [94]:
ship_technical_specs = ship_technical_specs | cmb_data_json

In [95]:
len(ship_technical_specs.keys())

4357

In [96]:
with open('../data/raw/ship_technical_specs.json', 'w') as f:
    json.dump(ship_technical_specs, f, indent=4)

# Add the final dataset

In [97]:
new_df = pd.read_csv('../data/raw/Cleaned_ships_data.csv')

In [98]:
new_df

Unnamed: 0,Company_Name,ship_name,built_year,gt,dwt,length,width
0,PRELUDE,Offshore Support Vessel,2017,499167,394330,489,74
1,MSC LORETO,Container Ship,2023,236184,240000,399,60
2,MSC FEBE,Container Ship,2019,232618,228149,400,62
3,MSC ARINA,Container Ship,2019,228741,228111,400,61
4,EVER GOVERN,Container Ship,2019,219688,198937,400,59
...,...,...,...,...,...,...,...
195,HONORSHIP,Bulk Carrier,2010,90423,180242,289,45
196,MARAN TRANSPORTER,Bulk Carrier,2010,90136,179719,292,45
197,MARAN ASTRONOMER,Bulk Carrier,2012,90104,179719,292,45
198,WINNING LOYALTY,Bulk Carrier,2004,90091,180310,289,45


In [109]:
query = f"""
    WITH latest_versions AS (
        SELECT CAST(year AS INTEGER) AS year, MAX(CAST(version AS INTEGER)) AS latest_version
        FROM "{DATABASE}"."{TABLE}"
        GROUP BY CAST(year AS INTEGER)
    ),

    latest_data AS (
        SELECT *
        FROM "{DATABASE}"."{TABLE}" se
        JOIN latest_versions lv
        ON CAST(se.year AS INT) = lv.year
        AND CAST(se.version AS INT) = lv.latest_version
    )
    
    SELECT DISTINCT(imo_number), name FROM latest_data;
"""
athena_ship_name = wr.athena.read_sql_query(query, database=DATABASE, boto3_session=my_session)

In [118]:
athena_ship_name.head()

Unnamed: 0,imo_number,name
0,7128332,SEA WIND
1,7305253,CORSICA VICTORIA
2,7310507,FIDELITY
3,7360605,MOBY VINCENT
4,7360617,SARDINIA VERA


In [117]:
athena_ship_name.shape

(24738, 2)

In [125]:
new_df

Unnamed: 0,Company_Name,ship_name,built_year,gt,dwt,length,width
0,PRELUDE,Offshore Support Vessel,2017,499167,394330,489,74
1,MSC LORETO,Container Ship,2023,236184,240000,399,60
2,MSC FEBE,Container Ship,2019,232618,228149,400,62
3,MSC ARINA,Container Ship,2019,228741,228111,400,61
4,EVER GOVERN,Container Ship,2019,219688,198937,400,59
...,...,...,...,...,...,...,...
195,HONORSHIP,Bulk Carrier,2010,90423,180242,289,45
196,MARAN TRANSPORTER,Bulk Carrier,2010,90136,179719,292,45
197,MARAN ASTRONOMER,Bulk Carrier,2012,90104,179719,292,45
198,WINNING LOYALTY,Bulk Carrier,2004,90091,180310,289,45


In [133]:
data_dict = {}
unmatched_names = []

for index, row in new_df.iterrows():
    print(row['Company_Name'], row.to_json(orient='index'))
    
    if row['Company_Name'] in athena_ship_name['name'].to_list():
        imo_number = athena_ship_name[athena_ship_name['name'] == row['Company_Name']]['imo_number'].values[0]
        data_dict[imo_number] = row.to_json(orient='index')
    else:
        unmatched_names.append(row['Company_Name'])
    print()

PRELUDE {"Company_Name":"PRELUDE","ship_name":"Offshore Support Vessel","built_year":2017,"gt":499167,"dwt":394330,"length":489,"width":74}

MSC LORETO {"Company_Name":"MSC LORETO","ship_name":"Container Ship","built_year":2023,"gt":236184,"dwt":240000,"length":399,"width":60}

MSC FEBE {"Company_Name":"MSC FEBE","ship_name":"Container Ship","built_year":2019,"gt":232618,"dwt":228149,"length":400,"width":62}

MSC ARINA {"Company_Name":"MSC ARINA","ship_name":"Container Ship","built_year":2019,"gt":228741,"dwt":228111,"length":400,"width":61}

EVER GOVERN {"Company_Name":"EVER GOVERN","ship_name":"Container Ship","built_year":2019,"gt":219688,"dwt":198937,"length":400,"width":59}

MAASTRICHT MAERSK {"Company_Name":"MAASTRICHT MAERSK","ship_name":"Container Ship","built_year":2019,"gt":214286,"dwt":190326,"length":399,"width":59}

ONE TREASURE {"Company_Name":"ONE TREASURE","ship_name":"Container Ship","built_year":2018,"gt":210691,"dwt":189766,"length":400,"width":58}

ORE SHENZHEN {"Co

In [138]:
len(ship_technical_specs.keys())

4357

In [137]:
ship_technical_specs = data_dict | ship_technical_specs

{9810654: '{"Company_Name":"PRELUDE","ship_name":"Offshore Support Vessel","built_year":2017,"gt":499167,"dwt":394330,"length":489,"width":74}',
 9934735: '{"Company_Name":"MSC LORETO","ship_name":"Container Ship","built_year":2023,"gt":236184,"dwt":240000,"length":399,"width":60}',
 9839478: '{"Company_Name":"MSC FEBE","ship_name":"Container Ship","built_year":2019,"gt":232618,"dwt":228149,"length":400,"width":62}',
 9839284: '{"Company_Name":"MSC ARINA","ship_name":"Container Ship","built_year":2019,"gt":228741,"dwt":228111,"length":400,"width":61}',
 9832717: '{"Company_Name":"EVER GOVERN","ship_name":"Container Ship","built_year":2019,"gt":219688,"dwt":198937,"length":400,"width":59}',
 9780483: '{"Company_Name":"MAASTRICHT MAERSK","ship_name":"Container Ship","built_year":2019,"gt":214286,"dwt":190326,"length":399,"width":59}',
 9773222: '{"Company_Name":"ONE TREASURE","ship_name":"Container Ship","built_year":2018,"gt":210691,"dwt":189766,"length":400,"width":58}',
 9708851: '{"C

In [None]:
with open('../data/raw/ship_technical_specs.json', 'w') as f:
    json.dump(ship_technical_specs, f, indent=4)