In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy import units as u
from astropy import constants as const

# 2.1.1 


# Load the raw table; its shape is printed before it is reoriented.
df = pd.read_csv("solar_system.csv")
print("Shape before transpose:", df.shape)

# Use the "Attribute" values as column headers, then turn the former column
# labels into an ordinary "Planet" column.
transposed = df.set_index("Attribute").transpose()
transposed.index.name = "Planet"
df = transposed.reset_index()
df.columns.name = None

print("Shape after transpose:", df.shape)
print("Columns:", df.columns.tolist())

# Split the headers by whether they contain a parenthesis (used here as the
# marker for a unit annotation such as "(km)").
unit_cols, non_unit_cols = [], []
for header in df.columns:
    (unit_cols if "(" in header else non_unit_cols).append(header)

print("Columns with units:", unit_cols)
print("Columns without units:", non_unit_cols)

# 2.1.2 & 2.1.3 

# Columns that stay as text; every other column is coerced to a numeric dtype,
# with unparseable entries becoming NaN (errors="coerce").
text_columns = ("Planet", "Ring System?", "Global Magnetic Field?")
numeric_columns = [name for name in df.columns if name not in text_columns]

for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors="coerce")

print("\nData types after conversion:")
print(df.dtypes)

# 2.2

def attach_units(col_name, unit, new_col_name=None, frame=None):
    """
    Multiply every value of a column by ``unit`` and store the products.

    Parameters
    ----------
    col_name : str
        Name of the source column. It is overwritten in place with its
        numeric conversion (unparseable entries become NaN).
    unit : object
        Factor each value is multiplied by, e.g. an Astropy unit or
        quantity such as ``1e24 * u.kg``.
    new_col_name : str, optional
        Column that receives the products. Defaults to ``col_name``, in
        which case the source column is replaced by the products.
    frame : pandas.DataFrame, optional
        Table to modify. Defaults to the module-level ``df`` so existing
        three-argument calls keep working.

    Returns
    -------
    None
        The table is modified in place.

    Raises
    ------
    KeyError
        If ``col_name`` is not a column of the table.
    """
    if frame is None:
        # Backward-compatible default: operate on the notebook's global table.
        frame = df

    target = col_name if new_col_name is None else new_col_name

    # Convert once and reuse the result for both the source and target columns.
    numeric = pd.to_numeric(frame[col_name], errors="coerce")
    frame[col_name] = numeric

    # Element-wise multiplication keeps one product per cell.
    frame[target] = numeric.apply(lambda value: value * unit)

# Scale the tabulated masses (given in units of 10^24 kg) into kilograms,
# writing the result to a separate "Mass (kg)" column.
attach_units("Mass (10^24kg)", 1e24 * u.kg, new_col_name="Mass (kg)")

mass_kg = df["Mass (kg)"]
print("\nMass column values (kg):")
print(mass_kg.apply(lambda quantity: quantity.value))
print("Units of Mass column:", mass_kg.iloc[0].unit)

# 2.3.1

# Semi-major axis = mean of perihelion and aphelion distances.
# The inputs are tabulated in 10^6 km, which is 10^9 m = 1 gigametre, so the
# matching Astropy unit is u.Gm. (u.Mm is 10^6 *metres* and would make every
# distance a factor of 1000 too small.)
semi_major_axis = (df["Perihelion (10^6 km)"] + df["Aphelion (10^6 km)"]) / 2
df["Semi-Major Axis (10^6 km)"] = semi_major_axis.apply(lambda a: a * u.Gm)

print("\nSemi-Major Axis column (values only):")
print(df["Semi-Major Axis (10^6 km)"].apply(lambda x: x.value))
print("Units:", df["Semi-Major Axis (10^6 km)"].iloc[0].unit)

# 2.3.2 

# Take the day-based column out of the table and append its year-based
# replacement, with each cell stored as an Astropy quantity.
period_days = df.pop("Orbital Period (days)")
df["Orbital Period (years)"] = period_days.apply(
    lambda days: (days * u.day).to(u.year)
)

period_years = df["Orbital Period (years)"]
print("\nOrbital period in years (values only):")
print(period_years.apply(lambda quantity: quantity.value))
print("Units:", period_years.iloc[0].unit)

# Spot-check a single planet's converted period.
planet = "Earth"
planet_rows = df["Planet"] == planet
val = df.loc[planet_rows, "Orbital Period (years)"].iloc[0]
print(f"{planet}'s orbital period in years = {val.value:.4f}")

# 2.3.3 

# Select the columns whose header carries a plain "(km)" annotation.
# NOTE(review): this filter does not match the "(10^6 km)" headers, so those
# columns are left unconverted - confirm that this is intended.
dist_cols = [header for header in df.columns if "(km)" in header]

# Replace each kilometre column with an "(AU)" column of Astropy quantities.
for col in dist_cols:
    new_col = col.replace("(km)", "(AU)")
    km_values = df.pop(col)
    df[new_col] = km_values.apply(lambda km: (km * u.km).to(u.au))

print("\nDistance columns in AU for Earth:")
earth_rows = df["Planet"] == "Earth"
for col in df.columns:
    if "(AU)" not in col:
        continue
    earth_quantity = df.loc[earth_rows, col].iloc[0]
    print(col, "=", earth_quantity.value)
    print("Units:", earth_quantity.unit)

# 2.4 

# Write the processed table to disk without the positional row index.
output_path = "units.csv"
df.to_csv(output_path, index=False)
print("\nSaved cleaned DataFrame as units.csv successfully!")



Shape before transpose: (20, 11)
Shape after transpose: (10, 21)
Columns: ['Planet', 'Mass (10^24kg)', 'Diameter (km)', 'Density (kg/m^3)', 'Gravity (m/s^2)', 'Escape Velocity (km/s)', 'Rotation Period (hours)', 'Length of Day (hours)', 'Distance from Sun (10^6 km)', 'Perihelion (10^6 km)', 'Aphelion (10^6 km)', 'Orbital Period (days)', 'Orbital Velocity (km/s)', 'Orbital Inclination (deg)', 'Orbital Eccentricity', 'Obliquity to Orbit (deg)', 'Mean Temperature (C)', 'Surface Pressure (bars)', 'Number of Moons', 'Ring System?', 'Global Magnetic Field?']
Columns with units: ['Mass (10^24kg)', 'Diameter (km)', 'Density (kg/m^3)', 'Gravity (m/s^2)', 'Escape Velocity (km/s)', 'Rotation Period (hours)', 'Length of Day (hours)', 'Distance from Sun (10^6 km)', 'Perihelion (10^6 km)', 'Aphelion (10^6 km)', 'Orbital Period (days)', 'Orbital Velocity (km/s)', 'Orbital Inclination (deg)', 'Obliquity to Orbit (deg)', 'Mean Temperature (C)', 'Surface Pressure (bars)']
Columns without units: ['Planet