In [2]:
import os
from pathlib import Path

import boto3

# Client for the S3-compatible endpoint. Every connection setting can be
# overridden through an environment variable, so real credentials never have
# to be written into (or committed with) the notebook. The fallbacks are the
# values this notebook originally hard-coded for a local instance.
s3 = boto3.client(
    "s3",
    endpoint_url=os.environ.get("MINIO_ENDPOINT_URL", "http://localhost:9000"),
    aws_access_key_id=os.environ.get("MINIO_ACCESS_KEY", "minioadmin"),
    aws_secret_access_key=os.environ.get("MINIO_SECRET_KEY", "minioadmin"),
    region_name=os.environ.get("MINIO_REGION", "us-east-1"),
)

In [3]:
# Connectivity check: fetch the bucket listing and display the raw response.
buckets_response = s3.list_buckets()
buckets_response

{'ResponseMetadata': {'RequestId': '188969E2FE7D0464',
  'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '275',
   'content-type': 'application/xml',
   'server': 'MinIO AIStor',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'vary': 'Origin, Accept-Encoding',
   'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
   'x-amz-request-id': '188969E2FE7D0464',
   'x-content-type-options': 'nosniff',
   'x-ratelimit-limit': '220',
   'x-ratelimit-remaining': '220',
   'x-xss-protection': '1; mode=block',
   'date': 'Sat, 10 Jan 2026 15:59:12 GMT'},
  'RetryAttempts': 0},
 'Buckets': [],
 'Owner': {'DisplayName': 'minio',
  'ID': '02d6176db174dc93cb1b899f7c6078f08654445fe8cf1b6ce98d8855f66bdbf4'}}

# Upload files 

In [9]:
# Make sure the target bucket is present, creating it only when it is missing.
bucket = "market-data"

# Names of the buckets returned by the listing call.
existing = [entry["Name"] for entry in s3.list_buckets().get("Buckets", [])]
print("Existing buckets:", existing)

if bucket in existing:
    print("Bucket already exists")
else:
    s3.create_bucket(Bucket=bucket)
    print("Bucket created:", bucket)


Existing buckets: ['market-data']
Bucket already exists


In [None]:
# Upload a single file as a smoke test before uploading the whole folder.
# The local path is assembled with pathlib instead of a backslash-separated
# literal, so the cell resolves the file on POSIX systems as well as Windows.
s3_key = "symbol=SPY/year=2020/month=01.parquet"
local_file = str(Path("data") / s3_key)

# `bucket` is the name defined in the bucket-creation cell.
s3.upload_file(local_file, bucket, s3_key)

print("Uploaded!")


Uploaded!


In [None]:
# Upload every Parquet file found under data/, mirroring the folder layout.
from pathlib import Path

bucket = "market-data"
# Keys are computed relative to this directory, so it must be the data/ root
# for the object keys to start at "symbol=...".
base_dir = Path("data")

for parquet_path in base_dir.rglob("*.parquet"):
    # as_posix() yields forward slashes regardless of the local OS separator.
    relative_path = parquet_path.relative_to(base_dir)
    s3_key = relative_path.as_posix()
    s3.upload_file(str(parquet_path), bucket, s3_key)
    print("Uploaded:", s3_key)


Uploaded: symbol=SPY/year=2020/month=01.parquet
Uploaded: symbol=SPY/year=2020/month=02.parquet
Uploaded: symbol=SPY/year=2020/month=03.parquet
Uploaded: symbol=SPY/year=2020/month=04.parquet
Uploaded: symbol=SPY/year=2020/month=05.parquet
Uploaded: symbol=SPY/year=2020/month=06.parquet
Uploaded: symbol=SPY/year=2020/month=07.parquet
Uploaded: symbol=SPY/year=2020/month=08.parquet
Uploaded: symbol=SPY/year=2020/month=09.parquet
Uploaded: symbol=SPY/year=2020/month=10.parquet
Uploaded: symbol=SPY/year=2020/month=11.parquet
Uploaded: symbol=SPY/year=2020/month=12.parquet
Uploaded: symbol=SPY/year=2021/month=01.parquet
Uploaded: symbol=SPY/year=2021/month=02.parquet
Uploaded: symbol=SPY/year=2021/month=03.parquet
Uploaded: symbol=SPY/year=2021/month=04.parquet
Uploaded: symbol=SPY/year=2021/month=05.parquet
Uploaded: symbol=SPY/year=2021/month=06.parquet
Uploaded: symbol=SPY/year=2021/month=07.parquet
Uploaded: symbol=SPY/year=2021/month=08.parquet
Uploaded: symbol=SPY/year=2021/month=09.

In [14]:
# Delete every object in the bucket.
# NOTE: this cell is destructive and sits between the upload cells and the
# "Verify upload" section, so a top-to-bottom run empties the bucket right
# after filling it.

bucket = "market-data"

# A single list_objects_v2 call returns at most 1,000 keys, so walk every page.
# Keys are gathered up front so the listing is not modified while it is read.
paginator = s3.get_paginator("list_objects_v2")
keys = [
    obj["Key"]
    for page in paginator.paginate(Bucket=bucket)
    for obj in page.get("Contents", [])  # "Contents" is absent on an empty page
]

for key in keys:
    s3.delete_object(Bucket=bucket, Key=key)
    print("Deleted: ", key)

Deleted:  symbol=SPY/year=2020/month=01.parquet
Deleted:  ticks/month=01.parquet
Deleted:  ticks/month=02.parquet
Deleted:  ticks/month=03.parquet
Deleted:  ticks/month=04.parquet
Deleted:  ticks/month=05.parquet
Deleted:  ticks/month=06.parquet
Deleted:  ticks/month=07.parquet
Deleted:  ticks/month=08.parquet
Deleted:  ticks/month=09.parquet
Deleted:  ticks/month=10.parquet
Deleted:  ticks/month=11.parquet
Deleted:  ticks/month=12.parquet


# Verify upload with Polars

In [21]:
# List the objects stored under the SPY prefix and show the first five keys.
resp = s3.list_objects_v2(Bucket="market-data", Prefix="symbol=SPY/")
spy_keys = [entry["Key"] for entry in resp.get("Contents", [])]
print(spy_keys[:5])


['symbol=SPY/year=2020/month=01.parquet', 'symbol=SPY/year=2020/month=02.parquet', 'symbol=SPY/year=2020/month=03.parquet', 'symbol=SPY/year=2020/month=04.parquet', 'symbol=SPY/year=2020/month=05.parquet']


In [24]:
import polars as pl

# Lazily scan every Parquet object under the SPY prefix. Connection settings
# come from environment variables when set, so real credentials do not have to
# be stored in the notebook; the fallbacks are the values originally hard-coded
# here for a local instance.
df = pl.scan_parquet(
    "s3://market-data/symbol=SPY/**/*.parquet",
    storage_options={
        "aws_access_key_id": os.environ.get("MINIO_ACCESS_KEY", "minioadmin"),
        "aws_secret_access_key": os.environ.get("MINIO_SECRET_KEY", "minioadmin"),
        "endpoint_url": os.environ.get("MINIO_ENDPOINT_URL", "http://localhost:9000"),
    },
)

# Collect only the total row count as a sanity check on the upload.
print(df.select(pl.len()).collect())


shape: (1, 1)
┌─────────┐
│ len     │
│ ---     │
│ u32     │
╞═════════╡
│ 1238488 │
└─────────┘


In [27]:
# `df` is a LazyFrame (it comes from scan_parquet), so head() alone only builds
# a query plan; collect() is needed to actually fetch and display the rows.
df.head().collect()

In [28]:
# Total number of rows across all scanned files, shown as a one-cell frame.
row_count = df.select(pl.len()).collect()
row_count


len
u32
1238488


In [31]:
# Resolve the LazyFrame's schema explicitly. Accessing `.schema` on a LazyFrame
# performs the same resolution but emits a PerformanceWarning.
df.collect_schema()

  df.schema


Schema([('symbol', String),
        ('timestamp', Datetime(time_unit='ns', time_zone='UTC')),
        ('open', Float64),
        ('high', Float64),
        ('low', Float64),
        ('close', Float64),
        ('volume', Float64),
        ('trade_count', Float64),
        ('vwap', Float64)])

In [None]:
# Materialise five rows with every column for a quick visual check.
first_rows = df.select(pl.all()).head(5)
first_rows.collect()


symbol,timestamp,open,high,low,close,volume,trade_count,vwap
str,"datetime[ns, UTC]",f64,f64,f64,f64,f64,f64,f64
"""SPY""",2020-01-02 17:15:00 UTC,323.07,323.1,323.02,323.08,91603.0,917.0,323.044572
"""SPY""",2020-01-16 00:38:00 UTC,328.76,328.76,328.75,328.75,1125.0,4.0,328.759091
"""SPY""",2020-01-06 12:56:00 UTC,321.26,321.26,321.23,321.26,3711.0,11.0,321.237671
"""SPY""",2020-01-09 14:24:00 UTC,326.1,326.13,326.1,326.13,1741.0,19.0,326.122601
"""SPY""",2020-01-08 09:22:00 UTC,322.22,322.22,322.22,322.22,301.0,2.0,322.22
