### Categorize OCaml files by line count

In [10]:
import os

# Set folder path and extension to match ('.md', '.ml', etc.)
folder_path = "./100"
file_ext = ".ml"  # Change to '.md' if you're using Markdown

# Size buckets as (exclusive upper bound, label), in ascending order.
# Keeping thresholds and labels in one table means they cannot drift apart.
# The last bucket is open-ended.
SIZE_BUCKETS = [
    (100, '🟢 Tiny (<100)'),
    (500, '🟡 Small (100–499)'),
    (1000, '🟠 Medium (500–999)'),
    (3000, '🔵 Large (1000–2999)'),
    (float("inf"), '🟣 Huge (3000+)'),
]


def category_for(n_lines):
    """Return the bucket label for a file that has ``n_lines`` lines."""
    for upper_bound, bucket_label in SIZE_BUCKETS:
        if n_lines < upper_bound:
            return bucket_label
    # Not reachable for real numbers because the last bound is infinity.
    return SIZE_BUCKETS[-1][1]


def count_lines(path):
    """Return the number of lines in the text file at ``path``.

    Bytes that are not valid UTF-8 are replaced instead of raising, so a
    stray legacy-encoded character does not exclude the file from the
    statistics. Replacement never adds or removes line breaks.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path, 'r', encoding='utf-8', errors='replace') as f:
        return sum(1 for _ in f)


def scan_folder(folder, ext):
    """Recursively collect line counts of files under ``folder``.

    Args:
        folder: directory to walk.
        ext: filename suffix to match, e.g. ``".ml"``.

    Returns:
        A dict mapping every bucket label to a list of
        ``(n_lines, relative_path)`` tuples, sorted by line count and then
        by path so the result does not depend on directory listing order.
        Unreadable files are reported on stdout and skipped.
    """
    found = {bucket_label: [] for _, bucket_label in SIZE_BUCKETS}
    for root, _dirs, names in os.walk(folder):
        for fname in names:
            if not fname.endswith(ext):
                continue
            fpath = os.path.join(root, fname)
            try:
                n_lines = count_lines(fpath)
            except OSError as e:
                print(f"Could not read {fpath}: {e}")
                continue
            rel_path = os.path.relpath(fpath, folder)
            found[category_for(n_lines)].append((n_lines, rel_path))
    for entries in found.values():
        entries.sort()
    return found


# os.walk yields nothing for a missing directory, which would otherwise
# produce an all-zero report with no hint about the cause.
if not os.path.isdir(folder_path):
    print(f"Warning: {folder_path!r} is not a directory; nothing to scan")

entries_by_category = scan_folder(folder_path, file_ext)

# Define categories: label -> list of "N lines - path" strings (sorted).
categories = {
    cat: [f"{n_lines} lines - {rel_path}" for n_lines, rel_path in entries]
    for cat, entries in entries_by_category.items()
}

# Print results
print("\n=== File Counts by Category ===")
for cat, entries in entries_by_category.items():
    total_lines = sum(n_lines for n_lines, _ in entries)
    print(f"{cat}: {len(entries)} files, {total_lines} total lines")
print("\n=== Detailed File List ===")
for cat, labels in categories.items():
    print(f"\n## {cat}")
    for label in labels:
        print(f"  - {label}")



=== File Counts by Category ===
🟢 Tiny (<100): 16 files, 945 total lines
🟡 Small (100–499): 27 files, 7263 total lines
🟠 Medium (500–999): 9 files, 6720 total lines
🔵 Large (1000–2999): 6 files, 13497 total lines
🟣 Huge (3000+): 6 files, 45459 total lines

=== Detailed File List ===

## 🟢 Tiny (<100)
  - 13 lines - arithmetic.ml
  - 24 lines - combinations.ml
  - 27 lines - div3.ml
  - 30 lines - pythagoras.ml
  - 41 lines - gcd.ml
  - 41 lines - sqrt.ml
  - 42 lines - heron.ml
  - 65 lines - chords.ml
  - 68 lines - ratcountable.ml
  - 69 lines - ptolemy.ml
  - 70 lines - triangular.ml
  - 78 lines - divharmonic.ml
  - 87 lines - thales.ml
  - 95 lines - arithmetic_geometric_mean.ml
  - 97 lines - cantor.ml
  - 98 lines - cubic.ml

## 🟡 Small (100–499)
  - 131 lines - subsequence.ml
  - 136 lines - circle.ml
  - 178 lines - fta.ml
  - 185 lines - wilson.ml
  - 200 lines - lhopital.ml
  - 202 lines - quartic.ml
  - 210 lines - cosine.ml
  - 211 lines - primerecip.ml
  - 213 lines - fe