In [None]:
# DEBUG: Cek apakah ada informasi position gambar di XLSX
import zipfile
from io import BytesIO
import xml.etree.ElementTree as ET

# Build the XLSX export URL for the sheet (SHEET_ID comes from an earlier cell).
xlsx_url = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=xlsx"

# Use the same 30 s timeout as the later download cells so a stalled
# connection cannot hang the notebook, and fail loudly on an HTTP error
# instead of handing an error page to zipfile (which would only surface
# as a confusing BadZipFile).
response = requests.get(xlsx_url, timeout=30)
response.raise_for_status()
xlsx_bytes = BytesIO(response.content)

with zipfile.ZipFile(xlsx_bytes, 'r') as zip_ref:
    archive_names = zip_ref.namelist()

    # Look for drawing parts (e.g. xl/drawings/drawing1.xml) that may carry
    # image position information.
    drawing_files = [f for f in archive_names if 'drawing' in f and f.endswith('.xml')]
    print(f"Drawing files: {drawing_files}\n")

    if drawing_files:
        # Show the beginning of the first drawing part only.
        drawing_xml = zip_ref.read(drawing_files[0]).decode('utf-8')
        print("Sample drawing XML:")
        print(drawing_xml[:1000])

    # Check whether the first worksheet mentions a drawing.
    sheet_file = 'xl/worksheets/sheet1.xml'
    if sheet_file in archive_names:
        sheet_xml = zip_ref.read(sheet_file).decode('utf-8')
        # Locate the first occurrence once and reuse the offset.
        drawing_pos = sheet_xml.find('drawing')
        if drawing_pos != -1:
            print("\n‚úÖ Sheet contains drawing references!")
            print(sheet_xml[drawing_pos:drawing_pos + 200])

: 

In [None]:
# SOLUTION: Parse drawing XML untuk dapat row number tiap image!
import zipfile
from io import BytesIO
import xml.etree.ElementTree as ET
import re
import time

xlsx_url = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=xlsx"

# Download the workbook, retrying a few times on network/HTTP failures.
max_retries = 3
for attempt in range(max_retries):
    try:
        print(f"üì• Downloading XLSX (attempt {attempt + 1}/{max_retries})...")
        response = requests.get(xlsx_url, timeout=30)
        # An HTTP error response does not raise by itself; make it count as
        # a failed attempt so it is retried rather than parsed as a ZIP.
        response.raise_for_status()
        xlsx_bytes = BytesIO(response.content)
        print("‚úÖ XLSX downloaded!")
        break
    except requests.RequestException as e:
        print(f"‚ùå Error: {e}")
        if attempt < max_retries - 1:
            print("‚è≥ Retrying in 2 seconds...")
            time.sleep(2)
        else:
            # Out of attempts: surface the last error to the caller.
            raise

# Map: picture name -> 0-indexed anchor row taken from the drawing XML.
image_to_row = {}

# XML namespaces used by the drawing parts (loop-invariant, so built once).
ns = {
    'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'
}

with zipfile.ZipFile(xlsx_bytes, 'r') as zip_ref:
    drawing_files = [f for f in zip_ref.namelist() if 'drawing' in f and f.endswith('.xml')]

    for drawing_file in drawing_files:
        drawing_xml = zip_ref.read(drawing_file).decode('utf-8')
        root = ET.fromstring(drawing_xml)

        # Every anchor element describes where one drawing object is placed.
        anchors = (
            root.findall('.//xdr:oneCellAnchor', ns)
            + root.findall('.//xdr:twoCellAnchor', ns)
        )
        for anchor in anchors:
            # The <xdr:from> element holds the top-left cell of the anchor.
            from_elem = anchor.find('.//xdr:from', ns)
            if from_elem is None:
                continue

            row_elem = from_elem.find('.//xdr:row', ns)
            # Skip anchors without a usable row value instead of crashing
            # in int() on an empty element.
            if row_elem is None or not (row_elem.text or '').strip():
                continue
            row_num = int(row_elem.text)  # 0-indexed

            # Only picture anchors carry a name we can map.
            pic = anchor.find('.//xdr:pic', ns)
            if pic is None:
                continue
            cnvpr = pic.find('.//xdr:nvPicPr/xdr:cNvPr', ns)
            if cnvpr is None:
                continue

            # NOTE(review): later cells use this name as a file name inside
            # IMAGE_FOLDER; that assumes the picture's `name` attribute
            # equals the media file name - confirm against a real export.
            img_name = cnvpr.get('name')
            if img_name:
                image_to_row[img_name] = row_num

print(f"\n‚úÖ Mapped {len(image_to_row)} images to rows")
print("\nSample mapping (first 10):")
for img, row in list(image_to_row.items())[:10]:
    print(f"  {img} ‚Üí Row {row + 1} (spreadsheet)")

# Only mention the remainder when there is one; with fewer than 10 mappings
# the unconditional message used to report a negative count.
remaining = len(image_to_row) - 10
if remaining > 0:
    print(f"\n... and {remaining} more images")

In [None]:
# RESET: Delete dan download ulang semua images
import shutil

# Delete folder
# Fetch and open the workbook BEFORE touching the folder: if the download
# fails (network error, HTTP error, invalid archive) the existing images
# are left in place instead of being deleted with nothing to replace them.
print("\nüì• Re-downloading images...")
response = requests.get(xlsx_url, timeout=30)
response.raise_for_status()
xlsx_bytes = BytesIO(response.content)

with zipfile.ZipFile(xlsx_bytes, 'r') as zip_ref:
    # Embedded images live under xl/media/; ignore a bare directory entry,
    # which has no file name to write to.
    media_files = [
        f for f in zip_ref.namelist()
        if f.startswith('xl/media/') and not f.endswith('/')
    ]

    # Delete the old folder now that the archive is known to be readable.
    if os.path.exists(IMAGE_FOLDER):
        shutil.rmtree(IMAGE_FOLDER)
        print(f"üóëÔ∏è Deleted {IMAGE_FOLDER}")

    # Recreate it empty.
    os.makedirs(IMAGE_FOLDER, exist_ok=True)
    print(f"üìÅ Created fresh {IMAGE_FOLDER}")

    for media_file in media_files:
        img_data = zip_ref.read(media_file)
        # Keep only the base name so every image lands directly in IMAGE_FOLDER.
        img_name = os.path.basename(media_file)
        save_path = os.path.join(IMAGE_FOLDER, img_name)

        with open(save_path, 'wb') as f:
            f.write(img_data)

    print(f"‚úÖ Downloaded {len(media_files)} images with original names")

In [None]:
# NOW RENAME BERDASARKAN MAPPING YANG BENER!
import shutil

print("üîÑ Renaming images based on actual row positions...\n")

renamed_count = 0
for img_name, row_idx in image_to_row.items():
    # row_idx from the XML is 0-indexed: row 0 is the header and row 1 the
    # first data row. In df, index 0 is already the first data row, so the
    # matching df position is row_idx - 1.
    df_idx = row_idx - 1
    if not (0 <= df_idx < len(df)):
        continue

    # Target file name comes from the sheet; skip blank / missing cells.
    new_name = str(df.iloc[df_idx]['Nama File (tambah .jpg)']).strip()
    if not new_name or new_name == 'nan':
        continue

    old_path = os.path.join(IMAGE_FOLDER, img_name)
    if not os.path.exists(old_path):
        continue
    new_path = os.path.join(IMAGE_FOLDER, new_name)

    # If another file already has the target name, append the 1-based df
    # position before the extension.
    if old_path != new_path and os.path.exists(new_path):
        base, ext = os.path.splitext(new_name)
        new_name = f"{base}_{df_idx+1}{ext}"
        new_path = os.path.join(IMAGE_FOLDER, new_name)

    # Nothing to do when the image already has its final name.
    if old_path == new_path:
        continue

    shutil.move(old_path, new_path)
    print(f"Row {row_idx+1}: {img_name} ‚Üí {new_name}")
    renamed_count += 1

print(f"\n‚úÖ Renamed {renamed_count} images correctly!")

In [None]:
# CHECK: Verify hasil rename dengan preview
import glob
from IPython.display import Image, display

print("üîç Checking renamed images:\n")
print("=" * 80)

# A few df positions to spot-check. df.iloc[k] corresponds to spreadsheet
# row k + 2 (row 1 is the header), i.e. rows 2, 3, 4, 12 and 22 here.
sample_rows = [0, 1, 2, 10, 20]

for idx in sample_rows:
    if idx >= len(df):
        continue

    row_data = df.iloc[idx]
    nama_makanan = row_data['Nama Makanan']
    # Normalise the same way the rename cell does (str + strip) so the path
    # checked here matches the name the file was actually renamed to, and a
    # missing (NaN) cell cannot crash os.path.join with a TypeError.
    nama_file = str(row_data['Nama File (tambah .jpg)']).strip()
    harga = row_data['Harga']
    kalori = row_data['Kalori (kkal)']

    has_name = bool(nama_file) and nama_file != 'nan'
    img_path = os.path.join(IMAGE_FOLDER, nama_file) if has_name else None
    image_exists = has_name and os.path.exists(img_path)
    exists_label = '‚úÖ Yes' if image_exists else '‚ùå NO'

    # idx + 2 keeps the row label consistent with the mapping/rename cells.
    print(f"\nüìã Row {idx + 2} di Spreadsheet:")
    print(f"   Nama: {nama_makanan}")
    print(f"   File: {nama_file}")
    print(f"   Harga: {harga} | Kalori: {kalori}")
    print(f"   Exists: {exists_label}")

    if image_exists:
        try:
            display(Image(filename=img_path, width=250))
        except Exception:
            # Best-effort preview: keep going on unreadable images, but do
            # not swallow KeyboardInterrupt/SystemExit like a bare except.
            print("   (Cannot display image)")

    print("-" * 80)

In [None]:
# DEBUG: inspect the XML mapping and the files currently on disk
print("üîç DEBUG INFO:\n")

# 1. How many files are in the image folder right now
actual_files = os.listdir(IMAGE_FOLDER)
print(f"üìÅ Files in {IMAGE_FOLDER}: {len(actual_files)} files")
print(f"   Sample: {actual_files[:5]}\n")

# 2. What the XML-derived mapping looks like (first five entries)
first_mappings = list(image_to_row.items())[:5]
print(f"üó∫Ô∏è Mapping from XML: {len(image_to_row)} mappings")
print("   Sample mappings:")
for img, row in first_mappings:
    print(f"   {img} ‚Üí Row {row + 1} (spreadsheet) ‚Üí df.iloc[{row - 1}]")
print()

# 3. For those entries, report whether the old and new file names exist
print("üîé Checking first 5 mappings:")
for position, (img_name, row_idx) in enumerate(first_mappings, start=1):
    # XML row 0 is the header, so the df position is one less.
    df_idx = row_idx - 1
    if not (0 <= df_idx < len(df)):
        continue

    expected_name = df.iloc[df_idx]['Nama File (tambah .jpg)']
    old_exists = os.path.exists(os.path.join(IMAGE_FOLDER, img_name))
    new_exists = os.path.exists(os.path.join(IMAGE_FOLDER, expected_name))

    print(f"\n   {position}. {img_name} (Row {row_idx + 1})")
    print(f"      Should be renamed to: {expected_name}")
    print(f"      Old name exists: {old_exists}")
    print(f"      New name exists: {new_exists}")