In [5]:
import requests
from bs4 import BeautifulSoup
import time
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import japanize_matplotlib
import re


In [6]:
# 1. Prepare the database
# `dbname`, `conn` and `cur` stay module-level because later code in this
# file uses them directly.
dbname = '最終課題.db'
conn = sqlite3.connect(dbname)
cur = conn.cursor()

# Reset the table: drop any existing `properties` table, then recreate it
# empty. The statements are executed in order.
for statement in (
    'DROP TABLE IF EXISTS properties',
    (
        "\n"
        "    CREATE TABLE properties (\n"
        "        id INTEGER PRIMARY KEY AUTOINCREMENT,\n"
        "        name TEXT, \n"
        "        station TEXT, \n"
        "        price INTEGER, \n"
        "        age INTEGER, \n"
        "        floor_plan TEXT\n"
        "    )\n"
    ),
):
    cur.execute(statement)

# 2. Scraping function definition
def _parse_price_yen(price_text):
    """Convert a rent label such as "7.5万円" into an integer number of yen.

    Raises:
        ValueError: if the text left after removing "万円" is not a number.
    """
    # round() instead of int(): the float product can land just below the
    # intended integer (4.27 * 10000 == 42699.99999999999), and int() would
    # truncate that to 42699.
    return round(float(price_text.replace("万円", "")) * 10000)


def _extract_rooms(item):
    """Return the (name, station, price, age, floor_plan) rows of one listing.

    `item` is one `div.cassetteitem` element. Rows whose price cell is
    missing or not numeric are skipped. `age` is always stored as 0.
    NOTE(review): no age is ever read from the page — confirm whether the
    `age` column is meant to be populated.
    """
    title_elem = item.find("div", class_="cassetteitem_content-title")
    name = title_elem.text.strip() if title_elem else "不明"

    station_elem = item.find("div", class_="cassetteitem_detail-col1")
    station = station_elem.text.strip() if station_elem else "不明"

    table = item.find("table", class_="cassetteitem_other")
    if table is None:
        return []

    rooms = []
    # Only the first <tbody> of the table is inspected.
    for tr in table.find("tbody").find_all("tr"):
        cells = tr.find_all("td")
        try:
            price_elem = cells[3].find("li")
            if price_elem is None:
                continue
            price = _parse_price_yen(price_elem.text.strip())
            floor_plan = cells[2].text.strip()
        except (IndexError, ValueError):
            # Too few cells or a non-numeric price: skip just this row.
            continue
        rooms.append((name, station, price, 0, floor_plan))
    return rooms


def get_suumo_data(max_pages=3, wait_seconds=3, timeout=30):
    """Scrape SUUMO rental listing pages and store them in `properties`.

    For each page from 1 to `max_pages`, the search result page is fetched
    and every `div.cassetteitem` is parsed. The rows found on a page are
    inserted through the module-level `cur` and committed on `conn`.

    Args:
        max_pages: Number of result pages to fetch, starting at page 1.
        wait_seconds: Pause after each page, to limit load on the site.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        None. Progress is reported with print().

    Raises:
        requests.RequestException: if a request fails or times out.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
    base_url = "https://suumo.jp/jj/chintai/ichiran/FR301FC001/?ar=030&bs=040&ra=008&cb=0.0&ct=9999999&et=9999999&cn=9999999&mb=0&mt=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&fw2=&ek=050026740&ek=050033920&ek=050016450&ek=050004200&ek=050032790&ek=050001460&ek=050024800&rn=0500"

    for page in range(1, max_pages + 1):
        print(f"--- Page {page} を取得中... ---")
        url = f"{base_url}&page={page}"
        # Without a timeout a stalled connection would block forever.
        res = requests.get(url, headers=headers, timeout=timeout)

        if not res.ok:
            # An error page contains no listings; report it instead of
            # silently saving nothing.
            print(f"Page {page} の取得に失敗しました (HTTP {res.status_code})")
        else:
            res.encoding = 'utf-8'
            soup = BeautifulSoup(res.text, 'html.parser')

            data_list = []
            for item in soup.find_all("div", class_="cassetteitem"):
                try:
                    data_list.extend(_extract_rooms(item))
                except Exception as e:
                    # Best effort: one malformed listing must not abort the page.
                    print(f"物件スキップ: {e}")

            if data_list:
                cur.executemany("INSERT INTO properties (name, station, price, age, floor_plan) VALUES (?, ?, ?, ?, ?)", data_list)
                conn.commit()
                print(f" -> {len(data_list)} 件保存しました")

        time.sleep(wait_seconds)  # limit load on the server

# 3. Run the scraper
try:
    get_suumo_data()
    print("完了！ 最終課題.db に保存しました。")
finally:
    # 4. Close the connection, even when scraping raised, so the database
    # handle is never left open.
    conn.close()

--- Page 1 を取得中... ---
 -> 30 件保存しました
--- Page 2 を取得中... ---
 -> 30 件保存しました
--- Page 3 を取得中... ---
 -> 30 件保存しました
完了！ 最終課題.db に保存しました。
