In [27]:
%%shell
# Debian busterを追加
cat > /etc/apt/sources.list.d/debian.list <<'EOF'
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF
# キーを追加
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
apt-key export 77E11517 | gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg

# DebianのレポジトリからChromiumパッケージを追加
cat > /etc/apt/preferences.d/chromium.pref << 'EOF'
Package: *
Pin: release a=eoan
Pin-Priority: 500

Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300

Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF

# ChromiumとChromium driverをインストール
apt-get update
apt-get install chromium
pip install chromium-driver==90.*

# Seleniumをインストール。
pip install selenium
# 日本語フォントをインストール
apt-get -y install fonts-ipafont-gothic

Executing: /tmp/apt-key-gpghome.GndmMkW6KD/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
gpg: key DCC9EFBF77E11517: "Debian Stable Release Key (10/buster) <debian-release@lists.debian.org>" not changed
gpg: Total number processed: 1
gpg:              unchanged: 1
Executing: /tmp/apt-key-gpghome.eP7sspBO25/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
gpg: key DC30D7C23CBBABEE: "Debian Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" not changed
gpg: Total number processed: 1
gpg:              unchanged: 1
Executing: /tmp/apt-key-gpghome.PUbOvSSUlh/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
gpg: key 4DFAB270CAA96DFA: "Debian Security Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" not changed
gpg: Total number processed: 1
gpg:              unchanged: 1
gpg: cannot open '/dev/tty': No such device or address
gpg: [stdout]: write error: Broken pipe
gpg: filter_flush failed on c



In [28]:
import re
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
import pandas as pd

In [29]:
def select_page(n):
  tag = '#ctl00_phContents_ucGrid_grv > tbody:nth-child(1) > tr:nth-child(32) > td:nth-child(1) > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(' + str(n) + ') > a:nth-child(1)'
  click_element = driver.find_element(By.CSS_SELECTOR,tag)
  click_element.click()
  time.sleep(1)

def syllabus_list():
  content = []
  a_tags = driver.find_elements(By.TAG_NAME, "a")
  for a_tag in a_tags:
    url = a_tag.get_attribute("href")
    if re.search(r'^https', str(url)):
        content.append([a_tag.text,url])
  df = pd.DataFrame(content,columns=["授業名","URL"])
  return df

In [30]:
# selenium の初期設定
options = Options()
# Google Colab の場合は、次の2行のコメントアウトを外す (ブラウザが表示されない)
options.add_argument('--headless') # ブラウザを表示しない場合は、指定サイズの画像が保存可能
options.add_argument('--no-sandbox')

# URLで指定されたWebページをブラウザで表示
driver = webdriver.Chrome(options=options)
driver.set_window_size(1280,2000)
driver.get('http://syllabus.saitama-u.ac.jp/portal/public/syllabus/')
time.sleep(1) # 1秒スリープ

# ドロップダウンで条件を設定
dropdown_1 = driver.find_element(By.CSS_SELECTOR, '#ctl00_phContents_ddl_fac') # 開講学部
select_1 = Select(dropdown_1)
select_1.select_by_visible_text('理工学研究科博士前期課程')
time.sleep(1) # 1秒スリープ

# 「検索」をクリック
search_button = driver.find_element(By.CSS_SELECTOR, "#ctl00_phContents_ctl06_btnSearch")
search_button.click()
time.sleep(1) # 1秒スリープ

df = syllabus_list()
for i in range(2,12):
  select_page(i)
  df = pd.concat([df,syllabus_list()], axis=0,ignore_index=True)
for i in range(3,13):
  select_page(i)
  df = pd.concat([df,syllabus_list()], axis=0,ignore_index=True)
for i in range(6,12):
  select_page(i)
  df = pd.concat([df,syllabus_list()], axis=0,ignore_index=True)

df.to_csv("./syllabus.csv",index=False,encoding='utf-8')



# selenium のドライバを修了
driver.quit()