### 1. セットアップ
時間がかかる場合がありますが、ブラウザを閉じずにお待ちください。

In [None]:
# Install Node.js from the NodeSource apt repository.
# stdout of the package-manager commands is discarded to keep the cell output short.
# Finish configuring any packages that dpkg left half-configured.
!dpkg --configure -a > /dev/null
!sudo apt-get update > /dev/null
# Tools needed to fetch and register the NodeSource signing key.
!sudo apt-get install -y ca-certificates curl gnupg > /dev/null
!sudo mkdir -p /etc/apt/keyrings
# Download the NodeSource GPG key and store it de-armored as an apt keyring.
!curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
# Register the NodeSource repository for the Node.js major version set in NODE_MAJOR (18).
!NODE_MAJOR=18 && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
# Refresh the package index so the new repository is visible, then install Node.js.
!sudo apt-get update > /dev/null
!sudo apt-get install nodejs -y > /dev/null

# Install the required Python libraries.
!pip install jinja2 > /dev/null
!pip install openpyxl > /dev/null
!pip install pandas > /dev/null

from datetime import datetime
import hashlib
import json
import os
import re
from urllib.parse import urlparse

from google.colab import files
from jinja2 import Template, FileSystemLoader, Environment
import openpyxl
import pandas as pd

###### 関数 ######

def get_datetime():
    """Return the current local date and time as a ``YYYYMMDDHHMMSS`` string."""
    return f"{datetime.now():%Y%m%d%H%M%S}"

##################

###### 定数 ######

# Jinja2 environment whose loader looks templates up relative to the current
# working directory ('.'); later cells call env.get_template() on it.
env = Environment(loader=FileSystemLoader('.'))
# Element names accepted in the mapping workbook. Every name in this list
# becomes a key of the mapping dictionary built in the mapping cell.
JDCAT_ELEMENTS = [
    "Title",
    "Study ID",
    "Author",
    "Distributor",
    "URI",
    "Topic",
    "Summary",
    "Time Period(s)",
    "Geographic Coverage",
    "Data Type",
    "Access",
    "License",
    "DOI",
    "Data Language",
    "Version",
    "Unit of Analysis",
    "Universe / Population",
    "Sampling Procedure",
    "Collection method",
    "Funding Agency",
    "Grant ID",
    "Alternative title",
    "Series",
    "Date of collection",
    "Sampling Rate",
    "Bibliographic Citation",
    "Datafile URI",
    "Related Studies",
    "Related Publications",
    "Publisher",
    "Provider",
]

##################

# Start from a fresh checkout of the dakit2-minimal repository: delete any
# previous clone, clone it again, make it the working directory (all later
# relative paths such as ./src and ./dist resolve against it), and install its
# npm dependencies with stdout discarded.
!rm -rf /content/dakit2-minimal
!git clone https://github.com/adachi-a/dakit2-minimal
os.chdir("/content/dakit2-minimal")
!npm install > /dev/null

In [None]:
# @title 2. 設定
# NOTE: the `# @title` / `# @param` comments are Colab form directives and the
# Japanese variable names are the form field labels, so both are kept verbatim.
# Each form value is copied into a `dakit_*` variable that later cells use.
サイトのタイトル = "DAKit Test Site" # @param {type:"string"}
dakit_site_title = サイトのタイトル
サイトの作成者 = "adachi-a" # @param {type:"string"}
dakit_site_author = サイトの作成者
サイトのURL = "https://adachi-a.github.io/dakit2/" # @param {type:"string"}
dakit_site_url = サイトのURL
検索エンジンにインデックスさせない = True # @param {type:"boolean"}
dakit_noindex = 検索エンジンにインデックスさせない
カラーテーマ = "secondary" # @param {type:"string"}
dakit_bs_theme_bg = カラーテーマ

# Title, author and URL are mandatory; fail early with a message naming the field.
if dakit_site_title == "":
    raise ValueError("dakit_site_titleを空白にすることはできません。")
if dakit_site_author == "":
    raise ValueError("dakit_site_authorを空白にすることはできません。")
if dakit_site_url == "":
    raise ValueError("dakit_site_urlを空白にすることはできません。")

# Normalize the site URL so that it always ends with a slash.
if not dakit_site_url.endswith("/"):
    dakit_site_url += "/" # add trailing slash

# Only the path component of the site URL is used as Vite's `base`.
base_url_parse_result = urlparse(dakit_site_url)

# vite.config.js
# Written as UTF-8 explicitly (instead of the locale default) so that a
# non-ASCII base path is encoded the same way on every platform.
with open("./vite.config.js", "w", encoding="utf-8") as f:
    f.write(f"""
import {{ defineConfig }} from 'vite';
import {{ svelte }} from '@sveltejs/vite-plugin-svelte';

// https://vitejs.dev/config/
export default defineConfig({{
  plugins: [svelte()],
  base: '{base_url_parse_result.path}',
}});
    """)

### 3. メタデータの読み込み
- 読み込ませるExcelブックは、メタデータを記載した`data`と画像情報を記載した`images`の2つのシートだけを持つ必要があります。
- `data`シートの一番左の列は`id`とし、列内で重複のないようにします（検索用であり資料のメタデータとは異なります）。

In [None]:
# Ask the user to upload the metadata workbook. The first key of the value
# returned by files.upload() is used as the path of the workbook to read.
uploaded = files.upload()
metadata_filename = next(iter(uploaded))
book = openpyxl.load_workbook(metadata_filename)

# The workbook must contain exactly the sheets "data" and "images". The sheets
# are read by name below, so their order in the workbook does not matter.
if sorted(book.sheetnames) != ["data", "images"]:
    raise ValueError('Excelブックは"data"と"images"の2つのシートだけを持つようにします。')

# Empty cells become "" so that later templates never render NaN.
catalog = pd.read_excel(metadata_filename, sheet_name="data").fillna("")
image_list = pd.read_excel(metadata_filename, sheet_name="images").fillna("")

if catalog.columns[0] != "id":
    raise ValueError('dataの一番左の列は常に"id"とする必要があります。')

# "id" is the search index's record key ("custom_id_field" in the search page
# settings), so duplicates are rejected here rather than left to corrupt the index.
if catalog["id"].duplicated().any():
    raise ValueError('dataの"id"列の値は重複しないようにします。')

# Export the catalog as the JSON data file placed under ./public.
os.makedirs("./public", exist_ok=True)
catalog.to_json("./public/data.json", orient="records", force_ascii=False)

# Build the mapping workbook the user fills in for the next step: after the
# transpose, the "列名" row lists the catalog columns and the "要素" row is left
# blank for the corresponding JDCat metadata schema elements.
catalog_columns = pd.DataFrame({
    "列名": catalog.columns,
    "要素": [""] * len(catalog.columns),
}).T
catalog_columns_datetime = get_datetime()
mapping_template_filename = f"mapping_{catalog_columns_datetime}.xlsx"
catalog_columns.to_excel(mapping_template_filename, index=False)
files.download(mapping_template_filename)

### 4. メタデータのマッピング
上のセルでダウンロードしたExcelブックの`要素`の行に、各列に対応するJDCatメタデータスキーマの要素名を記入してアップロードします。

In [None]:
# Receive the filled-in mapping workbook; the first key of the value returned
# by files.upload() is used as the path of the file to read.
uploaded = files.upload()
mapping_filename = next(iter(uploaded))

# Read the mapping workbook and transpose it so that every row describes one
# entry: label 0 holds the value stored into the mapping, label 1 the JDCat
# element name it is stored under.
map_df = pd.read_excel(mapping_filename, header=0, index_col=None).fillna("").T

# Every JDCat element starts out unmapped ("").
map_dict = dict.fromkeys(JDCAT_ELEMENTS, "")

for _, mapping_row in map_df.iterrows():
    jdcat_element = mapping_row[1]
    # Skip blank cells and names that are not JDCat elements.
    if jdcat_element == "" or jdcat_element not in JDCAT_ELEMENTS:
        continue
    map_dict[jdcat_element] = mapping_row[0]

In [None]:
# @title 5. 検索ページの設定
# NOTE: the `# @title` / `# @param` comments are Colab form directives and the
# Japanese variable names are the form field labels, so both are kept verbatim.
資料のIDの列名 = "資料番号" # @param {type:"string"}
dakit_item_id_column_name = 資料のIDの列名
資料のタイトルの列名 = "タイトル" # @param {type:"string"}
dakit_item_title_column_name = 資料のタイトルの列名

# fuse_keys: names of the catalog columns that are targeted by the search.
fuse_keys: list = ['タイトル', '別タイトル']

# Sort settings: display label -> field and order.
itemsjs_sortings = {
      "資料番号(昇順)": {
        "field": '資料番号',
        "order": 'asc',
      },
      "資料番号(降順)": {
        "field": '資料番号',
        "order": 'desc',
      },
}

# Filter (aggregation) settings, one entry per column offered as a filter.
itemsjs_aggregations = {
      "内容分類": {
        "title": '内容分類',
        "conjunction": False,
      },
      "資料分類": {
        "title": '資料分類',
        "conjunction": False,
      },
      "印刷分類": {
        "title": '印刷分類',
        "conjunction": False,
      }
}

# Display settings for the search page: the comma-separated list of all
# catalog columns and the per-item HTML snippet with {column} placeholders.
dakit_display_keys_string: str = ",".join(catalog.columns.to_list())
dakit_display_template_string: str = """
<a href="./items/{資料番号}.html" target="_blank"><h5 class="mb-0">{タイトル}</h5></a>
<p class="mb-1"><small>{別タイトル} {内容分類} {資料分類} {印刷分類} {年月日} {サイズ} {備考}</small></p>
"""

# All files below contain non-ASCII text (ensure_ascii=False, Japanese labels),
# so they are written as UTF-8 explicitly instead of the locale default.

# dakit_fuse_keys.json
with open("./src/dakit_fuse_keys.json", "w", encoding="utf-8") as f:
    json.dump(fuse_keys, f, indent=2, ensure_ascii=False)

# dakit_itemsjs_configuration.json
itemsjs_configuration: dict = {
    "sortings": itemsjs_sortings,
    "aggregations": itemsjs_aggregations,
    "removeStopWordFilter": True,
    "native_search_enabled": False,
    "custom_id_field": 'id'
}

with open("./src/dakit_itemsjs_configuration.json", "w", encoding="utf-8") as f:
    json.dump(itemsjs_configuration, f, indent=2, ensure_ascii=False)

# Render the three Jinja templates of the kit into the Svelte/HTML sources
# that the build step consumes.
template = env.get_template('./kit/Search.svelte.jinja')

with open("./src/Search.svelte", "w", encoding="utf-8") as f:
    f.write(template.render(
        dakit_site_title=dakit_site_title,
        dakit_site_url=dakit_site_url,
        dakit_noindex=dakit_noindex,
        dakit_display_keys_string=dakit_display_keys_string,
        dakit_display_template_string=dakit_display_template_string,
    ))

template = env.get_template('./kit/index.html.jinja')

with open("./index.html", "w", encoding="utf-8") as f:
    f.write(template.render(
        dakit_site_title=dakit_site_title
    ))

template = env.get_template('./kit/App.svelte.jinja')

with open("./src/App.svelte", "w", encoding="utf-8") as f:
    f.write(template.render(
        dakit_site_title=dakit_site_title,
        dakit_noindex=dakit_noindex,
        dakit_bs_theme_bg=dakit_bs_theme_bg
    ))

### 6. 検索ページ群の生成

In [None]:
# Run the project's npm "build" script; the following cells write their
# output under ./dist.
!npm run build

### 7. 個別ページ群の生成

In [None]:
# Generate one HTML page per catalog row under ./dist/items, using the
# image-viewer template when the "images" sheet lists images for the item and
# the plain template otherwise.
template_image = env.get_template('./kit/item.html.jinja')
template_no_image = env.get_template('./kit/item_no_image.html.jinja')
os.makedirs("./dist/items/images", exist_ok=True)

# Group the image paths by item id once, keeping sheet order, instead of
# filtering the whole image sheet again for every catalog row.
images_by_id = {}
for image_id, image_path in zip(image_list["id"], image_list["path"]):
    images_by_id.setdefault(image_id, []).append(image_path)
items_with_image = set(images_by_id)

for _, item in catalog.iterrows():
    item_id = item[dakit_item_id_column_name]
    item_title = item[dakit_item_title_column_name]

    # Template variables shared by both page variants.
    render_context = {
        "dakit_site_title": dakit_site_title,
        "dakit_site_url": dakit_site_url,
        "dakit_noindex": dakit_noindex,
        "dakit_bs_theme_bg": dakit_bs_theme_bg,
        "item_title": item_title,
        "item_metadata_filename": f"../jpcoar/{item_id}.xml",
        "item_metadata": item.to_dict(),
    }

    if item_id in items_with_image:
        template = template_image
        item_image_list = images_by_id[item_id]
        render_context["item_image_list"] = item_image_list
        render_context["item_image_array"] = [{"src": x} for x in item_image_list]
    else:
        template = template_no_image

    # Pages contain non-ASCII metadata, so the encoding is fixed to UTF-8.
    with open(f"./dist/items/{item_id}.html", "w", encoding="utf-8") as f:
        f.write(template.render(**render_context))

### 8. メタデータファイルの生成

In [None]:
# Write one JPCOAR XML metadata file per catalog row under ./dist/jpcoar.
from xml.sax.saxutils import escape

os.makedirs("./dist/jpcoar", exist_ok=True)
for _, item in catalog.iterrows():

    # Resolve every JDCat element to this row's value of the mapped catalog
    # column ("" when the element is unmapped). Values are XML-escaped because
    # they are inserted as element text; a raw "&" or "<" would otherwise
    # produce a malformed document.
    row = {}

    for el in JDCAT_ELEMENTS:
        source_column = map_dict[el]
        if source_column:
            row[el] = escape(str(item[source_column]))
        else:
            row[el] = ""

    # NOTE(review): "Series" and "Bibliographic Citation" are resolved above
    # but have no corresponding element in the template below.
    template =f"""<jpcoar:jpcoar xmlns:datacite="https://schema.datacite.org/meta/kernel-4/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcndl="http://ndl.go.jp/dcndl/terms/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:jpcoar="https://github.com/JPCOAR/schema/blob/master/1.0/" xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rioxxterms="http://www.rioxx.net/schema/v2.0/rioxxterms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://github.com/JPCOAR/schema/blob/master/1.0/jpcoar_scm.xsd">
        <dc:title xml:lang="ja">{row["Title"]}</dc:title>
        <jpcoar:relatedIdentifier identifierType="Local">{row["Study ID"]}</jpcoar:relatedIdentifier>
        <jpcoar:creator><jpcoar:creatorName>{row["Author"]}</jpcoar:creatorName></jpcoar:creator>
        <jpcoar:contributor contributorType="Distributor">
            <jpcoar:contributorName>
            {row["Distributor"]}
            </jpcoar:contributorName>
        </jpcoar:contributor>
        <jpcoar:identifier identifierType="URI">
            {row["URI"]}
        </jpcoar:identifier>
        <jpcoar:subject subjectScheme="Other">
            {row["Topic"]}
        </jpcoar:subject>
        <datacite:description descriptionType="Abstract">
            {row["Summary"]}
        </datacite:description>
        <dcterms:temporal>
        {row["Time Period(s)"]}
        </dcterms:temporal>
        <datacite:geoLocationPlace>
        {row["Geographic Coverage"]}
        </datacite:geoLocationPlace>
        <datacite:description descriptionType="Other">
        {row["Data Type"]}
        </datacite:description>
        <dcterms:accessRights>
        {row["Access"]}
        </dcterms:accessRights>
        <dc:rights>
        {row["License"]}
        </dc:rights>
        <jpcoar:relatedIdentifier>
        {row["DOI"]}
        </jpcoar:relatedIdentifier>
        <dc:language>
        {row["Data Language"]}
        </dc:language>
        <datacite:version>
        {row["Version"]}
        </datacite:version>
        <datacite:description descriptionType="Methods">
        {row["Unit of Analysis"]}
        </datacite:description>
        <datacite:description descriptionType="Methods">
        {row["Universe / Population"]}
        </datacite:description>
        <datacite:description descriptionType="Methods">
        {row["Sampling Procedure"]}
        </datacite:description>
        <datacite:description descriptionType="Methods">
        {row["Collection method"]}
        </datacite:description>
        <jpcoar:funderName>
        {row["Funding Agency"]}
        </jpcoar:funderName>
        <datacite:awardNumber>
        {row["Grant ID"]}
        </datacite:awardNumber>
        <dcterms:alternative>
        {row["Alternative title"]}
        </dcterms:alternative>
        <datacite:description descriptionType="Methods">
        {row["Date of collection"]}
        </datacite:description>
        <datacite:description descriptionType="Methods">
        {row["Sampling Rate"]}
        </datacite:description>
        <jpcoar:URI>
        {row["Datafile URI"]}
        </jpcoar:URI>
        <jpcoar:relation relationType="isSupplementedBy">
        {row["Related Studies"]}
        </jpcoar:relation>
        <jpcoar:relation relationType="isReferencedBy">
        {row["Related Publications"]}
        </jpcoar:relation>
        <dc:publisher>
        {row["Publisher"]}
        </dc:publisher>
        <jpcoar:contributor contributorType="Others">
        {row["Provider"]}
        </jpcoar:contributor>
      </jpcoar:jpcoar>"""

    # Name the file after the item-id column: the item pages link to
    # ../jpcoar/{item_id}.xml, and naming it after the mapped "Study ID" would
    # make every record overwrite ".xml" whenever that element is unmapped.
    record_id = item[dakit_item_id_column_name]
    with open(f"./dist/jpcoar/{record_id}.xml", mode="w", encoding="utf-8") as f:
        f.write(template)

### 9. ResourceListの生成

In [None]:
def file_md5(file_path, size=4096):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is read in binary mode in chunks of ``size * block_size`` bytes,
    where ``block_size`` is the MD5 hash object's block size, so the whole
    file is never held in memory at once.
    """
    digest = hashlib.md5()
    chunk_bytes = size * digest.block_size
    with open(file_path, 'rb') as stream:
        while True:
            data = stream.read(chunk_bytes)
            if data == b'':
                break
            digest.update(data)
    return digest.hexdigest()

# Build ./dist/ResourceList.xml, listing every file in ./dist/jpcoar with its
# public URL, modification time, MD5 hash and size.
from datetime import timezone

# UTC timestamp layout used for every date in the document.
resource_time_format = "%Y-%m-%dT%H:%M:%SZ"

# Generation time of the list, taken as a timezone-aware UTC datetime.
file_time = datetime.now(timezone.utc)
now = file_time.strftime(resource_time_format)

# Document header; "at" and "completed" carry the same generation timestamp.
pref = """<?xml version='1.0' encoding='UTF-8'?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:md at="{}" capability="resourcelist" completed="{}" />""".format(now, now)
directory = "./dist/jpcoar/"
# The site URL without its last character (the trailing slash added in the settings cell).
dir_name = dakit_site_url[0:-1]
# Sorted so that the generated document does not depend on directory order.
files_ = sorted(os.listdir(directory))

url_entries = []
for filename in files_:
    path = directory + filename
    hs = file_md5(path)
    change_time = datetime.fromtimestamp(os.path.getmtime(path), tz=timezone.utc)
    lastmod = change_time.strftime(resource_time_format)
    length = os.path.getsize(path)
    template = f"""
    <url>
	    <loc>{dir_name}/jpcoar/{filename}</loc>
	    <lastmod>{lastmod}</lastmod>
	    <rs:md hash="md5:{hs}" length="{length}" type="application/xml" />
      <rs:ln rel="describes" href="{dir_name}/jpcoar/{filename}" />
      <rs:ln rel="profile" href="https://github.com/JPCOAR/schema/blob/master/1.0/"/>
    </url>"""
    url_entries.append(template)

pref = pref + "".join(url_entries) + "\n</urlset>"

with open("./dist/ResourceList.xml", mode="w", encoding="utf-8") as f:
    f.write(pref)

### 10. ダウンロード

In [None]:
!npx html-beautify --preserve-newlines false ./dist/*/*.html
!touch ./dist/.nojekyll
!zip -r /content/dakit2_output.zip /content/dakit2-minimal/dist
files.download("/content/dakit2_output.zip")