diff --git a/package.json b/package.json index ca609f7..6e16bdb 100644 --- a/package.json +++ b/package.json @@ -13,10 +13,24 @@ "format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md}\"", "format:check": "prettier --check \"**/*.{js,jsx,ts,tsx,json,md}\"" }, - "keywords": [], + "keywords": [ + "stripe", + "documentation", + "loader", + "langchain", + "sitemap" + ], "author": "hideokamoto", - "license": "ISC", - "description": "", + "license": "MIT", + "description": "A collection of utility libraries for easily retrieving and processing Stripe data", + "repository": { + "type": "git", + "url": "https://github.com/wpkyoto/stripe-docs-loader" + }, + "bugs": { + "url": "https://github.com/wpkyoto/stripe-docs-loader/issues" + }, + "homepage": "https://github.com/wpkyoto/stripe-docs-loader#readme", "devDependencies": { "@types/node": "^22.13.10", "prettier": "^3.2.5", diff --git a/packages/core/package.json b/packages/core/package.json index 59e172f..a04141e 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -18,10 +18,29 @@ "scripts": { "dev": "vite build --watch", "build": "vite build && tsc --emitDeclarationOnly", - "test": "vitest run" + "test": "vitest run", + "prepublishOnly": "npm run build" }, - "keywords": [], + "keywords": [ + "stripe", + "documentation", + "sitemap", + "processor", + "utility" + ], "author": "hideokamoto", "license": "MIT", - "description": "Core library for Stripe loaders" + "description": "Core library for Stripe loaders", + "repository": { + "type": "git", + "url": "https://github.com/wpkyoto/stripe-docs-loader", + "directory": "packages/core" + }, + "bugs": { + "url": "https://github.com/wpkyoto/stripe-docs-loader/issues" + }, + "homepage": "https://github.com/wpkyoto/stripe-docs-loader/tree/main/packages/core#readme", + "publishConfig": { + "access": "public" + } } diff --git a/packages/langchain-stripe-loader/package.json b/packages/langchain-stripe-loader/package.json index 115216d..46b5756 100644 --- a/packages/langchain-stripe-loader/package.json +++ b/packages/langchain-stripe-loader/package.json @@ -18,18 +18,36 @@ "scripts": { "dev": "vite build --watch", "build": "vite build && tsc --emitDeclarationOnly", - "test": "vitest run" + "test": "vitest run", + "prepublishOnly": "npm run build" }, "dependencies": { "@langchain/core": "^0.3.42", - "stripe-loaders-core": "*", - "turndown": "^7.2.0" + "node-html-markdown": "^1.3.0", + "stripe-loaders-core": "^0.0.0" }, - "keywords": [], + "keywords": [ + "stripe", + "documentation", + "loader", + "langchain", + "llm", + "ai", + "document-loader" + ], "author": "hideokamoto", "license": "MIT", "description": "LangChain loader for Stripe data", - "devDependencies": { - "@types/turndown": "^5.0.5" + "repository": { + "type": "git", + "url": "https://github.com/wpkyoto/stripe-docs-loader", + "directory": "packages/langchain-stripe-loader" + }, + "bugs": { + "url": "https://github.com/wpkyoto/stripe-docs-loader/issues" + }, + "homepage": "https://github.com/wpkyoto/stripe-docs-loader/tree/main/packages/langchain-stripe-loader#readme", + "publishConfig": { + "access": "public" } } diff --git a/packages/langchain-stripe-loader/src/StripeComLoader.ts b/packages/langchain-stripe-loader/src/StripeComLoader.ts index e93bc8a..f2f2953 100644 --- a/packages/langchain-stripe-loader/src/StripeComLoader.ts +++ b/packages/langchain-stripe-loader/src/StripeComLoader.ts @@ -1,7 +1,7 @@ import { SitemapProcessor } from 'stripe-loaders-core'; import { BaseDocumentLoader } from '@langchain/core/document_loaders/base'; import { Document } from '@langchain/core/documents'; -import Turndown from 'turndown'; +import { NodeHtmlMarkdown } from 'node-html-markdown'; import { extractBodyFromHTML } from './utils'; /** @@ -95,9 +95,12 @@ export class StripeComDocumentLoader extends BaseDocumentLoader { const articles = urls ? await this.fetchArticlesFromURLs(urls, locale) : await this.fetchArticlesFromSitemap(resource, locale); - const encoder = new Turndown(); + + // NodeHtmlMarkdownを使用してHTMLをMarkdownに変換 + const nhm = new NodeHtmlMarkdown(); + const documents = articles.map(article => { - const markdownContent = encoder.turndown(article.content); + const markdownContent = nhm.translate(article.content); return new Document({ pageContent: markdownContent, metadata: { diff --git a/packages/langchain-stripe-loader/src/StripeDocsLoader.ts b/packages/langchain-stripe-loader/src/StripeDocsLoader.ts index d1527c7..f9833b9 100644 --- a/packages/langchain-stripe-loader/src/StripeDocsLoader.ts +++ b/packages/langchain-stripe-loader/src/StripeDocsLoader.ts @@ -1,7 +1,7 @@ import { SitemapProcessor } from 'stripe-loaders-core'; import { BaseDocumentLoader } from '@langchain/core/document_loaders/base'; import { Document } from '@langchain/core/documents'; -import Turndown from 'turndown'; +import { NodeHtmlMarkdown } from 'node-html-markdown'; import { extractArticleFromHTML } from './utils'; /** * Interface representing a Stripe documentation article @@ -61,9 +61,12 @@ export class StripeDocsDocumentLoader extends BaseDocumentLoader { */ async load(locale: string = 'en-US'): Promise { const articles = await this.fetchArticlesFromSitemap(locale); - const encoder = new Turndown(); + + // NodeHtmlMarkdownを使用してHTMLをMarkdownに変換 + const nhm = new NodeHtmlMarkdown(); + const documents = articles.map(article => { - const markdownContent = encoder.turndown(article.content); + const markdownContent = nhm.translate(article.content); return new Document({ pageContent: markdownContent, metadata: {