From 9d627146c059696e1b4ddb7f9ae0a18a3ea8f5ab Mon Sep 17 00:00:00 2001 From: Hidetaka Okamoto Date: Tue, 11 Mar 2025 14:33:22 +0900 Subject: [PATCH 1/2] update packagejson --- package.json | 20 +++++++++++--- packages/core/package.json | 25 ++++++++++++++--- packages/langchain-stripe-loader/package.json | 27 ++++++++++++++++--- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/package.json b/package.json index ca609f7..6e16bdb 100644 --- a/package.json +++ b/package.json @@ -13,10 +13,24 @@ "format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md}\"", "format:check": "prettier --check \"**/*.{js,jsx,ts,tsx,json,md}\"" }, - "keywords": [], + "keywords": [ + "stripe", + "documentation", + "loader", + "langchain", + "sitemap" + ], "author": "hideokamoto", - "license": "ISC", - "description": "", + "license": "MIT", + "description": "A collection of utility libraries for easily retrieving and processing Stripe data", + "repository": { + "type": "git", + "url": "https://github.com/wpkyoto/stripe-docs-loader" + }, + "bugs": { + "url": "https://github.com/wpkyoto/stripe-docs-loader/issues" + }, + "homepage": "https://github.com/wpkyoto/stripe-docs-loader#readme", "devDependencies": { "@types/node": "^22.13.10", "prettier": "^3.2.5", diff --git a/packages/core/package.json b/packages/core/package.json index 59e172f..a04141e 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -18,10 +18,29 @@ "scripts": { "dev": "vite build --watch", "build": "vite build && tsc --emitDeclarationOnly", - "test": "vitest run" + "test": "vitest run", + "prepublishOnly": "npm run build" }, - "keywords": [], + "keywords": [ + "stripe", + "documentation", + "sitemap", + "processor", + "utility" + ], "author": "hideokamoto", "license": "MIT", - "description": "Core library for Stripe loaders" + "description": "Core library for Stripe loaders", + "repository": { + "type": "git", + "url": "https://github.com/wpkyoto/stripe-docs-loader", + "directory": "packages/core" + }, + "bugs": { + "url": "https://github.com/wpkyoto/stripe-docs-loader/issues" + }, + "homepage": "https://github.com/wpkyoto/stripe-docs-loader/tree/main/packages/core#readme", + "publishConfig": { + "access": "public" + } } diff --git a/packages/langchain-stripe-loader/package.json b/packages/langchain-stripe-loader/package.json index 115216d..fcad51c 100644 --- a/packages/langchain-stripe-loader/package.json +++ b/packages/langchain-stripe-loader/package.json @@ -18,17 +18,38 @@ "scripts": { "dev": "vite build --watch", "build": "vite build && tsc --emitDeclarationOnly", - "test": "vitest run" + "test": "vitest run", + "prepublishOnly": "npm run build" }, "dependencies": { "@langchain/core": "^0.3.42", - "stripe-loaders-core": "*", + "stripe-loaders-core": "^0.0.0", "turndown": "^7.2.0" }, - "keywords": [], + "keywords": [ + "stripe", + "documentation", + "loader", + "langchain", + "llm", + "ai", + "document-loader" + ], "author": "hideokamoto", "license": "MIT", "description": "LangChain loader for Stripe data", + "repository": { + "type": "git", + "url": "https://github.com/wpkyoto/stripe-docs-loader", + "directory": "packages/langchain-stripe-loader" + }, + "bugs": { + "url": "https://github.com/wpkyoto/stripe-docs-loader/issues" + }, + "homepage": "https://github.com/wpkyoto/stripe-docs-loader/tree/main/packages/langchain-stripe-loader#readme", + "publishConfig": { + "access": "public" + }, "devDependencies": { "@types/turndown": "^5.0.5" } From bc9f8465824a301e64ec69dfd6dc17d361678f06 Mon Sep 17 00:00:00 2001 From: Hidetaka Okamoto Date: Tue, 11 Mar 2025 15:06:33 +0900 Subject: [PATCH 2/2] fix: Replace Turndown with node-html-markdown for Node.js compatibility --- packages/langchain-stripe-loader/package.json | 7 ++----- packages/langchain-stripe-loader/src/StripeComLoader.ts | 9 ++++++--- packages/langchain-stripe-loader/src/StripeDocsLoader.ts | 9 ++++++--- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/packages/langchain-stripe-loader/package.json b/packages/langchain-stripe-loader/package.json index fcad51c..46b5756 100644 --- a/packages/langchain-stripe-loader/package.json +++ b/packages/langchain-stripe-loader/package.json @@ -23,8 +23,8 @@ }, "dependencies": { "@langchain/core": "^0.3.42", - "stripe-loaders-core": "^0.0.0", - "turndown": "^7.2.0" + "node-html-markdown": "^1.3.0", + "stripe-loaders-core": "^0.0.0" }, "keywords": [ "stripe", @@ -49,8 +49,5 @@ "homepage": "https://github.com/wpkyoto/stripe-docs-loader/tree/main/packages/langchain-stripe-loader#readme", "publishConfig": { "access": "public" - }, - "devDependencies": { - "@types/turndown": "^5.0.5" } } diff --git a/packages/langchain-stripe-loader/src/StripeComLoader.ts b/packages/langchain-stripe-loader/src/StripeComLoader.ts index e93bc8a..f2f2953 100644 --- a/packages/langchain-stripe-loader/src/StripeComLoader.ts +++ b/packages/langchain-stripe-loader/src/StripeComLoader.ts @@ -1,7 +1,7 @@ import { SitemapProcessor } from 'stripe-loaders-core'; import { BaseDocumentLoader } from '@langchain/core/document_loaders/base'; import { Document } from '@langchain/core/documents'; -import Turndown from 'turndown'; +import { NodeHtmlMarkdown } from 'node-html-markdown'; import { extractBodyFromHTML } from './utils'; /** @@ -95,9 +95,12 @@ export class StripeComDocumentLoader extends BaseDocumentLoader { const articles = urls ? await this.fetchArticlesFromURLs(urls, locale) : await this.fetchArticlesFromSitemap(resource, locale); - const encoder = new Turndown(); + + // NodeHtmlMarkdownを使用してHTMLをMarkdownに変換 + const nhm = new NodeHtmlMarkdown(); + const documents = articles.map(article => { - const markdownContent = encoder.turndown(article.content); + const markdownContent = nhm.translate(article.content); return new Document({ pageContent: markdownContent, metadata: { diff --git a/packages/langchain-stripe-loader/src/StripeDocsLoader.ts b/packages/langchain-stripe-loader/src/StripeDocsLoader.ts index d1527c7..f9833b9 100644 --- a/packages/langchain-stripe-loader/src/StripeDocsLoader.ts +++ b/packages/langchain-stripe-loader/src/StripeDocsLoader.ts @@ -1,7 +1,7 @@ import { SitemapProcessor } from 'stripe-loaders-core'; import { BaseDocumentLoader } from '@langchain/core/document_loaders/base'; import { Document } from '@langchain/core/documents'; -import Turndown from 'turndown'; +import { NodeHtmlMarkdown } from 'node-html-markdown'; import { extractArticleFromHTML } from './utils'; /** * Interface representing a Stripe documentation article @@ -61,9 +61,12 @@ export class StripeDocsDocumentLoader extends BaseDocumentLoader { */ async load(locale: string = 'en-US'): Promise { const articles = await this.fetchArticlesFromSitemap(locale); - const encoder = new Turndown(); + + // NodeHtmlMarkdownを使用してHTMLをMarkdownに変換 + const nhm = new NodeHtmlMarkdown(); + const documents = articles.map(article => { - const markdownContent = encoder.turndown(article.content); + const markdownContent = nhm.translate(article.content); return new Document({ pageContent: markdownContent, metadata: {