From 5003c686d04aaee1a32b4bf132d01843059f44ca Mon Sep 17 00:00:00 2001
From: Jason Maurer
Date: Tue, 29 May 2018 13:00:51 -0400
Subject: [PATCH] generate a sitemap as well as a robots.txt

---
 src/cmds/manifest.js     |  3 ++-
 src/core/hydrate.js      | 11 ++++++++++-
 src/core/index.js        | 10 ++++++++--
 src/core/output.js       | 11 +++++++++--
 src/core/robots.js       | 13 +++++++++++++
 src/core/robots.spec.js  | 12 ++++++++++++
 src/core/server.js       | 12 +++++++++++-
 src/core/sitemap.js      | 38 ++++++++++++++++++++++++++++++++++++++
 src/core/sitemap.spec.js | 12 ++++++++++++
 src/utils/config.js      |  2 ++
 10 files changed, 117 insertions(+), 7 deletions(-)
 create mode 100644 src/core/robots.js
 create mode 100644 src/core/robots.spec.js
 create mode 100644 src/core/sitemap.js
 create mode 100644 src/core/sitemap.spec.js

diff --git a/src/cmds/manifest.js b/src/cmds/manifest.js
index a61e7d6..dfdd9d1 100644
--- a/src/cmds/manifest.js
+++ b/src/cmds/manifest.js
@@ -3,7 +3,8 @@ const { hydrateTree } = require('../core/hydrate')
 
 module.exports = async (args, config) => {
   const tree = await dirTree(config.root)
-  const manifest = await hydrateTree(tree, config)
+  const { manifest } = await hydrateTree(tree, config)
+
   const manifestStr = JSON.stringify(manifest, null, 2)
 
   console.log(manifestStr)
diff --git a/src/core/hydrate.js b/src/core/hydrate.js
index 3134c2e..6104766 100644
--- a/src/core/hydrate.js
+++ b/src/core/hydrate.js
@@ -4,6 +4,7 @@ const ourpath = require('../utils/path')
 const { getFrontmatterOnly } = require('../utils/frontmatter')
 const { mergeLeftByKey } = require('../utils/merge')
 const { walkSource } = require('./source')
+const Sitemap = require('./sitemap')
 
 async function getMetaData (item, parentItems) {
   const data = item.type === 'file'
@@ -49,6 +50,7 @@ function normalizeItems (data) {
 
 async function hydrateTree (tree, config, onRegenerate) {
   const urls = {}
+  const sitemap = new Sitemap()
 
   if (tree.childrenIndex === undefined) {
     throw new Error('No index file was found! Create a `readme.md` at the root of your project.')
@@ -104,6 +106,10 @@ async function hydrateTree (tree, config, onRegenerate) {
     // url is now taken
     urls[hydratedItem.url] = hoistedItem.path
 
+    // add url to the sitemap
+    const fullUrl = `${config.domain}${hydratedItem.url}`
+    sitemap.addUrl(fullUrl, metaData.sitemap)
+
     hydratedItem.input = metaData.input || hoistedItem.path
     hydratedItem.outputDir = syspath.join(
       config.output,
@@ -156,7 +162,10 @@ async function hydrateTree (tree, config, onRegenerate) {
     return hydratedItem
   }
 
-  return _recursive(tree)
+  return {
+    manifest: await _recursive(tree),
+    sitemap: sitemap.generate(),
+  }
 }
 
 // async function hydrateContent (manifest) {
diff --git a/src/core/index.js b/src/core/index.js
index 4bacd3b..2975e15 100644
--- a/src/core/index.js
+++ b/src/core/index.js
@@ -2,6 +2,7 @@ const loadSyntax = require('./syntax')
 const staticAssets = require('./static')
 const { dirTree } = require('./filesystem')
 const { hydrateTree } = require('./hydrate')
+const { getRobotsTxt } = require('./robots')
 const { getCompiler } = require('./compiler')
 const { log } = require('../utils/emit')
 
@@ -14,12 +15,17 @@ module.exports = async (env, localConfig) => {
 
   // generate and hydrate the manifest
   const tree = await dirTree(localConfig.root)
-  const manifest = await hydrateTree(tree, localConfig)
+  const hydrated = await hydrateTree(tree, localConfig)
 
   log('Generated and hydrated manifest')
 
   // this gets passed to the theme
-  const props = { manifest, config: localConfig }
+  const props = {
+    config: localConfig,
+    manifest: hydrated.manifest,
+    sitemap: hydrated.sitemap,
+    robots: getRobotsTxt(localConfig),
+  }
 
   // setup webpack compiler so we can build (or watch)
   const compiler = await getCompiler(env, props)
diff --git a/src/core/output.js b/src/core/output.js
index 6c37250..a2e33f5 100644
--- a/src/core/output.js
+++ b/src/core/output.js
@@ -1,14 +1,21 @@
 const fs = require('fs-extra')
 const syspath = require('path')
+const { warn } = require('../utils/emit')
 const { generateDatabase } = require('./database')
 const { templateForProduction } = require('./template')
 const { getContent, getTableOfContents } = require('./filesystem')
 
 module.exports = async (entrypoints, props) => {
-  const db = await generateDatabase(props.manifest)
   const outputDB = syspath.join(props.config.output, 'db.json')
+  const outputSitemap = syspath.join(props.config.output, 'sitemap.xml')
+  const outputRobots = syspath.join(props.config.output, 'robots.txt')
 
-  await fs.outputJson(outputDB, db)
+  await fs.outputJson(outputDB, await generateDatabase(props.manifest))
+  await fs.outputFile(outputSitemap, props.sitemap)
+
+  await fs.pathExists(outputRobots)
+    ? warn('You have a custom robots.txt file, so one was not generated for you!')
+    : await fs.outputFile(outputRobots, props.robots)
 
   const _recursive = async ({ items, ...item }) => {
     if (item.outputDir) {
diff --git a/src/core/robots.js b/src/core/robots.js
new file mode 100644
index 0000000..96d6a2c
--- /dev/null
+++ b/src/core/robots.js
@@ -0,0 +1,13 @@
+function getRobotsTxt (config = {}) {
+  const lines = [
+    'User-agent: *',
+    `Sitemap: ${config.domain || ''}/sitemap.xml`,
+    `Disallow: ${config.crawlable ? '' : '/'}`,
+  ]
+
+  return lines.join('\n')
+}
+
+module.exports = {
+  getRobotsTxt,
+}
diff --git a/src/core/robots.spec.js b/src/core/robots.spec.js
new file mode 100644
index 0000000..8126297
--- /dev/null
+++ b/src/core/robots.spec.js
@@ -0,0 +1,12 @@
+const { expect } = require('code')
+const robots = require('./robots')
+
+describe('unit: core/robots', () => {
+  it('getRobotsTxt()', async () => {
+    expect(robots.getRobotsTxt()).to.equal('User-agent: *\nSitemap: /sitemap.xml\nDisallow: /')
+    expect(robots.getRobotsTxt({
+      domain: 'https://foo.bar',
+      crawlable: true,
+    })).to.equal('User-agent: *\nSitemap: https://foo.bar/sitemap.xml\nDisallow: ')
+  })
+})
diff --git a/src/core/server.js b/src/core/server.js
index df477c5..7ecab29 100644
--- a/src/core/server.js
+++ b/src/core/server.js
@@ -35,11 +35,21 @@ module.exports = (props, compiler) => {
     index: false,
   }))
 
+  app.use('/robots.txt', (req, res, next) => {
+    res.setHeader('Content-Type', 'text/plain; charset=utf-8')
+    res.end(props.robots)
+  })
+
+  app.use('/sitemap.xml', (req, res, next) => {
+    res.setHeader('Content-Type', 'application/xml; charset=utf-8')
+    res.end(props.sitemap)
+  })
+
   app.use('/db.json', async (req, res, next) => {
     try {
       const db = await generateDatabase(props.manifest)
 
-      res.setHeader('Content-Type', 'application/json')
+      res.setHeader('Content-Type', 'application/json; charset=utf-8')
       res.end(JSON.stringify(db))
     } catch (err) {
       next(err)
diff --git a/src/core/sitemap.js b/src/core/sitemap.js
new file mode 100644
index 0000000..9741038
--- /dev/null
+++ b/src/core/sitemap.js
@@ -0,0 +1,38 @@
+const { minify } = require('html-minifier')
+
+class Sitemap {
+  constructor () {
+    this.urls = []
+  }
+
+  addUrl (url, data = {}) {
+    const merged = Object.assign({}, {
+      loc: url,
+      priority: 0.5,
+      changefreq: null,
+      lastmod: null,
+    }, data)
+
+    this.urls.push(merged)
+  }
+
+  generate () {
+    return minify(`
+      <?xml version="1.0" encoding="UTF-8"?>
+      <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+        ${this.urls.map(data => `
+          <url>
+            ${data.loc ? `<loc>${data.loc}</loc>` : ''}
+            ${data.lastmod ? `<lastmod>${data.lastmod}</lastmod>` : ''}
+            ${data.changefreq ? `<changefreq>${data.changefreq}</changefreq>` : ''}
+            ${data.priority ? `<priority>${data.priority}</priority>` : ''}
+          </url>
+        `).join('\n')}
+      </urlset>
+    `, {
+      collapseWhitespace: true,
+    })
+  }
+}
+
+module.exports = Sitemap
diff --git a/src/core/sitemap.spec.js b/src/core/sitemap.spec.js
new file mode 100644
index 0000000..f0ac7d4
--- /dev/null
+++ b/src/core/sitemap.spec.js
@@ -0,0 +1,12 @@
+const { expect } = require('code')
+const Sitemap = require('./sitemap')
+
+describe('unit: core/sitemap', () => {
+  it('Sitemap()', async () => {
+    const sitemap = new Sitemap()
+    sitemap.addUrl('/foo')
+    sitemap.addUrl('/bar', { priority: 1 })
+    sitemap.addUrl('/baz', { changefreq: 'daily' })
+    expect(sitemap.generate()).to.equal('<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>/foo</loc><priority>0.5</priority></url><url><loc>/bar</loc><priority>1</priority></url><url><loc>/baz</loc><changefreq>daily</changefreq><priority>0.5</priority></url></urlset>')
+  })
+})
diff --git a/src/utils/config.js b/src/utils/config.js
index 5fdce4d..9516f7f 100644
--- a/src/utils/config.js
+++ b/src/utils/config.js
@@ -21,6 +21,8 @@ const DEFAULT_CONFIG = {
   static: '.static',
   temp: tempDir(),
   baseURL: '/',
+  domain: '',
+  crawlable: true,
   host: 'localhost',
   port: 8000,
   languages: ['bash', 'json'],
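
For reference, a minimal sketch of the two new modules used in isolation once the patch is applied. The domain and page URLs below are made up for illustration; only the `Sitemap` class (src/core/sitemap.js) and the `getRobotsTxt` helper (src/core/robots.js) added above are real, and html-minifier must already be installed as sitemap.js expects.

    // sketch only: exercises the modules added in this patch with made-up values
    const Sitemap = require('./src/core/sitemap')
    const { getRobotsTxt } = require('./src/core/robots')

    const config = { domain: 'https://docs.example.com', crawlable: true }

    const sitemap = new Sitemap()
    sitemap.addUrl(`${config.domain}/`, { priority: 1 })
    sitemap.addUrl(`${config.domain}/setup`, { changefreq: 'weekly' })

    // minified <urlset> XML, the same string the build writes to sitemap.xml
    console.log(sitemap.generate())

    // "User-agent: *\nSitemap: https://docs.example.com/sitemap.xml\nDisallow: "
    console.log(getRobotsTxt(config))

The dev server serves these same two strings from /robots.txt and /sitemap.xml, while a production build writes them next to db.json in the output directory, skipping robots.txt when a hand-written one already exists there.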