## Create sitemap.xml

In [49]:
import os
import re
import datetime

# Defined in app.py:
dict_website = {
    'base_url':'https://www.abcxyz.com/',
    'lang':'nl',
}

def create_sitemap_xml(dict_website,folder_build='build',priority='1.0',change_frequency='',print_xml=False):
    # Get all pages and correct URL's:
    ps = [os.path.join(path, name) for (path, subdirs, files) in os.walk(folder_build) for name in files if name.endswith('index.html')]
    ps = [x.replace(folder_build,dict_website['base_url']) for x in ps]
    ps = [re.sub('(?<!\:)//','/',x) for x in ps]
    ps = [re.sub('index.html$','',x) for x in ps]
    # Create XML content:
    last_modification_date = datetime.date.today()
    xml_content = '<?xml version="1.0" encoding="UTF-8"?>\n'
    xml_content += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    for p in ps:
        xml_content += f'\t<url>\n'
        xml_content += f'\t\t<loc>{p}</loc>\n'
        xml_content += f'\t\t<lastmod>{last_modification_date.strftime("%Y-%m-%d")}</lastmod>\n'
        if change_frequency != '':
            xml_content += f'\t\t<changefreq>{change_frequency}</changefreq>\n'
        xml_content += f'\t\t<priority>{priority}</priority>\n'
        xml_content += f'\t</url>\n'
    xml_content += '</urlset>'
    if print_xml:
        print(xml_content)
    else:
        # Write to sitemap.xml file:
        path_sitemap = os.path.join(folder_build,'sitemap.xml')
        with open(path_sitemap,'w') as file:
            file.write(xml_content)

create_sitemap_xml(dict_website,folder_build='../build',print_xml=True)
# create_sitemap_xml(dict_website,change_frequency='daily')

<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
	<url>
		<loc>https://www.abcxyz.com/</loc>
		<lastmod>2024-09-06</lastmod>
		<priority>1.0</priority>
	</url>
	<url>
		<loc>https://www.abcxyz.com/test/</loc>
		<lastmod>2024-09-06</lastmod>
		<priority>1.0</priority>
	</url>
	<url>
		<loc>https://www.abcxyz.com/XYZ_123/</loc>
		<lastmod>2024-09-06</lastmod>
		<priority>1.0</priority>
	</url>
	<url>
		<loc>https://www.abcxyz.com/blog/</loc>
		<lastmod>2024-09-06</lastmod>
		<priority>1.0</priority>
	</url>
	<url>
		<loc>https://www.abcxyz.com/blog/testpage/</loc>
		<lastmod>2024-09-06</lastmod>
		<priority>1.0</priority>
	</url>
	<url>
		<loc>https://www.abcxyz.com/blog/testpage3/</loc>
		<lastmod>2024-09-06</lastmod>
		<priority>1.0</priority>
	</url>
</urlset>


## Create robots.txt

In [67]:
# Defined in app.py:
dict_website = {
    'base_url':'https://www.abcxyz.com/',
    'lang':'nl',
    'sitemap_images':'sitemap_images.xml',
}

def create_robots_txt(dict_website,folder_build='build',list_folder_exclude = [], print_txt=False):
    txt_content='User-agent: *\n'
    if len(list_folder_exclude)>0:
        txt_content += '\n'.join([f'Disallow: /{x}/' for x in list_folder_exclude]) + '\n\n'
    else:
        txt_content += 'Disallow:\n'
    txt_content += f"Sitemap: {dict_website['base_url']}sitemap.xml"
    if dict_website['sitemap_images'] != '':
        txt_content += f"\nSitemap: {dict_website['base_url']}{dict_website['sitemap_images']}"
    if print_txt:
        print(txt_content)
    else:
        path_robots = os.path.join(folder_build,'robots.txt')
        with open(path_robots,'w') as file:
            file.write(txt_content)

create_robots_txt(dict_website,folder_build='../build',list_folder_exclude=['secret_directory','hide_this'],print_txt=True)
# create_robots_txt(dict_website,folder_build='../build',print_txt=True)

User-agent: *
Disallow: /secret_directory/
Disallow: /hide_this/

Sitemap: https://www.abcxyz.com/sitemap.xml
Sitemap: https://www.abcxyz.com/sitemap_images.xml


## Create dict_meta_empty

In [31]:
dict_md_all_yaml = {'blog/testpage': {'xyz': 'bla','title': 'Title for test page from a Markdown file', 'subtitle': 'And some subtitle', 'category': ['Life', 'Tech']}, 'blog/instructions_commands': {'title': 'Commands and instructions', 'subtitle': 'And some subtitle for instructions', 'category': ['Tech']}, 'blog/ds-reqs': {'title': 'Data scientist requirements job posts', 'subtitle': 'Q2 and Q3 2024', 'category': ['Tech']}}

# print(dict_md_all_yaml)
# print()
list_keys_meta = list(set([k2 for k in dict_md_all_yaml for k2 in dict_md_all_yaml[k]]))
dict_meta_empty = {k:'' for k in list_keys_meta}

# print(dict_meta_empty)
# print()

'blog/instructions_commands'

# new_dict = {**old_dict, 'changed_val': value, **other_new_vals_as_dict}
dict_page_meta = {**dict_meta_empty, **dict_md_all_yaml['blog/instructions_commands'], 'new_val': 'abc','subtitle':'new, overridden subtitle'}
print(dict_page_meta)
print()
print(dict_meta_empty)

{'title': 'Commands and instructions', 'xyz': '', 'category': ['Tech'], 'subtitle': 'new, overridden subtitle', 'new_val': 'abc'}

{'title': '', 'xyz': '', 'category': '', 'subtitle': ''}
