Skip to content

Commit

Permalink
wiki: Fix Test Wikitech's mobile domain and regenerate
Browse files Browse the repository at this point in the history
The regeneration also:
* Added 1 new wiki (tlywiki)
* Changed the language codes of 13 projects from their non-standard
  language subdomains to their ISO 639 codes

Bug: T344080
  • Loading branch information
nshahquinn committed Sep 6, 2023
1 parent 884a0f8 commit d055204
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 58 deletions.
81 changes: 37 additions & 44 deletions wiki/generate.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -31,15 +31,15 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/nshahquinn-wmf/.conda/envs/2023-06-27T21.59.54_nshahquinn-wmf/lib/python3.10/site-packages/pandas/io/sql.py:761: UserWarning: pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy\n",
" warnings.warn(\n"
"/srv/home/nshahquinn-wmf/wmfdata-python/wmfdata/mariadb.py:142: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
" result = pd.read_sql_query(\n"
]
}
],
Expand All @@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 23,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -105,7 +105,7 @@
"Index: []"
]
},
"execution_count": 4,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -117,7 +117,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -139,14 +139,14 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# These are the mobile URL templates from the production wgMobileUrlTemplate config\n",
"# variable. As of Aug 2023, the variable is defined at\n",
"# variable. As of Aug 2023, the variable is defined in\n",
"# https://gerrit.wikimedia.org/r/plugins/gitiles/operations/mediawiki-config\n",
"# /+/refs/heads/master/wmf-config/InitialiseSettings.php#6903\n",
"# /+/refs/heads/master/wmf-config/InitialiseSettings.php\n",
"#\n",
"# The templates here are kept in the same format and the same order as in the source\n",
"# variable, for ease of comparison.\n",
Expand All @@ -156,25 +156,29 @@
" \"sourceswiki\": \"m.{h0}.{h1}\",\n",
" \"wikidatawiki\": \"m.{h1}.{h2}\",\n",
" \"wikifunctionswiki\": \"m.{h1}.{h2}\",\n",
" # Wikitech (database code `labswiki`) currently uses the same domain for mobile and\n",
" # desktop. When it is moved to the production cluster (T237773), it will probably \n",
" # start using the default URL template so that Varnish can cache it properly.\n",
" \"labswiki\": \"{h0}.{h1}.{h2}\"\n",
" # \"wikitech\" is a database group that contains Wikitech (database code \"labswiki\") \n",
" # and Test Wikitech (database code \"labtestwiki\")\n",
" \"labswiki\": \"\",\n",
" \"labtestwiki\": \"\"\n",
"}\n",
"\n",
"def derive_mobile_domain(row):\n",
" split_domain = row[\"domain_name\"].split(\".\")\n",
" \n",
" h_parts = {}\n",
" # Assign the parts of the split domain to the names (\"h0\", \"h1\", etc.) used in the templates\n",
" for i, h in enumerate(split_domain):\n",
" h_parts[f\"h{i}\"] = h\n",
" for i, p in enumerate(split_domain):\n",
" h_parts[f\"h{i}\"] = p\n",
" \n",
" for db_code, template in other_templates.items():\n",
" # The 'name' is the row's index\n",
" if row.name == db_code:\n",
" return template.format(**h_parts)\n",
" \n",
" if template:\n",
" return template.format(**h_parts)\n",
" # An empty template means the desktop URL is used unmodified\n",
" else:\n",
" return \".\".join(h_parts.values())\n",
" \n",
" return default_template.format(**h_parts)\n",
"\n",
"wikis[\"mobile_domain_name\"] = wikis.apply(derive_mobile_domain, axis=1)"
Expand All @@ -189,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -237,25 +241,16 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'als',\n",
" 'btm',\n",
" 'diq',\n",
" 'fiu-vro',\n",
" 'map-bms',\n",
" 'nah',\n",
" 'pih',\n",
" 'simple',\n",
" 'szy',\n",
" 'tay'}"
"{'btm', 'diq', 'map-bms', 'nah', 'pih', 'simple', 'szy', 'tay'}"
]
},
"execution_count": 13,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -268,16 +263,14 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Add missing language names. Remove manual additions once no longer needed. \n",
"extra_langs = {\n",
" \"als\": \"Alsatian\",\n",
" \"btm\": \"Mandailing\",\n",
" \"diq\": \"Zazaki\",\n",
" \"fiu-vro\": \"Võro\",\n",
" \"map-bms\": \"Banyumasan\",\n",
" \"nah\": \"Nahuatl\",\n",
" \"pih\": \"Norfuk-Pitkern\",\n",
Expand All @@ -299,7 +292,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -326,7 +319,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -353,7 +346,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -362,7 +355,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 32,
"metadata": {},
"outputs": [
{
Expand All @@ -380,7 +373,7 @@
" 'yuewiktionary']"
]
},
"execution_count": 18,
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -405,7 +398,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -426,7 +419,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -443,7 +436,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -467,7 +460,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
29 changes: 15 additions & 14 deletions wiki/wikis.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ afwiktionary af.wiktionary.org wiktionary af af.m.wiktionary.org Afrikaans open
akwiki ak.wikipedia.org wikipedia ak ak.m.wikipedia.org Akan closed public public Akan Wikipedia
akwikibooks ak.wikibooks.org wikibooks ak ak.m.wikibooks.org Akan closed public public Akan Wikibooks
akwiktionary ak.wiktionary.org wiktionary ak ak.m.wiktionary.org Akan closed public public Akan Wiktionary
alswiki als.wikipedia.org wikipedia als als.m.wikipedia.org Alsatian open public public Alemannisch Wikipedia
alswiki als.wikipedia.org wikipedia gsw als.m.wikipedia.org Alemannic open public public Alemannisch Wikipedia
altwiki alt.wikipedia.org wikipedia alt alt.m.wikipedia.org Southern Altai open public public Altai Wikipedia
amiwiki ami.wikipedia.org wikipedia ami ami.m.wikipedia.org Amis open public public Amis Wikipedia
amwiki am.wikipedia.org wikipedia am am.m.wikipedia.org Amharic open public public Amharic Wikipedia
Expand Down Expand Up @@ -76,14 +76,14 @@ azwiktionary az.wiktionary.org wiktionary az az.m.wiktionary.org Azerbaijani ope
banwiki ban.wikipedia.org wikipedia ban ban.m.wikipedia.org Balinese open public public Balinese Wikipedia
banwikisource ban.wikisource.org wikisource ban ban.m.wikisource.org Balinese open public public Balinese Wikisource
barwiki bar.wikipedia.org wikipedia bar bar.m.wikipedia.org Bavarian open public public Bavarian Wikipedia
bat_smgwiki bat-smg.wikipedia.org wikipedia bat-smg bat-smg.m.wikipedia.org Samogitian open public public Samogitian Wikipedia
bat_smgwiki bat-smg.wikipedia.org wikipedia sgs bat-smg.m.wikipedia.org Samogitian open public public Samogitian Wikipedia
bawiki ba.wikipedia.org wikipedia ba ba.m.wikipedia.org Bashkir open public public Bashkir Wikipedia
bawikibooks ba.wikibooks.org wikibooks ba ba.m.wikibooks.org Bashkir open public public Bashkir Wikibooks
bclwiki bcl.wikipedia.org wikipedia bcl bcl.m.wikipedia.org Central Bikol open public public Bikol Central Wikipedia
bclwikiquote bcl.wikiquote.org wikiquote bcl bcl.m.wikiquote.org Central Bikol open public public Central Bikol Wikiquote
bclwiktionary bcl.wiktionary.org wiktionary bcl bcl.m.wiktionary.org Central Bikol open public public Central Bikol Wiktionary
bdwikimedia bd.wikimedia.org bdwikimedia en bd.m.wikimedia.org English open public public Wikimedia Bangladesh
be_x_oldwiki be-tarask.wikipedia.org wikipedia be-x-old be-tarask.m.wikipedia.org Belarusian (Taraškievica orthography) open public public Belarusian (Taraškievica) Wikipedia
be_x_oldwiki be-tarask.wikipedia.org wikipedia be-tarask be-tarask.m.wikipedia.org Belarusian (Taraškievica orthography) open public public Belarusian (Taraškievica) Wikipedia
betawikiversity beta.wikiversity.org betawikiversity en beta.m.wikiversity.org English open public public Wikiversity Beta
bewiki be.wikipedia.org wikipedia be be.m.wikipedia.org Belarusian open public public Belarusian Wikipedia
bewikibooks be.wikibooks.org wikibooks be be.m.wikibooks.org Belarusian open public public Belarusian Wikibooks
Expand Down Expand Up @@ -271,7 +271,7 @@ fawikivoyage fa.wikivoyage.org wikivoyage fa fa.m.wikivoyage.org Persian open pu
fawiktionary fa.wiktionary.org wiktionary fa fa.m.wiktionary.org Persian open public public Persian Wiktionary
fdcwiki fdc.wikimedia.org fdc en fdc.m.wikimedia.org English open private private Wikimedia FDC
ffwiki ff.wikipedia.org wikipedia ff ff.m.wikipedia.org Fula open public public Fulah Wikipedia
fiu_vrowiki fiu-vro.wikipedia.org wikipedia fiu-vro fiu-vro.m.wikipedia.org Võro open public public Võro Wikipedia
fiu_vrowiki fiu-vro.wikipedia.org wikipedia vro fiu-vro.m.wikipedia.org Võro open public public Võro Wikipedia
fiwiki fi.wikipedia.org wikipedia fi fi.m.wikipedia.org Finnish open public public Finnish Wikipedia
fiwikibooks fi.wikibooks.org wikibooks fi fi.m.wikibooks.org Finnish open public public Finnish Wikibooks
fiwikimedia fi.wikimedia.org fiwikimedia en fi.m.wikimedia.org English open public public Wikimedia Finland
Expand Down Expand Up @@ -499,7 +499,7 @@ kywikibooks ky.wikibooks.org wikibooks ky ky.m.wikibooks.org Kyrgyz open public
kywikiquote ky.wikiquote.org wikiquote ky ky.m.wikiquote.org Kyrgyz open public public Kyrgyz Wikiquote
kywiktionary ky.wiktionary.org wiktionary ky ky.m.wiktionary.org Kyrgyz open public public Kyrgyz Wiktionary
labswiki wikitech.wikimedia.org labs en wikitech.wikimedia.org English open public public Wikitech
labtestwiki labtestwikitech.wikimedia.org labtest en labtestwikitech.m.wikimedia.org English open public public Test Wikitech
labtestwiki labtestwikitech.wikimedia.org labtest en labtestwikitech.wikimedia.org English open public public Test Wikitech
ladwiki lad.wikipedia.org wikipedia lad lad.m.wikipedia.org Ladino open public public Ladino Wikipedia
lawiki la.wikipedia.org wikipedia la la.m.wikipedia.org Latin open public public Latin Wikipedia
lawikibooks la.wikibooks.org wikibooks la la.m.wikibooks.org Latin open public public Latin Wikibooks
Expand Down Expand Up @@ -716,8 +716,8 @@ rmwiktionary rm.wiktionary.org wiktionary rm rm.m.wiktionary.org Romansh closed
rmywiki rmy.wikipedia.org wikipedia rmy rmy.m.wikipedia.org Vlax Romani open public public Romani Wikipedia
rnwiki rn.wikipedia.org wikipedia rn rn.m.wikipedia.org Rundi open public public Rundi Wikipedia
rnwiktionary rn.wiktionary.org wiktionary rn rn.m.wiktionary.org Rundi closed public public Rundi Wiktionary
roa_rupwiki roa-rup.wikipedia.org wikipedia roa-rup roa-rup.m.wikipedia.org Aromanian open public public Aromanian Wikipedia
roa_rupwiktionary roa-rup.wiktionary.org wiktionary roa-rup roa-rup.m.wiktionary.org Aromanian open public public Aromanian Wiktionary
roa_rupwiki roa-rup.wikipedia.org wikipedia rup roa-rup.m.wikipedia.org Aromanian open public public Aromanian Wikipedia
roa_rupwiktionary roa-rup.wiktionary.org wiktionary rup roa-rup.m.wiktionary.org Aromanian open public public Aromanian Wiktionary
roa_tarawiki roa-tara.wikipedia.org wikipedia roa-tara roa-tara.m.wikipedia.org Tarantino open public public Tarandíne Wikipedia
romdwikimedia romd.wikimedia.org romdwikimedia en romd.m.wikimedia.org English open public private Wikimedians of Romania and Moldova User Group
rowiki ro.wikipedia.org wikipedia ro ro.m.wikipedia.org Romanian open public public Romanian Wikipedia
Expand Down Expand Up @@ -879,6 +879,7 @@ tlwiki tl.wikipedia.org wikipedia tl tl.m.wikipedia.org Tagalog open public publ
tlwikibooks tl.wikibooks.org wikibooks tl tl.m.wikibooks.org Tagalog open public public Tagalog Wikibooks
tlwikiquote tl.wikiquote.org wikiquote tl tl.m.wikiquote.org Tagalog open public public Tagalog Wikiquote
tlwiktionary tl.wiktionary.org wiktionary tl tl.m.wiktionary.org Tagalog open public public Tagalog Wiktionary
tlywiki tly.wikipedia.org wikipedia tly tly.m.wikipedia.org Talysh open public public Talysh Wikipedia
tnwiki tn.wikipedia.org wikipedia tn tn.m.wikipedia.org Tswana open public public Tswana Wikipedia
tnwiktionary tn.wiktionary.org wiktionary tn tn.m.wiktionary.org Tswana open public public Tswana Wiktionary
towiki to.wikipedia.org wikipedia to to.m.wikipedia.org Tongan open public public Tongan Wikipedia
Expand Down Expand Up @@ -992,13 +993,13 @@ zawikibooks za.wikibooks.org wikibooks za za.m.wikibooks.org Zhuang closed publi
zawikiquote za.wikiquote.org wikiquote za za.m.wikiquote.org Zhuang closed public public Zhuang Wikiquote
zawiktionary za.wiktionary.org wiktionary za za.m.wiktionary.org Zhuang closed public public Zhuang Wiktionary
zeawiki zea.wikipedia.org wikipedia zea zea.m.wikipedia.org Zeelandic open public public Zeelandic Wikipedia
zh_classicalwiki zh-classical.wikipedia.org wikipedia zh-classical zh-classical.m.wikipedia.org Classical Chinese open public public Classical Chinese Wikipedia
zh_min_nanwiki zh-min-nan.wikipedia.org wikipedia zh-min-nan zh-min-nan.m.wikipedia.org Chinese (Min Nan) open public public Min Nan Wikipedia
zh_min_nanwikibooks zh-min-nan.wikibooks.org wikibooks zh-min-nan zh-min-nan.m.wikibooks.org Chinese (Min Nan) closed public public Min Nan Wikibooks
zh_min_nanwikiquote zh-min-nan.wikiquote.org wikiquote zh-min-nan zh-min-nan.m.wikiquote.org Chinese (Min Nan) closed public public Min Nan Wikiquote
zh_min_nanwikisource zh-min-nan.wikisource.org wikisource zh-min-nan zh-min-nan.m.wikisource.org Chinese (Min Nan) open public public Min Nan Wikisource
zh_min_nanwiktionary zh-min-nan.wiktionary.org wiktionary zh-min-nan zh-min-nan.m.wiktionary.org Chinese (Min Nan) open public public Min Nan Wiktionary
zh_yuewiki zh-yue.wikipedia.org wikipedia zh-yue zh-yue.m.wikipedia.org Cantonese open public public Cantonese Wikipedia
zh_classicalwiki zh-classical.wikipedia.org wikipedia lzh zh-classical.m.wikipedia.org Literary Chinese open public public Classical Chinese Wikipedia
zh_min_nanwiki zh-min-nan.wikipedia.org wikipedia nan zh-min-nan.m.wikipedia.org Min Nan Chinese open public public Min Nan Wikipedia
zh_min_nanwikibooks zh-min-nan.wikibooks.org wikibooks nan zh-min-nan.m.wikibooks.org Min Nan Chinese closed public public Min Nan Wikibooks
zh_min_nanwikiquote zh-min-nan.wikiquote.org wikiquote nan zh-min-nan.m.wikiquote.org Min Nan Chinese closed public public Min Nan Wikiquote
zh_min_nanwikisource zh-min-nan.wikisource.org wikisource nan zh-min-nan.m.wikisource.org Min Nan Chinese open public public Min Nan Wikisource
zh_min_nanwiktionary zh-min-nan.wiktionary.org wiktionary nan zh-min-nan.m.wiktionary.org Min Nan Chinese open public public Min Nan Wiktionary
zh_yuewiki zh-yue.wikipedia.org wikipedia yue zh-yue.m.wikipedia.org Cantonese open public public Cantonese Wikipedia
zhwiki zh.wikipedia.org wikipedia zh zh.m.wikipedia.org Chinese open public public Chinese Wikipedia
zhwikibooks zh.wikibooks.org wikibooks zh zh.m.wikibooks.org Chinese open public public Chinese Wikibooks
zhwikinews zh.wikinews.org wikinews zh zh.m.wikinews.org Chinese open public public Chinese Wikinews
Expand Down

0 comments on commit d055204

Please sign in to comment.