Skip to content

Commit

Permalink
move country search term creation into setup script
Browse files Browse the repository at this point in the history
Search results can become odd without the country search
terms, so make their inclusion a mandatory part of the
setup.

Also adds a new configuration variable to restrict the
languages taken into account by Nominatim.
  • Loading branch information
lonvia committed Sep 28, 2016
1 parent 6fd2887 commit 1982978
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 53 deletions.
4 changes: 1 addition & 3 deletions docs/Import_and_update.md
Expand Up @@ -67,10 +67,8 @@ avoid swapping, never give more than 2/3 of RAM to osm2pgsql.
Loading additional datasets
---------------------------

The following commands will create additional entries for countries and POI searches:
The following commands will create additional entries for POI searches:

./utils/specialphrases.php --countries > specialphrases_countries.sql
psql -d nominatim -f specialphrases_countries.sql
./utils/specialphrases.php --wiki-import > specialphrases.sql
psql -d nominatim -f specialphrases.sql

Expand Down
7 changes: 7 additions & 0 deletions settings/defaults.php
Expand Up @@ -11,6 +11,13 @@
@define('CONST_Database_Web_User', 'www-data');
@define('CONST_Max_Word_Frequency', '50000');
@define('CONST_Limit_Reindexing', true);
// Restrict search languages.
// Normally Nominatim will include all language variants of name:XX
// in the search index. Set this to a comma-separated list of language
// codes without spaces (e.g. 'en,de,fr') to restrict import to a subset
// of languages.
// Currently only affects the import of country names and special phrases.
// When left at false, country names are imported for every name:XX variant
// and special phrases are downloaded for a built-in default language list.
@define('CONST_Languages', false);

// Set to false to avoid importing extra postcodes for the US.
@define('CONST_Use_Extra_US_Postcodes', true);
/* Set to true after importing Tiger house number data for the US.
Expand Down
36 changes: 0 additions & 36 deletions settings/phrase_settings.php
Expand Up @@ -2,42 +2,6 @@

// These settings control the import of special phrases from the wiki.

// Language codes whose special-phrase wiki pages are downloaded.
// Kept as one comma-separated string and split into a list of codes.
$aLanguageIn = explode(
    ',',
    'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'
    .'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sv,uk,vi'
);

// class/type combinations to exclude
$aTagsBlacklist
= array(
Expand Down
27 changes: 27 additions & 0 deletions utils/setup.php
Expand Up @@ -38,6 +38,7 @@
array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
array('create-country-names', '', 0, 1, 0, 0, 'bool', 'Create default list of searchable country names'),
array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
Expand Down Expand Up @@ -590,6 +591,32 @@
pgsqlRunScript($sTemplate);
}

// Create search terms for country names so that countries can be found by
// name. Runs when --create-country-names or --all is given.
if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
    // Terminate the progress message so later output starts on its own line.
    echo "Creating search index for default country names\n";
    $bDidSomething = true;

    // Hard-coded aliases: 'uk' for gb and 'united states' for us.
    pgsqlRunScript("select getorcreate_country(make_standard_name('uk'), 'gb')");
    pgsqlRunScript("select getorcreate_country(make_standard_name('united states'), 'us')");
    // The country code itself, then the default 'name' entry of each country.
    pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x");
    pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x");

    // Collect the quoted name:XX keys selected by CONST_Languages.
    // Codes are trimmed and empty entries dropped, so that values such as
    // 'en, de' or 'en,de,' select the intended keys instead of producing
    // keys like 'name: de' or 'name:' that never match.
    $aNameKeys = array();
    if (CONST_Languages) {
        foreach (explode(',', CONST_Languages) as $sLang) {
            $sLang = trim($sLang);
            if ($sLang !== '') {
                $aNameKeys[] = "'name:$sLang'";
            }
        }
    }

    // Language variants: expand the name column into key/value rows and
    // keep only the wanted name:XX keys.
    $sSQL = 'select count(*) from (select getorcreate_country(make_standard_name(v), country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k ';
    if ($aNameKeys) {
        $sSQL .= 'in ('.join(',', $aNameKeys).')';
    } else {
        // no (usable) language restriction: include all simple name tags
        $sSQL .= "like 'name:%'";
    }
    $sSQL .= ') v';
    pgsqlRunScript($sSQL);
}

if ($aCMDResult['drop']) {
// The implementation is potentially a bit dangerous because it uses
// a positive selection of tables to keep, and deletes everything else.
Expand Down
19 changes: 5 additions & 14 deletions utils/specialphrases.php
Expand Up @@ -12,29 +12,20 @@
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('countries', '', 0, 1, 0, 0, 'bool', 'Create import script for country codes and names'),
array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);

include(CONST_InstallPath.'/settings/phrase_settings.php');


// Print an SQL script on stdout that creates the country search terms.
if ($aCMDResult['countries']) {
    // Hard-coded aliases, followed by the country codes themselves.
    echo "select getorcreate_country(make_standard_name('uk'), 'gb');\n";
    echo "select getorcreate_country(make_standard_name('united states'), 'us');\n";
    echo "select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x;\n";

    // One statement per name key; %1$s is replaced by the key in both places.
    $sNameStatement = "select count(*) from (select getorcreate_country(make_standard_name(get_name_by_language(country_name.name,ARRAY['%1\$s'])), country_code) from country_name where get_name_by_language(country_name.name, ARRAY['%1\$s']) is not null) as x;\n";

    // The default name first, then every configured language variant.
    printf($sNameStatement, 'name');
    foreach ($aLanguageIn as $sLanguage) {
        printf($sNameStatement, 'name:'.$sLanguage);
    }
}

if ($aCMDResult['wiki-import']) {
$aPairs = array();

foreach ($aLanguageIn as $sLanguage) {
$sLanguageIn = CONST_Languages ? CONST_Languages :
('af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi');

foreach (explode(',', $sLanguageIn) as $sLanguage) {
$sURL = 'http://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
$sWikiPageXML = file_get_contents($sURL);
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
Expand Down

0 comments on commit 1982978

Please sign in to comment.