Skip to content

Commit

Permalink
replace '_' with ' ' when matching wikipedia article names
Browse files Browse the repository at this point in the history
  • Loading branch information
twain47 committed Sep 9, 2013
1 parent 28a5ef8 commit dab59d5
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion wikidata/import.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,9 @@ $PSQL -c "alter table entity add column description_en text"
# Copy the description from entity_description rows with language = 'en' into entity.description_en.
$PSQL -c "update entity set description_en = description from entity_description where entity.entity_id = entity_description.entity_id and language = 'en'"

# Stream totals.txt into import_link_hit via COPY ... FROM STDIN, parsed as CSV with a single space as delimiter.
cat totals.txt | $PSQL -c "COPY import_link_hit from STDIN WITH CSV DELIMITER ' '"
# Sum hits per (target||'wiki', decoded value) and insert the totals into link_hit.
# NOTE(review): the truncate on the next line empties link_hit right after this insert, so these rows
# are discarded; this looks like the pre-change statement from the diff -- confirm it should be removed.
$PSQL -c "insert into link_hit select target||'wiki', catch_decode_url_part(value), sum(hits) from import_link_hit group by target||'wiki', catch_decode_url_part(value)"
# Empty link_hit before it is repopulated.
$PSQL -c "truncate link_hit"
# Populate link_hit with summed hits per (target||'wiki', decoded value with '_' replaced by ' '),
# skipping rows whose decoded value is NULL. The GROUP BY expressions must match the SELECT
# expressions exactly, so the same catch_decode_url_part() call is used in all three places.
$PSQL -c "insert into link_hit select target||'wiki', replace(catch_decode_url_part(value), '_', ' '), sum(hits) from import_link_hit where replace(catch_decode_url_part(value), '_', ' ') is not null group by target||'wiki', replace(catch_decode_url_part(value), '_', ' ')"
# Empty entity_link_hit before it is repopulated.
$PSQL -c "truncate entity_link_hit"
# One row per entity_link row; the left outer join keeps links with no matching link_hit row,
# and coalesce() records their hits as 0.
$PSQL -c "insert into entity_link_hit select entity_id, target, value, coalesce(hits,0) from entity_link left outer join link_hit using (target, value)"
# Create entity_hit holding the total hits per entity_id.
# NOTE(review): plain "create table" fails if entity_hit already exists -- confirm it is dropped before a re-run.
$PSQL -c "create table entity_hit as select entity_id,sum(hits) as hits from entity_link_hit group by entity_id"
# Unique btree index on entity_hit(entity_id).
$PSQL -c "create unique index idx_entity_hit on entity_hit using btree (entity_id)"

0 comments on commit dab59d5

Please sign in to comment.