From e737a9056c84c073885c4c264dee7954c49cc03e Mon Sep 17 00:00:00 2001 From: Zakariyya Mughal Date: Tue, 18 May 2021 14:45:21 -0400 Subject: [PATCH] Get all CPAN distributions using scroll API for MetaCPAN This uses the Elasticsearch scroll API to get all CPAN distributions. Fixes: (issue reference missing from this copy). --- app/models/package_manager/cpan.rb | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/app/models/package_manager/cpan.rb b/app/models/package_manager/cpan.rb index 371b7faf64..6a26867c8c 100644 --- a/app/models/package_manager/cpan.rb +++ b/app/models/package_manager/cpan.rb @@ -13,16 +13,19 @@ def self.package_link(project, _version = nil) end def self.project_names - page = 1 projects = [] + size = 5000 + scroll_start_r = get("https://fastapi.metacpan.org/v1/release/_search?scroll=1m&size=#{size}&q=status:latest&fields=distribution") + projects += scroll_start_r["hits"]["hits"] + scroll_id = scroll_start_r['_scroll_id'] loop do - r = get("https://fastapi.metacpan.org/v1/release/_search?q=status:latest&fields=distribution&sort=date:desc&size=5000&from=#{page * 5000}")["hits"]["hits"] - break if r == [] + r = get("https://fastapi.metacpan.org/v1/_search/scroll?scroll=1m&scroll_id=#{ scroll_id }") + break if r["hits"]["hits"] == [] - projects += r - page += 1 + projects += r["hits"]["hits"] + scroll_id = r['_scroll_id'] end - projects.map { |project| project["fields"]["distribution"] }.uniq + projects.map { |project| project["fields"]["distribution"] }.flatten.uniq end def self.recent_names