diff --git a/htroot/CrawlMonitorRemoteStart.java b/htroot/CrawlMonitorRemoteStart.java index 8accaf87fd..906de0ea5a 100644 --- a/htroot/CrawlMonitorRemoteStart.java +++ b/htroot/CrawlMonitorRemoteStart.java @@ -61,8 +61,8 @@ record = recordIterator.next(); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", dark ? "1" : "0"); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created().toString()); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL")); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention")); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", ("true".equals(record.attributes().get("crawlingQ"))) ? "1" : "0"); showedCrawl++; @@ -88,8 +88,8 @@ record = recordIterator.next(); prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", dark ? "1" : "0"); prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created().toString()); prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_peername", peername); - prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); + prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL")); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention")); prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", ("true".equals(record.attributes().get("crawlingQ"))) ? "1" : "0"); showedCrawl++; diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 4f440a0acb..c964596e8b 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -594,6 +594,9 @@ public static serverObjects respond(final RequestHeader header, final serverObje m.remove("generalFilter"); m.remove("specificFilter"); m.put("intention", post.get("intention", "").replace(',', '/')); + if (successurls.size() > 0) { // just include at least one of the startURL's in case of multiple for the news service + m.put("startURL", successurls.iterator().next().toNormalform(true)); + } sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_CRAWL_START, m); } } else { diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index f00bba3756..fd8e604e11 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -60,9 +60,7 @@ import net.yacy.data.DidYouMean; import net.yacy.data.UserDB; import net.yacy.data.ymark.YMarkTables; -import net.yacy.document.Document; import net.yacy.document.LibraryProvider; -import net.yacy.document.Parser; import net.yacy.document.Tokenizer; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.util.Bitfield; @@ -73,7 +71,6 @@ import net.yacy.peers.EventChannel; import net.yacy.peers.NewsPool; import net.yacy.peers.graphics.ProfilingGraph; -import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; @@ -195,6 +192,7 @@ public static serverObjects respond( prop.put("geoinfo", "0"); prop.put("rss_queryenc", ""); prop.put("meanCount", 5); + prop.put("eventID",""); // mandatory parameter for yacysearchtrailer/yacysearchitem includes return prop; } diff --git a/libbuild/GitRevMavenTask/src/GitRevMavenTask.java b/libbuild/GitRevMavenTask/src/GitRevMavenTask.java index ebee6b1dba..0775905e45 100644 --- a/libbuild/GitRevMavenTask/src/GitRevMavenTask.java +++ b/libbuild/GitRevMavenTask/src/GitRevMavenTask.java @@ -87,7 +87,7 @@ public void execute() throws MojoExecutionException { break; } } - if (lastTag != null || distance++ > 999) { + if (lastTag != null || distance++ > 90999) { break; } } diff --git a/libbuild/GitRevTask/GitRevTask.java b/libbuild/GitRevTask/GitRevTask.java index 959e0b50fc..8834a87bf3 100644 --- a/libbuild/GitRevTask/GitRevTask.java +++ b/libbuild/GitRevTask/GitRevTask.java @@ -79,7 +79,7 @@ public void execute() { break; } } - if (lastTag != null || distance++ > 999) break; + if (lastTag != null || distance++ > 90999) break; } walk.dispose(); if (lastTag == null) { diff --git a/locales/cn.lng b/locales/cn.lng index 98b01c6fef..a200bd6e20 100644 --- a/locales/cn.lng +++ b/locales/cn.lng @@ -1069,7 +1069,7 @@ Network Scanner==网络扫描器 YaCy can scan a network segment for available http, ftp and smb server.==YaCy可扫描http, ftp 和smb服务器. You must first select a IP range and then, after this range is scanned,==须先指定IP范围, 再进行扫描, it is possible to select servers that had been found for a full-site crawl.==才有可能选择主机并将其作为全站crawl的服务器. -No servers had been detected in the given IP range #[iprange]#. +#No servers had been detected in the given IP range== Please enter a different IP range for another scan.==未检测到可用服务器, 请重新指定IP范围. Please wait...==请稍候... >Scan the network<==>扫描网络< @@ -2952,7 +2952,6 @@ New Password is empty.==新密码为空. #File: ViewFile.html #--------------------------- -YaCy '#[clientname]#': View URL Content==YaCy '#[clientname]#': 查看文件内容 View URL Content==查看链接内容 >Get URL Viewer<==>获取链接浏览器< >URL Metadata<==>链接元数据< diff --git a/locales/de.lng b/locales/de.lng index f03046679e..8de7210a6d 100644 --- a/locales/de.lng +++ b/locales/de.lng @@ -1334,7 +1334,7 @@ Network Scanner==Netzwerk Scanner YaCy can scan a network segment for available http, ftp and smb server.==YaCy kann ein Netzwerksegment auf verfügbare HTTP, FTP und SMB Server hin absuchen. You must first select a IP range and then, after this range is scanned,==Sie müssen zuerst einen IP Bereich festlegen und dann, nachdem dieser Bereich gescannt wurde, it is possible to select servers that had been found for a full-site crawl.==ist es möglich einen gefunden Server für eine volle Seiten Suche crawlen zu lassen. -No servers had been detected in the given IP range #[iprange]#. +No servers had been detected in the given IP range==Es wurde kein Server im angegebenen IP Bereich gefunden Please enter a different IP range for another scan.==Bitte geben Sie einen anderen IP Bereich ein für einen weiteren Scan. Please wait...==Bitte warten... >Scan the network<==>Das Netzwerk Scannen< @@ -3147,7 +3147,7 @@ For community support, please visit our==Für Unterstützung aus der Community, #File: Status_p.inc #--------------------------- -#System Status==System Status +System Status==Systemstatus Unknown==unbekannt YaCy version:==YaCy Version: Uptime:==Online seit: @@ -3493,7 +3493,6 @@ New Password is empty.==Das neue Passwort ist leer. #File: ViewFile.html #--------------------------- -YaCy '#[clientname]#': View URL Content==YaCy '#[clientname]#': Zeige URL Inhalte View URL Content==Zeige URL Inhalte >Get URL Viewer<==>URL Betrachter< "Show Metadata"=="Metadaten anzeigen" diff --git a/locales/gr.lng b/locales/gr.lng index fa73b2581e..bccae501d2 100644 --- a/locales/gr.lng +++ b/locales/gr.lng @@ -101,7 +101,7 @@ You do not need to provide any personal data here, but if you want to distribute #Yahoo!==Yahoo! #MSN=MSN Comment==Σχόλιο -"Save"==Αποθήκευση +"Save"=="Αποθήκευση" #----------------------------- #File: Connections_p.html diff --git a/locales/hi.lng b/locales/hi.lng index 5c69825cea..3f2f7774eb 100644 --- a/locales/hi.lng +++ b/locales/hi.lng @@ -1077,7 +1077,7 @@ Network Scanner==नेटवर्क स्कैनर YaCy can scan a network segment for available http, ftp and smb server.==YaCy उपलब्ध HTTP, FTP और किसी सर्वर के लिए एक नेटवर्क खंड स्कैन कर सकते हैं. You must first select a IP range and then, after this range is scanned,==इस श्रृंखला स्कैन के बाद आप पहली बार, तो एक आईपी श्रेणी का चयन करना चाहिए it is possible to select servers that had been found for a full-site crawl.==यह एक पूरी साइट क्रॉल के लिए पाया गया था कि सर्वर का चयन करने के लिए संभव है. -No servers had been detected in the given IP range #[iprange]#. +#No servers had been detected in the given IP range== Please enter a different IP range for another scan.==एक और स्कैन के लिए एक अलग आईपी रेंज दर्ज करें. Please wait...==कृपया प्रतीक्षा करें ... >Scan the network<==>नेटवर्क स्कैन< diff --git a/locales/it.lng b/locales/it.lng index aa0777d405..4fb128405e 100644 --- a/locales/it.lng +++ b/locales/it.lng @@ -97,7 +97,7 @@ You do not need to provide any personal data here, but if you want to distribute #Yahoo!==Yahoo! #MSN=MSN Comment==Commento -"Save"==Salva +"Save"=="Salva" #----------------------------- #File: Connections_p.html diff --git a/locales/master.lng.xlf b/locales/master.lng.xlf index 2d2e5b2ce5..294c984de8 100644 --- a/locales/master.lng.xlf +++ b/locales/master.lng.xlf @@ -1587,9 +1587,6 @@ "Submit" - - http://www.iana.org/assignments/media-types/</a> - @@ -2117,6 +2114,9 @@ Release will be installed. Please wait. + + You installed YaCy with a package manager. + To update YaCy, use the package manager: @@ -2192,9 +2192,6 @@ Last Deploy - - You installed YaCy with a package manager. - @@ -3297,8 +3294,8 @@ it is possible to select servers that had been found for a full-site crawl. - - No servers had been detected in the given IP range #[iprange]#. + + No servers had been detected in the given IP range Please enter a different IP range for another scan. @@ -5518,12 +5515,9 @@ - - YaCy '#[clientname]#': YaCy Search Network - - - YaCy Search Network '#[networkName]#' - + + YaCy Search Network + YaCy Network< @@ -5815,9 +5809,6 @@ >DHT-in< - - YaCy Search Network - Count of Connected Senior Peers @@ -6752,10 +6743,19 @@ "Set Boost Query" - + field not in local index (boost has no effect) + + You can boost with vocabularies, use the field + + + with values + + + You can also boost on logarithmic occurrence counters of the fields + "Set Field Boosts" @@ -8658,9 +8658,6 @@ See the page info about the url. - - YaCy '#[clientname]#': View URL Content - View URL Content diff --git a/locales/ru.lng b/locales/ru.lng index 05bbf3e7b3..48189adf48 100644 --- a/locales/ru.lng +++ b/locales/ru.lng @@ -1131,7 +1131,6 @@ List of possible crawl start URLs==Список ссылок для провер #File: CrawlProfileEditor_p.html #--------------------------- Crawl Profile Editor==Изменение профиля индексирования ->Crawl Profile Editor<==>Изменение профиля индексирования< >Crawler Steering<==>Управление индексатором< >Crawl Scheduler<==>Планировщик индексирования< >Scheduled Crawls can be modified in this table<==>Запланированное индексирование можно изменить в этой таблице< @@ -1461,7 +1460,7 @@ Network Scanner==Сканер сети YaCy can scan a network segment for available http, ftp and smb server.==YaCy может сканировать такие сегменты сети как http-, ftp- и smb-серверы . You must first select a IP range and then, after this range is scanned,==Сначала вы должны выбрать диапазон IP-адресов, а затем диапазон сканирования. it is possible to select servers that had been found for a full-site crawl.==После этого можно выбрать серверы для полного индексирования сайта. -No servers had been detected in the given IP range #[iprange]#.==Серверы не обнаружены в заданном диапазоне IP-адресов. +No servers had been detected in the given IP range==Серверы не обнаружены в заданном диапазоне IP-адресов Please enter a different IP range for another scan.==Пожалуйста, введите другой диапазон IP-адресов, для повторного сканирования. Please wait...==Пожалуйста, подождите... >Scan the network<==>Сканирование сети< @@ -2383,8 +2382,7 @@ The target peer is alive but did not receive your message. Sorry.==Узел по #File: Network.html #--------------------------- -YaCy '#[clientname]#': YaCy Search Network==YaCy '#[clientname]#': Мониторинг сети -YaCy Search Network '#[networkName]#'==Мониторинг сети YaCy +YaCy Search Network==Мониторинг сети YaCy YaCy Network<==Сеть YaCy< The information that is presented on this page can also be retrieved as XML.==Информация, указанная на этой странице, также может быть получена как XML. Click the API icon to see the XML.==Нажмите на иконку API, чтобы увидеть XML. @@ -2876,7 +2874,6 @@ field not in local index (boost has no effect)==поля нет в локаль #File: RegexTest.html #--------------------------- -RegexTest==Тест регулярного выражения Regex Test==Тест регулярного выражения Test String==Тест строки Regular Expression==Регулярное выражение @@ -3767,7 +3764,6 @@ Parsed Sentences==Разобранные предложения Parsed Tokens/Words==Разобранные маркеры/слова Link List==Список ссылок Citation Report==Отчет цитирования ->CitationReport<==>Отчет цитирования< "Show"=="Показать" Unable to find URL Entry in DB==Невозможно найти запись ссылки в базе данных. Invalid URL==Неправильный URL-адрес @@ -3938,14 +3934,13 @@ Title==Заголовок #File: WatchWebStructure_p.html #--------------------------- -Web Structure<==Вэб-структура< +Web Structure==Вэб-структура The data that is visualized here can also be retrieved in a XML file, which lists the reference relation between the domains.==Эти данные, также могут быть получены в виде XML-файла с перекрёстными ссылками между доменами. With a GET-property 'about' you get only reference relations about the host that you give in the argument field for 'about'.==Указав параметр "GET" 'about' вы получите только перекрёстные ссылки о хосте, которые указан в поле 'about'. With a GET-property 'latest' you get a list of references that had been computed during the current run-time of YaCy, and with each next call only an update to the next list of references.==Указав параметр GET" 'latest' вы получите список ссылок вычисленных во время текущей работы YaCy, обновляющийся при каждом следующем вызове. Click the API icon to see the XML file.==Нажмите на иконку API для просмотра XML-файла. To see a list of all APIs, please visit the==Для просмотра списка всех API, пожалуйста, посетите API wiki page==страницу API Wiki -Web Structure==Вэб-структура >Host List<==>Список хостов< >#[count]# outlinks==>#[count]# внешних ссылок host<==Хост< diff --git a/locales/uk.lng b/locales/uk.lng index 15f99fdaef..7ab57e9a09 100644 --- a/locales/uk.lng +++ b/locales/uk.lng @@ -427,7 +427,7 @@ You can also use your peer without opening it, but this is not recomended.==Ви #File: ConfigHeuristics_p.html #--------------------------- Heuristics Configuration==Настройки евристики -A heuristic is an 'experience-based technique that help in problem solving, learning and discovery' (wikipedia).==Heuristik 'bezeichnet die Kunst, mit begrenztem Wissen und wenig Zeit zu guten Lösungen zu kommen.' (Wikipedia). +#A heuristic is an 'experience-based technique that help in problem solving, learning and discovery' (wikipedia).==Heuristik 'bezeichnet die Kunst, mit begrenztem Wissen und wenig Zeit zu guten Lösungen zu kommen.' (Wikipedia). The search heuristics that can be switched on here are techniques that help the discovery of possible search results based on link guessing, in-search crawling and requests to other search engines.==Пошукова евристика може бути використовувати методи, які допомагають виявити можливі результати пошуку з використанням запитів по посиланнях, вбудованого сканування та запитів до інших пошукових систем. When a search heuristic is used, the resulting links are not used directly as search result but the loaded pages are indexed and stored like other content.==При використанні пошукової евристики знайдені посилання не відображаються як пошукові результати, а індексуються та зберігаються разом з іншим вмістом. This ensures that blacklists can be used and that the searched word actually appears on the page that was discovered by the heuristic.==Це гарантує, що чорні списки можуть бути використані, і що пошукові терміни з’являються дійсно на сторінках, які були знайдені за допомогою евристики. @@ -1993,8 +1993,7 @@ You cannot call this page directly. Instead, use a link on the setup the proxy befor #File: QuickCrawlLink_p.html #--------------------------- -Quick Crawl Link==Schnell Crawl Link -Quickly adding Bookmarks:==Schnell Crawl Lesezeichen: -Simply drag and drop the link shown below to your Browsers Toolbar/Link-Bar.==Ziehen Sie einfach den unten stehenden Link auf Ihre Browser Toolbar/Linkbar. -If you click on it while browsing, the currently viewed website will be inserted into the YaCy crawling queue for indexing.==Wenn Sie, während Sie surfen, auf dieses Lesezeichen klicken, wird die gerade betrachtete Seite zum YaCy Crawler-Puffer hinzugefügt, um indexiert zu werden. -Crawl with YaCy==Mit YaCy crawlen -Title:==Titel: -Link:==link: -Status:==Status: -URL successfully added to Crawler Queue==Die Url wurde erfolgreich zum Crawler-Puffer hinzugefügt. -Malformed URL==Fehler in der URL -Unable to create new crawling profile for URL:==Es ist nicht möglich für diese URL ein Crawling Profil zu erstellen: -Unable to add URL to crawler queue:==Es ist nicht möglich die URL zum Crawler-Puffer hinzuzufügen: +Quick Crawl Link==Швидке сканування посилання +#Quickly adding Bookmarks:==Schnell Crawl Lesezeichen: +#Simply drag and drop the link shown below to your Browsers Toolbar/Link-Bar.==Ziehen Sie einfach den unten stehenden Link auf Ihre Browser Toolbar/Linkbar. +#If you click on it while browsing, the currently viewed website will be inserted into the YaCy crawling queue for indexing.==Wenn Sie, während Sie surfen, auf dieses Lesezeichen klicken, wird die gerade betrachtete Seite zum YaCy Crawler-Puffer hinzugefügt, um indexiert zu werden. +#Crawl with YaCy==Mit YaCy crawlen +#Title:==Titel: +#Link:==link: +#Status:==Status: +#URL successfully added to Crawler Queue==Die Url wurde erfolgreich zum Crawler-Puffer hinzugefügt. +#Malformed URL==Fehler in der URL +#Unable to create new crawling profile for URL:==Es ist nicht möglich für diese URL ein Crawling Profil zu erstellen: +#Unable to add URL to crawler queue:==Es ist nicht möglich die URL zum Crawler-Puffer hinzuzufügen: #----------------------------- #File: Ranking_p.html @@ -2941,7 +2940,7 @@ Go back to the Settings page==Назад до ст Your system is not protected by a password==Ваша система не захищена паролем Please go to the User Administration page and set an administration password.==Будь-ласка, перейдіть на сторінку керування користувачами і виставте основний пароль. You don't have the correct access right to perform this task.==У вас немає дозволу на запуск цього додатка. -Please log in.==Bitte melden Sie sich an. +#Please log in.==Bitte melden Sie sich an. You can now go back to the Settings page if you want to make more changes.==Якщо хочете зробити інші зміни, можна перейти назад на сторінку налаштувань. See you soon!==До зустрічі! Just a moment, please!==Зачекайте трохи, будь ласка! diff --git a/pom.xml b/pom.xml index 1f626a6b2d..40e4e71e43 100644 --- a/pom.xml +++ b/pom.xml @@ -151,7 +151,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.10.3 + 2.10.4 javadoc true @@ -248,7 +248,7 @@ maven-assembly-plugin - 2.5.3 + 2.6 assembly.xml @@ -266,6 +266,16 @@ + + org.apache.maven.plugins + maven-source-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-deploy-plugin + 2.8.2 + diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java index f6e5c16199..ac2a6244c5 100644 --- a/source/net/yacy/crawler/CrawlSwitchboard.java +++ b/source/net/yacy/crawler/CrawlSwitchboard.java @@ -59,8 +59,8 @@ public final class CrawlSwitchboard { - public static final String CRAWL_PROFILE_AUTOCRAWL_DEEP = "autocrawlDeep"; - public static final String CRAWL_PROFILE_AUTOCRAWL_SHALLOW = "autocrawlShallow"; + public static final String CRAWL_PROFILE_AUTOCRAWL_DEEP = "autocrawlDeep"; + public static final String CRAWL_PROFILE_AUTOCRAWL_SHALLOW = "autocrawlShallow"; public static final String CRAWL_PROFILE_PROXY = "proxy"; public static final String CRAWL_PROFILE_REMOTE = "remote"; public static final String CRAWL_PROFILE_SNIPPET_LOCAL_TEXT = "snippetLocalText"; @@ -107,18 +107,12 @@ public final class CrawlSwitchboard { private final File queuesRoot; private Switchboard switchboard; - public CrawlSwitchboard(final String networkName, Switchboard switchboard) { + public CrawlSwitchboard(Switchboard switchboard) { this.switchboard = switchboard; this.log = this.switchboard.log; this.queuesRoot = this.switchboard.queuesRoot; this.defaultPushProfiles = new ConcurrentHashMap<>(); - this.log.info("Initializing Word Index for the network '" + networkName + "'."); - - if ( networkName == null || networkName.isEmpty() ) { - log.severe("no network name given - shutting down"); - System.exit(0); - } this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap(Base64Order.enhancedCoder)); this.profilesActiveCrawlsCounter = new ConcurrentHashMap(); diff --git a/source/net/yacy/data/wiki/WikiCode.java b/source/net/yacy/data/wiki/WikiCode.java index 7ca0130743..f280722134 100644 --- a/source/net/yacy/data/wiki/WikiCode.java +++ b/source/net/yacy/data/wiki/WikiCode.java @@ -937,7 +937,17 @@ private String tagReplace(final String input, final Tags tags) { //extra treatment for headlines if (Arrays.binarySearch(HEADLINE_TAGS, tags.openWiki) >= 0) { - processHeadline(stringBuilder, firstPosition, tags, secondPosition, direlem); + // require line starts with headline markup (hdr e.g. " == Title == " but not "Seven = six plus one" ) + int i = 0; + boolean beginsWith = true; + while (i < firstPosition) { + if (stringBuilder.charAt(i) > ' ') { + beginsWith = false; + break; + } + i++; + } + if (beginsWith) processHeadline(stringBuilder, firstPosition, tags, secondPosition, direlem); } else { final int oldLength = stringBuilder.length(); stringBuilder.replace(firstPosition, firstPosition + tags.openWikiLength, tags.openHTML); diff --git a/source/net/yacy/kelondro/util/FileUtils.java b/source/net/yacy/kelondro/util/FileUtils.java index e3f748c65b..c9e8564fb7 100644 --- a/source/net/yacy/kelondro/util/FileUtils.java +++ b/source/net/yacy/kelondro/util/FileUtils.java @@ -517,6 +517,7 @@ public static Iterator strings(final byte[] a) { /** * Read lines of a file into an ArrayList. + * Empty lines in the file are ignored. * * @param listFile the file * @return the resulting array as an ArrayList @@ -529,7 +530,7 @@ public static ArrayList getListArray(final File listFile) { br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), StandardCharsets.UTF_8)); while ( (line = br.readLine()) != null ) { - list.add(line); + if (!line.isEmpty()) list.add(line); } br.close(); } catch (final IOException e ) { @@ -576,6 +577,7 @@ private static boolean writeList(final File listFile, final String out) { /** * Read lines of a text file into a String, optionally ignoring comments. + * Empty lines are always ignored. * * @param listFile the File to read from. * @param withcomments If false ignore lines starting with '#'. diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index bb5ea5954a..1acfef42a2 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1147,25 +1147,25 @@ public void run() { return 0; } - List container = new ArrayList(); + List resultContainer = new ArrayList(); Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound() + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); int term = count; Collection docs; if (event.addResultsToLocalIndex) { // only needed to store remote results docs = new ArrayList(docList[0].size()); } else docs = null; - for (final SolrDocument doc: docList[0]) { + for (final SolrDocument tmpdoc: docList[0]) { //System.out.println("***DEBUG*** " + ((String) doc.getFieldValue("sku"))); if ( term-- <= 0 ) { break; // do not process more that requested (in case that evil peers fill us up with rubbish) } // get one single search result - if ( doc == null ) { + if ( tmpdoc == null ) { continue; } URIMetadataNode urlEntry; try { - urlEntry = new URIMetadataNode(doc); + urlEntry = new URIMetadataNode(tmpdoc); } catch (MalformedURLException ex) { continue; } @@ -1198,73 +1198,61 @@ public void run() { // put the remote documents to the local index. We must convert the solr document to a solr input document: if (event.addResultsToLocalIndex) { - /* Check document size, only if a limit is set on remote documents size allowed to be stored to local index */ - if(checkDocumentSize(doc, event.getRemoteDocStoredMaxSize() * 1024)) { - final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc); - - // the input document stays untouched because it contains top-level cloned objects - docs.add(sid); - // will be stored to index, and is a full solr document, can be added to firstseen - event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis())); - } else { - Network.log.info("Document size greater than " + event.getRemoteDocStoredMaxSize() + " kbytes, excludes it from being stored to local index. Url : " + urlEntry.urlstring()); - } + /* Check document size, only if a limit is set on remote documents size allowed to be stored to local index */ + if (checkDocumentSize(tmpdoc, event.getRemoteDocStoredMaxSize() * 1024)) { + final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(tmpdoc); + + // the input document stays untouched because it contains top-level cloned objects + docs.add(sid); + // will be stored to index, and is a full solr document, can be added to firstseen + event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis())); + } else { + Network.log.info("Document size greater than " + event.getRemoteDocStoredMaxSize() + " kbytes, excludes it from being stored to local index. Url : " + urlEntry.urlstring()); + } } // after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document // because that goes into a search cache and would take a lot of memory in the search cache //doc.removeFields(CollectionSchema.text_t.getSolrFieldName()); - doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName()); - + tmpdoc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName()); + ResultURLs.stack( - ASCII.String(urlEntry.url().hash()), - urlEntry.url().getHost(), - event.peers.mySeed().hash.getBytes(), - UTF8.getBytes(target.hash), - EventOrigin.QUERIES); + ASCII.String(urlEntry.url().hash()), + urlEntry.url().getHost(), + event.peers.mySeed().hash.getBytes(), + UTF8.getBytes(target.hash), + EventOrigin.QUERIES); } - // add the url entry to the word indexes - container.add(urlEntry); + // add the url entry to the checked results + resultContainer.add(urlEntry); } - final int dls = docList[0].size(); final int numFound = (int) docList[0].getNumFound(); docList[0].clear(); docList[0] = null; if (localsearch) { - event.addNodes(container, facets, snippets, true, "localpeer", numFound); + event.addNodes(resultContainer, facets, snippets, true, "localpeer", numFound); event.addFinalize(); event.addExpectedRemoteReferences(-count); - Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references"); + Network.log.info("local search (solr): localpeer sent " + resultContainer.size() + "/" + numFound + " references"); } else { if (event.addResultsToLocalIndex) { - /* - * Current thread might be interrupted by SearchEvent.cleanup() - */ - if (Thread.interrupted()) { - throw new InterruptedException("solrQuery interrupted"); - } - WriteToLocalIndexThread writeToLocalIndexThread = new WriteToLocalIndexThread(event.query.getSegment(), - docs); - writeToLocalIndexThread.start(); - try { - writeToLocalIndexThread.join(); - } catch (InterruptedException e) { - /* - * Current thread interruption might happen while waiting - * for writeToLocalIndexThread. - */ - writeToLocalIndexThread.stopWriting(); - throw new InterruptedException("solrQuery interrupted"); - } - docs.clear(); + /* + * Current thread might be interrupted by SearchEvent.cleanup() + */ + if (Thread.interrupted()) { + throw new InterruptedException("solrQuery interrupted"); + } + WriteToLocalIndexThread writeToLocalIndexThread = new WriteToLocalIndexThread(event.query.getSegment(), + docs); // will clear docs on return + writeToLocalIndexThread.start(); } - event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound); + event.addNodes(resultContainer, facets, snippets, false, target.getName() + "/" + target.hash, numFound); event.addFinalize(); event.addExpectedRemoteReferences(-count); - Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + numFound + " references"); + Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (resultContainer.size()) + "/" + numFound + " references"); } - return dls; + return resultContainer.size(); } /** @@ -1285,6 +1273,7 @@ private static class WriteToLocalIndexThread extends Thread { /** * Parameters must be not null. + * After writing the collection is cleared * @param segment solr segment to write * @param docs solr documents collection to put to segment */ @@ -1300,17 +1289,19 @@ public void stopWriting() { this.stop.set(true); } - @Override - public void run() { - for (SolrInputDocument doc: docs) { - if(stop.get()) { - Network.log.info("Writing documents collection to Solr segment was stopped."); - return; - } - segment.putDocument(doc); + @Override + public void run() { + for (SolrInputDocument doc : docs) { + if (stop.get()) { + docs.clear(); + Network.log.info("Writing documents collection to Solr segment was stopped."); + return; + } + segment.putDocument(doc); } - } - } + docs.clear(); + } + } /** * Only when maxSize is greater than zero, check that doc size is lower. To diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 174ba7d576..290a321158 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -607,7 +607,7 @@ public void run() { } // create a crawler - this.crawler = new CrawlSwitchboard(networkName, this); + this.crawler = new CrawlSwitchboard(this); // start yacy core this.log.config("Starting YaCy Protocol Core"); @@ -1398,7 +1398,7 @@ public void switchNetwork(final String networkDefinition) throws FileNotFoundExc // create a crawler this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object - this.crawler = new CrawlSwitchboard(networkName, this); + this.crawler = new CrawlSwitchboard(this); // init a DHT transmission dispatcher this.dhtDispatcher = diff --git a/test/java/net/yacy/data/wiki/WikiCodeTest.java b/test/java/net/yacy/data/wiki/WikiCodeTest.java index c4b75a5119..ca110fb9ac 100644 --- a/test/java/net/yacy/data/wiki/WikiCodeTest.java +++ b/test/java/net/yacy/data/wiki/WikiCodeTest.java @@ -1,5 +1,6 @@ package net.yacy.data.wiki; +import java.io.BufferedReader; import org.junit.Test; import static org.junit.Assert.*; @@ -32,4 +33,26 @@ public void testProcessMetadata() { } } -} \ No newline at end of file + /** + * test header wiki markup + */ + @Test + public void testProcessLineOfWikiCode() { + String[] hdrTeststr = new String[]{ // ok test header + "== Header ==", "==Header=="}; + + String[] nohdrTeststr = new String[]{ // wrong test header + "Text of = Header, false = wrong", "One=Two"}; + + WikiCode wc = new WikiCode(); + + for (String s : hdrTeststr) { // test ok header + String erg = wc.transform("8090", s); + assertTrue("

tag expected:"+erg, erg.contains("

")); + } + for (String s : nohdrTeststr) { // test wrong header + String erg = wc.transform("8090", s); + assertFalse("no header tag expected:"+erg, erg.contains("

")); + } + } +}