Fixes loklak#1112: Add image, video filter constraints for cache
vibhcool committed May 25, 2017
1 parent 047c8f2 commit e290dcc
Showing 5 changed files with 175 additions and 39 deletions.
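
In brief: this commit threads a comma-separated filter request parameter (for example filter=image,video) from SearchServlet through DAO into TwitterScraper as an ArrayList&lt;String&gt;, replacing the earlier single-String form. A minimal standalone sketch of the parsing contract follows; the class and main harness are hypothetical, while the expressions mirror the diff:

import java.util.ArrayList;
import java.util.Arrays;

public class FilterParseSketch {
    public static void main(String[] args) {
        // The servlet reads a comma-separated "filter" parameter, e.g. "image, video".
        String filter = "image, video";
        // replaceAll returns a new String (Strings are immutable), so reassign the result.
        filter = filter.replaceAll("\\s", "");
        ArrayList<String> filterList = new ArrayList<String>(Arrays.asList(filter.split(",")));
        // A lone "video" filter is pushed into the Twitter search URL itself (f=videos);
        // any other combination is applied after scraping, while parsing the result page.
        boolean videoOnly = filterList.contains("video") && filterList.size() == 1;
        System.out.println(filterList + " videoOnly=" + videoOnly); // [image, video] videoOnly=false
    }
}
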
36 changes: 31 additions & 5 deletions src/org/loklak/api/search/SearchServlet.java
@@ -26,6 +26,8 @@
import java.util.Date;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.ArrayList;
import java.util.Arrays;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
@@ -151,7 +153,9 @@ protected void doGet(final HttpServletRequest request, final HttpServletResponse
DAO.getConfig(SEARCH_MAX_PUBLIC_COUNT_NAME, 100)));

String filter = post.get("filter", "");

filter = filter.replaceAll("\\s", ""); // reassign: replaceAll does not mutate the String
ArrayList<String> filterList = new ArrayList<String>(Arrays.asList(filter.split(",")));

// create tweet timeline
final String ordername = post.get("order", Timeline.Order.CREATED_AT.getMessageFieldName());
final Timeline.Order order = Timeline.parseOrder(ordername);
@@ -188,7 +192,7 @@ protected void doGet(final HttpServletRequest request, final HttpServletResponse
public void run() {
final String scraper_query = tokens.translate4scraper();
DAO.log(request.getServletPath() + " scraping with query: " + scraper_query);
Timeline twitterTl = DAO.scrapeTwitter(post, filter, scraper_query, order, timezoneOffsetf, true, timeout, true);
Timeline twitterTl = DAO.scrapeTwitter(post, filterList, scraper_query, order, timezoneOffsetf, true, timeout, true);
count_twitter_new.set(twitterTl.size());
tl.putAll(QueryEntry.applyConstraint(twitterTl, tokens, false)); // pre-localized results are not filtered with location constraint any more
tl.setScraperInfo(twitterTl.getScraperInfo());
@@ -200,7 +204,18 @@ public void run() {
// start a local search
Thread localThread = queryf == null || queryf.length() == 0 ? null : new Thread() {
public void run() {
DAO.SearchLocalMessages localSearchResult = new DAO.SearchLocalMessages(queryf, order, timezoneOffsetf, last_cache_search_time.get() > SEARCH_CACHE_THREASHOLD_TIME ? Math.min(maximumRecords, (int) DAO.getConfig(SEARCH_LOW_COUNT_NAME, 10)) : maximumRecords, agregation_limit, fields);

DAO.SearchLocalMessages localSearchResult =
new DAO.SearchLocalMessages(
queryf,
order,
timezoneOffsetf,
last_cache_search_time.get() > SEARCH_CACHE_THREASHOLD_TIME ? Math.min(maximumRecords,
(int) DAO.getConfig(SEARCH_LOW_COUNT_NAME, 10)) : maximumRecords,
agregation_limit,
filterList,
fields
);
long time = System.currentTimeMillis() - start;
last_cache_search_time.set(time);
post.recordEvent("cache_time", time);
@@ -265,7 +280,7 @@ public void run() {
} else if ("twitter".equals(source) && tokens.raw.length() > 0) {
final String scraper_query = tokens.translate4scraper();
DAO.log(request.getServletPath() + " scraping with query: " + scraper_query);
Timeline twitterTl = DAO.scrapeTwitter(post, filter, scraper_query, order, timezoneOffset, true, timeout, true);
Timeline twitterTl = DAO.scrapeTwitter(post, filterList, scraper_query, order, timezoneOffset, true, timeout, true);

count_twitter_new.set(twitterTl.size());
tl.putAll(QueryEntry.applyConstraint(twitterTl, tokens, false)); // pre-localized results are not filtered with location constraint any more
@@ -274,7 +289,18 @@ public void run() {
// in this case we use all tweets, not only the latest ones, because there may be no new tweets at all, and an empty result is not what the user expects

} else if ("cache".equals(source)) {
DAO.SearchLocalMessages localSearchResult = new DAO.SearchLocalMessages(query, order, timezoneOffset, last_cache_search_time.get() > SEARCH_CACHE_THREASHOLD_TIME ? Math.min(maximumRecords, (int) DAO.getConfig(SEARCH_LOW_COUNT_NAME, 10)) : maximumRecords, agregation_limit, fields);
DAO.SearchLocalMessages localSearchResult =
new DAO.SearchLocalMessages(
query,
order,
timezoneOffset,
last_cache_search_time.get() > SEARCH_CACHE_THREASHOLD_TIME ?
Math.min(maximumRecords, (int) DAO.getConfig(SEARCH_LOW_COUNT_NAME, 10))
: maximumRecords,
agregation_limit,
filterList,
fields
);
cache_hits.set(localSearchResult.timeline.getHits());
tl.putAll(localSearchResult.timeline);
tl.setResultIndex(localSearchResult.timeline.getResultIndex());
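On the servlet side, the new parameter lets clients constrain cached and freshly scraped results by media type straight from the search API. A usage sketch follows; the localhost:9000 host, the /api/search.json path, and the parameter values are assumptions about a stock local loklak setup:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class FilterRequestDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical local instance; per this commit's filtering logic,
        // filter=image,video keeps only tweets that carry an image and a video link.
        URL url = new URL("http://localhost:9000/api/search.json?q=fossasia&source=cache&filter=image,video");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            br.lines().forEach(System.out::println); // JSON timeline of the filtered results
        }
    }
}
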
40 changes: 34 additions & 6 deletions src/org/loklak/data/DAO.java
@@ -933,10 +933,19 @@ public static class SearchLocalMessages {
* @param resultCount - the number of messages in the result; can be zero if only aggregations are wanted
* @param aggregationLimit - the maximum count of facet entities, not search results
* @param aggregationFields - names of the aggregation fields. If no aggregation is wanted, pass no (zero) field(s)
* @param filterList - list of filter names as Strings, e.g. "image" or "video"
*/
public SearchLocalMessages(final String q, final Timeline.Order order_field, final int timezoneOffset, final int resultCount, final int aggregationLimit, final String... aggregationFields) {
public SearchLocalMessages (
final String q,
final Timeline.Order order_field,
final int timezoneOffset,
final int resultCount,
final int aggregationLimit,
final ArrayList<String> filterList,
final String... aggregationFields
) {
this.timeline = new Timeline(order_field);
QueryEntry.ElasticsearchQuery sq = new QueryEntry.ElasticsearchQuery(q, timezoneOffset);
QueryEntry.ElasticsearchQuery sq = new QueryEntry.ElasticsearchQuery(q, timezoneOffset, filterList);
long interval = sq.until.getTime() - sq.since.getTime();
IndexName resultIndex;
if (aggregationFields.length > 0 && q.contains("since:")) {
@@ -978,6 +987,25 @@ public SearchLocalMessages(final String q, final Timeline.Order order_field, fin
}
this.aggregations = query.aggregations;
}

public SearchLocalMessages (
final String q,
final Timeline.Order order_field,
final int timezoneOffset,
final int resultCount,
final int aggregationLimit,
final String... aggregationFields
) {
this(
q,
order_field,
timezoneOffset,
resultCount,
aggregationLimit,
new ArrayList<>(),
aggregationFields
);
}
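
This overload preserves the old SearchLocalMessages signature and delegates with an empty filter list, so existing call sites compile unchanged. The same telescoping pattern, reduced to a self-contained sketch (LocalSearchSketch and its fields are illustrative, not loklak code):

import java.util.ArrayList;
import java.util.Arrays;

class LocalSearchSketch {
    final String query;
    final ArrayList<String> filterList;
    final String[] aggregationFields;

    // New primary constructor: the filter list is explicit.
    LocalSearchSketch(String query, ArrayList<String> filterList, String... aggregationFields) {
        this.query = query;
        this.filterList = filterList;
        this.aggregationFields = aggregationFields;
    }

    // Old signature, kept for source compatibility: delegates with an empty list.
    LocalSearchSketch(String query, String... aggregationFields) {
        this(query, new ArrayList<String>(), aggregationFields);
    }

    public static void main(String[] args) {
        LocalSearchSketch legacy = new LocalSearchSketch("fossasia", "hashtags");
        LocalSearchSketch filtered = new LocalSearchSketch("fossasia",
                new ArrayList<String>(Arrays.asList("image")), "hashtags");
        System.out.println(legacy.filterList + " / " + filtered.filterList); // [] / [image]
    }
}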

private static boolean insufficient(ElasticsearchClient.Query query, int resultCount, int aggregationLimit, String... aggregationFields) {
return query.hitCount < resultCount || (aggregationFields.length > 0 && getAggregationResultLimit(query.aggregations) < aggregationLimit);
@@ -1108,12 +1136,12 @@ public static Timeline scrapeTwitter(
long timeout,
boolean recordQuery) {

return scrapeTwitter(post, "", q, order, timezoneOffset, byUserQuery, timeout, recordQuery);
return scrapeTwitter(post, new ArrayList<>(), q, order, timezoneOffset, byUserQuery, timeout, recordQuery);
}

public static Timeline scrapeTwitter(
final Query post,
final String filter,
final ArrayList<String> filterList,
final String q,
final Timeline.Order order,
final int timezoneOffset,
@@ -1133,7 +1161,7 @@ public static Timeline scrapeTwitter(
// maybe the remote server died, we try then ourself
start = System.currentTimeMillis();

tl = TwitterScraper.search(q, filter, order, true, true, 400);
tl = TwitterScraper.search(q, filterList, order, true, true, 400);
if (post != null) post.recordEvent("local_scraper_after_unsuccessful_remote", System.currentTimeMillis() - start);
} else {
tl.writeToIndex();
@@ -1142,7 +1170,7 @@ public static Timeline scrapeTwitter(
if (post != null && remote.size() > 0) post.recordEvent("omitted_scraper_latency_" + remote.get(0), peerLatency.get(remote.get(0)));
long start = System.currentTimeMillis();

tl = TwitterScraper.search(q, filter, order, true, true, 400);
tl = TwitterScraper.search(q, filterList, order, true, true, 400);
if (post != null) post.recordEvent("local_scraper", System.currentTimeMillis() - start);
}

40 changes: 19 additions & 21 deletions src/org/loklak/harvester/TwitterScraper.java
@@ -61,13 +61,13 @@ public class TwitterScraper {

public static Timeline search(
final String query,
final String filter,
final ArrayList<String> filterList,
final Timeline.Order order,
final boolean writeToIndex,
final boolean writeToBackend,
int jointime) {

Timeline[] tl = search(query, filter.replaceAll("\\s",""), order, writeToIndex, writeToBackend);
Timeline[] tl = search(query, filterList, order, writeToIndex, writeToBackend);
long timeout = System.currentTimeMillis() + jointime;
for (MessageEntry me: tl[1]) {
assert me instanceof TwitterTweet;
@@ -84,10 +84,10 @@ public static Timeline search(
final boolean writeToIndex,
final boolean writeToBackend,
int jointime) {
return search(query, "", order, writeToIndex, writeToBackend, jointime);
return search(query, new ArrayList<>(), order, writeToIndex, writeToBackend, jointime);
}

public static String prepareSearchURL(final String query, final String filter) {
public static String prepareSearchURL(final String query, final ArrayList<String> filterList) {
// check
// https://twitter.com/search-advanced for a better syntax
// build queries like https://twitter.com/search?f=tweets&vertical=default&q=kaffee&src=typd
@@ -110,7 +110,7 @@ public static String prepareSearchURL(final String query, final String filter) {
String q = t.length() == 0 ? "*" : URLEncoder.encode(t.substring(1), "UTF-8");

// type of content to fetch
if(filter.equals("video"))
if(filterList.contains("video") && filterList.size() == 1)
type = "videos";

// building url
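
Only a lone video filter changes the request itself; image and mixed filters keep f=tweets and are enforced later while the scraped page is parsed. A runnable approximation of that branch follows; SearchUrlSketch is hypothetical and omits the real method's query normalization:

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;

public class SearchUrlSketch {
    static String prepareSearchURL(String query, ArrayList<String> filterList)
            throws UnsupportedEncodingException {
        String type = "tweets";
        // Only the single filter "video" switches the result vertical.
        if (filterList.contains("video") && filterList.size() == 1) type = "videos";
        String q = URLEncoder.encode(query, "UTF-8");
        return "https://twitter.com/search?f=" + type + "&vertical=default&q=" + q + "&src=typd";
    }

    public static void main(String[] args) throws UnsupportedEncodingException {
        System.out.println(prepareSearchURL("kaffee", new ArrayList<String>(Arrays.asList("video"))));
        // -> ...f=videos...
        System.out.println(prepareSearchURL("kaffee", new ArrayList<String>(Arrays.asList("image", "video"))));
        // -> ...f=tweets...
    }
}
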
@@ -126,27 +126,27 @@ public static Timeline[] search(
final Timeline.Order order,
final boolean writeToIndex,
final boolean writeToBackend) {
return search(query, "", order, writeToIndex, writeToBackend);
return search(query, new ArrayList<>(), order, writeToIndex, writeToBackend);
}

public static Timeline[] search(
final String query,
final String filter,
final ArrayList<String> filterList,
final Timeline.Order order,
final boolean writeToIndex,
final boolean writeToBackend) {
// check
// https://twitter.com/search-advanced for a better syntax
// https://support.twitter.com/articles/71577-how-to-use-advanced-twitter-search#
String https_url = prepareSearchURL(query, filter);
String https_url = prepareSearchURL(query, filterList);
Timeline[] timelines = null;
try {
ClientConnection connection = new ClientConnection(https_url);
if (connection.inputStream == null) return null;
try {
BufferedReader br = new BufferedReader(new InputStreamReader(connection.inputStream, StandardCharsets.UTF_8));

timelines = search(br, filter, order, writeToIndex, writeToBackend);
timelines = search(br, filterList, order, writeToIndex, writeToBackend);
} catch (IOException e) {
DAO.severe(e);
} finally {
@@ -175,19 +175,19 @@ public static Timeline[] parse(
final Timeline.Order order,
final boolean writeToIndex,
final boolean writeToBackend) {
return parse(file, "", order, writeToIndex, writeToBackend);
return parse(file, new ArrayList<>(), order, writeToIndex, writeToBackend);
}

public static Timeline[] parse(
final File file,
String filter,
final ArrayList<String> filterList,
final Timeline.Order order,
final boolean writeToIndex,
final boolean writeToBackend) {
Timeline[] timelines = null;
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
timelines = search(br, filter, order, writeToIndex, writeToBackend);
timelines = search(br, filterList, order, writeToIndex, writeToBackend);
} catch (IOException e) {
DAO.severe(e);
} finally {
@@ -207,7 +207,7 @@ public static Timeline[] search(
final boolean writeToIndex,
final boolean writeToBackend) throws IOException {

return search(br, "", order, writeToIndex, writeToBackend);
return search(br, new ArrayList<>(), order, writeToIndex, writeToBackend);
}

/**
@@ -219,7 +219,7 @@ public static Timeline[] search(
*/
public static Timeline[] search(
final BufferedReader br,
String filter,
final ArrayList<String> filterList,
final Timeline.Order order,
final boolean writeToIndex,
final boolean writeToBackend) throws IOException {
@@ -231,9 +231,6 @@ public static Timeline[] search(
};
Timeline timelineReady = new Timeline(order);
Timeline timelineWorking = new Timeline(order);
Set<String> filter_array = new HashSet<String>(Arrays.asList(
(filter.toLowerCase()).split(",")
));
String input;
Map<String, prop> props = new HashMap<String, prop>();
Set<String> images = new LinkedHashSet<>();
@@ -361,7 +358,7 @@ public static Timeline[] search(
if (props.size() == 10 || (debuglog && props.size() > 4 && input.indexOf("stream-item") > 0)) {

// filter tweets with videos and others
if (filter_array.contains("video") && filter_array.size() > 1) {
if (filterList.contains("video") && filterList.size() > 1) {
match_video1 = video_url_patterns[0].matcher(props.get("tweettext").value);
match_video2 = video_url_patterns[1].matcher(props.get("tweettext").value);

@@ -374,7 +371,7 @@ public static Timeline[] search(
}

// filter tweets with images
if (filter_array.contains("image") && images.size() < 1) {
if (filterList.contains("image") && images.size() < 1) {
props = new HashMap<String, prop>();
place_id = "";
place_name = "";
@@ -698,12 +695,13 @@ public static String unshorten(String text) {
*/
public static void main(String[] args) {
//wget --no-check-certificate "https://twitter.com/search?q=eifel&src=typd&f=realtime"
String filter = "image";
ArrayList<String> filterList = new ArrayList<String>();
filterList.add("image");
Timeline[] result = null;
if (args[0].startsWith("/"))
result = parse(new File(args[0]), Timeline.Order.CREATED_AT, true, true);
else
result = TwitterScraper.search(args[0], filter, Timeline.Order.CREATED_AT, true, true);
result = TwitterScraper.search(args[0], filterList, Timeline.Order.CREATED_AT, true, true);
int all = 0;
for (int x = 0; x < 2; x++) {
if (x == 0) System.out.println("Timeline[0] - finished to be used:");
