diff --git a/README.md b/README.md index 15b2742..eb2cc3b 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,10 @@ client = Client() query = client.query() query.pattern = 'github page-size:10' query.language = 'sv' + +# Assumes UTC if no timezone is specified +query.start_time = datetime.datetime(2015, 2, 23, 15, 18, 13) + result = query.execute() for post in result.posts: @@ -66,9 +70,7 @@ To learn more about the capabilities of the API, please read the [Twingly Search ## Requirements * API key, contact sales@twingly.com via [twingly.com](https://www.twingly.com/try-for-free/) to get one -* Python - * Python 2.7+, 3.0+ - * [Requests](https://pypi.python.org/pypi/requests) +* Python 2.7+, 3.0+ ## Release diff --git a/examples/search_using_local_timezone.py b/examples/search_using_local_timezone.py new file mode 100644 index 0000000..a0f2395 --- /dev/null +++ b/examples/search_using_local_timezone.py @@ -0,0 +1,59 @@ +import twingly_search +import datetime +import dateutil.parser +from dateutil.tz import * + +""" +Simple cli that lets you search in Twingly Search API using your local timezone + +Example run: + python examples/search_using_local_timezone.py + > What do you want to search for? cupcakes + > Start time (in CET): 2016-02-01 00:00:00 + > End time (in CET): 2016-02-10 00:00:00 + Search results --------------------- + Published (in CET) - Post URL + 2016-02-09 21:38:09 - http://www.heydonna.com/2016/02/smores-cupcakes + 2016-02-09 20:21:36 - http://www.attwentynine.com/2016/02/09/chocolate-cake-cupcake + ... +""" +class SimpleSearchCli(object): + CURRENT_TIMEZONE = tzlocal() + CURRENT_TIMEZONE_NAME = datetime.datetime.now(CURRENT_TIMEZONE).tzname() + + def __init__(self): + self.client = twingly_search.Client() + + def start(self): + query = self.client.query() + + # See https://developer.twingly.com/resources/search-language/ + query.pattern = raw_input("What do you want to search for? 
") + query.start_time = self._read_time_from_stdin("Start time") + query.end_time = self._read_time_from_stdin("End time") + + results = query.execute() + + self._print_results(results) + + def _read_time_from_stdin(self, time_label): + prompt = "%s (in %s): " % (time_label, self.CURRENT_TIMEZONE_NAME) + + parsed_time = dateutil.parser.parse(raw_input(prompt)) + # Sets your local timezone on the parsed time object. + # If no timezone is set, twingly_search assumes UTC. + return parsed_time.replace(tzinfo=self.CURRENT_TIMEZONE) + + def _print_results(self, result): + print "Search results ---------------------" + print "Published (in %s) - Post URL" % self.CURRENT_TIMEZONE_NAME + + for post in result.posts: + # The time returned from the API is always in UTC, + # convert it to your local timezone before displaying it. + local_datetime = post.published.astimezone(self.CURRENT_TIMEZONE) + published_date_string = local_datetime.strftime("%Y-%m-%d %H:%M:%S") + + print "%s - %s" % (published_date_string, post.url) + +SimpleSearchCli().start() diff --git a/requirements.txt b/requirements.txt index f38dacc..08fe18c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ future requests +pytz diff --git a/test_requirements.txt b/test_requirements.txt index 2b930b1..7fff4d5 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,4 +1,5 @@ -future -requests +-r requirements.txt + nose betamax +python-dateutil diff --git a/tests/test_query.py b/tests/test_query.py index 8a2a021..228dbcf 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -1,6 +1,8 @@ from __future__ import unicode_literals import unittest -import datetime +from datetime import datetime +import pytz +import dateutil.parser from betamax import Betamax import twingly_search @@ -8,6 +10,10 @@ class QueryTest(unittest.TestCase): def setUp(self): self._client = twingly_search.Client() + def datetime_with_timezone(self, date, timezone_string): + timezone = 
pytz.timezone(timezone_string) + return timezone.localize(date) + def test_query_new(self): q = self._client.query() self.assertIsInstance(q, twingly_search.Query) @@ -41,19 +47,55 @@ def test_query_should_add_language(self): def test_query_should_add_start_time(self): q = self._client.query() q.pattern = "spotify" - q.start_time = datetime.datetime(2012, 12, 28, 9, 1, 22) + q.start_time = self.datetime_with_timezone(datetime(2012, 12, 28, 9, 1, 22), "UTC") + self.assertEqual(q.request_parameters()['ts'], "2012-12-28 09:01:22") + + def test_query_using_start_time_without_timezone(self): + q = self._client.query() + q.pattern = "spotify" + q.start_time = datetime(2012, 12, 28, 9, 1, 22) self.assertEqual(q.request_parameters()['ts'], "2012-12-28 09:01:22") + def test_query_using_start_time_with_timezone_other_than_utc(self): + q = self._client.query() + q.pattern = "spotify" + q.start_time = self.datetime_with_timezone(datetime(2012, 12, 28, 9, 1, 22), "Europe/Stockholm") + self.assertEqual(q.request_parameters()['ts'], "2012-12-28 08:01:22") + + def test_query_using_start_time_parsed_by_dateutil(self): + q = self._client.query() + q.pattern = "spotify" + q.start_time = dateutil.parser.parse("2012-12-28 09:01:22 -0800") + self.assertEqual(q.request_parameters()['ts'], "2012-12-28 17:01:22") + def test_query_should_add_end_time(self): q = self._client.query() q.pattern = "spotify" - q.end_time = datetime.datetime(2012, 12, 28, 9, 1, 22) + q.end_time = self.datetime_with_timezone(datetime(2012, 12, 28, 9, 1, 22), "UTC") self.assertEqual(q.request_parameters()['tsTo'], "2012-12-28 09:01:22") + def test_query_using_end_time_without_timezone(self): + q = self._client.query() + q.pattern = "spotify" + q.end_time = datetime(2012, 12, 28, 9, 1, 22) + self.assertEqual(q.request_parameters()['tsTo'], "2012-12-28 09:01:22") + + def test_query_using_end_time_with_timezone_other_than_utc(self): + q = self._client.query() + q.pattern = "spotify" + q.end_time = 
self.datetime_with_timezone(datetime(2012, 12, 28, 9, 1, 22), "Europe/Stockholm") + self.assertEqual(q.request_parameters()['tsTo'], "2012-12-28 08:01:22") + + def test_query_using_end_time_parsed_by_dateutil(self): + q = self._client.query() + q.pattern = "spotify" + q.end_time = dateutil.parser.parse("2012-12-28 09:01:22 +0800") + self.assertEqual(q.request_parameters()['tsTo'], "2012-12-28 01:01:22") + def test_query_should_encode_url_parameters(self): q = self._client.query() q.pattern = "spotify" - q.end_time = datetime.datetime(2012, 12, 28, 9, 1, 22) + q.end_time = self.datetime_with_timezone(datetime(2012, 12, 28, 9, 1, 22), "UTC") self.assertIn("tsTo=2012-12-28+09%3A01%3A22", q.url_parameters()) def test_query_pattern(self): diff --git a/twingly_search/post.py b/twingly_search/post.py index 55afe23..7f34031 100644 --- a/twingly_search/post.py +++ b/twingly_search/post.py @@ -1,4 +1,5 @@ import datetime +from pytz import utc class Post(object): """ @@ -19,17 +20,19 @@ class Post(object): (https://developer.twingly.com/resources/search/#authority) tags (list of string) tags """ - url = '' - title = '' - summary = '' - language_code = '' - published = datetime.datetime.strptime("1970-01-01 00:00:00Z", '%Y-%m-%d %H:%M:%SZ') - indexed = datetime.datetime.strptime("1970-01-01 00:00:00Z", '%Y-%m-%d %H:%M:%SZ') - blog_url = '' - blog_name = '' - authority = 0 - blog_rank = 0 - tags = [] + + def __init__(self): + self.url = '' + self.title = '' + self.summary = '' + self.language_code = '' + self.published = self._parse_time("1970-01-01 00:00:00Z") + self.indexed = self._parse_time("1970-01-01 00:00:00Z") + self.blog_url = '' + self.blog_name = '' + self.authority = 0 + self.blog_rank = 0 + self.tags = [] def set_values(self, params): """ @@ -41,14 +44,18 @@ def set_values(self, params): self.title = params['title'] self.summary = params['summary'] self.language_code = params['languageCode'] - self.published = datetime.datetime.strptime(params['published'], 
'%Y-%m-%d %H:%M:%SZ') - self.indexed = datetime.datetime.strptime(params['indexed'], '%Y-%m-%d %H:%M:%SZ') + self.published = self._parse_time(params['published']) + self.indexed = self._parse_time(params['indexed']) self.blog_url = params['blogUrl'] self.blog_name = params['blogName'] self.authority = int(params['authority']) self.blog_rank = int(params['blogRank']) self.tags = params['tags'] + def _parse_time(self, time): + parsed_time = datetime.datetime.strptime(time, '%Y-%m-%d %H:%M:%SZ') + return utc.localize(parsed_time) + def __unicode__(self): return "%s %s" % (self.title, self.url) diff --git a/twingly_search/query.py b/twingly_search/query.py index cdd0e3d..cf52835 100644 --- a/twingly_search/query.py +++ b/twingly_search/query.py @@ -1,4 +1,5 @@ import datetime +from pytz import utc from twingly_search.errors import TwinglyQueryException @@ -18,7 +19,9 @@ class Query(object): language (string) language which language to restrict the query to client (Client) the client that this query is connected to start_time (datetime.datetime) search for posts published after this time (inclusive) + Assumes UTC if the datetime object has no timezone set. end_time (datetime.datetime) search for posts published before this time (inclusive) + Assumes UTC if the datetime object has no timezone set. """ pattern = '' language = '' @@ -26,6 +29,8 @@ class Query(object): start_time = None end_time = None + DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" + def __init__(self, client): """ No need to call this method manually, instead use {Client#query}. 
@@ -64,28 +69,20 @@ def request_parameters(self): 'key': self.client.api_key, 'searchpattern': self.pattern, 'documentlang': self.language, - 'ts': self._ts(), - 'tsTo': self._tsTo(), + 'ts': self._time_to_utc_string(self.start_time, "start_time"), + 'tsTo': self._time_to_utc_string(self.end_time, "end_time"), 'xmloutputversion': 2 } - def _ts(self): - if self.start_time is not None: - if isinstance(self.start_time, datetime.datetime): - return self.start_time.strftime("%Y-%m-%d %H:%M:%S") - elif isinstance(self.start_time, basestring): - return self.start_time - else: - return '' - else: - return '' - - def _tsTo(self): - if self.end_time is not None: - if isinstance(self.end_time, datetime.datetime): - return self.end_time.strftime("%Y-%m-%d %H:%M:%S") - elif isinstance(self.end_time, basestring): - return self.end_time + def _time_to_utc_string(self, time, attr_name): + if time is not None: + if isinstance(time, datetime.datetime): + time_in_utc = time + if time.tzinfo is not None: + time_in_utc = time.astimezone(utc) + return time_in_utc.strftime(self.DATETIME_FORMAT) + elif isinstance(time, basestring): + return time else: return '' else: