Permalink
Browse files

Fixes for s3_tmpwatch: require --time to be given explicitly (drop the 30-day default), compare against UTC timestamps instead of local time, and rename S3_cleanup to s3_cleanup.

  • Loading branch information...
1 parent f37d9d4 commit 63d56d20a1d24f48db8ed66b2b2ac452922cad5d Wahbeh Qardaji committed Jun 10, 2011
Showing with 20 additions and 21 deletions.
  1. +13 −14 mrjob/tools/emr/s3_tmpwatch.py
  2. +3 −3 tests/mockboto.py
  3. +4 −4 tests/tools/emr/s3_tmpwatch_test.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Yelp
+# Copyright 2010-2011 Yelp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-""" Delete old files in a path (by default files that are older than 30 days)
+""" Delete old files in a path
"""
from datetime import datetime, timedelta
import logging
@@ -27,14 +27,15 @@
log = logging.getLogger('mrjob.tools.emr.s3_tmpwatch')
-DEFAULT_TIME_OLD = '30d'
-
def main():
option_parser = make_option_parser()
options, args = option_parser.parse_args()
+ # make sure time and uris are given
if not args:
option_parser.error('Please specify one or more URIs')
+ if not options.time:
+ option_parser.error('--time needs to be specified')
# set up logging
if not options.quiet:
@@ -45,11 +46,11 @@ def main():
time_old = process_time(options.time)
for path in args:
- S3_cleanup(path, time_old,
+ s3_cleanup(path, time_old,
conf_path=options.conf_path,
dry_run=options.test)
-def S3_cleanup(glob_path, time_old, dry_run=False, conf_path=None):
+def s3_cleanup(glob_path, time_old, dry_run=False, conf_path=None):
"""Delete all files older than *time_old* in *path*.
If *dry_run* is ``True``, then just log the files that need to be
deleted without actually deleting them
@@ -65,8 +66,7 @@ def S3_cleanup(glob_path, time_old, dry_run=False, conf_path=None):
for key in bucket.list(key_name):
last_modified = datetime.strptime(key.last_modified, boto.utils.ISO8601)
- last_modified = last_modified.replace(tzinfo=None)
- time_delta = datetime.now() - last_modified
+ time_delta = datetime.utcnow() - last_modified
if time_delta > time_old:
# Delete it
log.info('Deleting %s; is %s old' % (key.name, str(time_delta)))
@@ -92,21 +92,20 @@ def make_option_parser():
action='store_true',
help='Print more messages')
option_parser.add_option(
- '-q', '--quiet', dest='quiet', default=False,
- action='store_true',
- help="Don't print anything to stderr; just print deleted files to stdout")
+ '-q', '--quiet', dest='quiet', default=False,
+ action='store_true',
+ help="Don't print anything to stderr; just print deleted files to stdout")
option_parser.add_option(
'-c', '--conf-path', dest='conf_path', default=None,
help='Path to alternate mrjob.conf file to read from')
option_parser.add_option(
'--no-conf', dest='conf_path', action='store_false',
help="Don't load mrjob.conf even if it's available")
option_parser.add_option(
- '-t', '--time', dest='time',
- default=DEFAULT_TIME_OLD, type='str',
+ '--time', dest='time', type='str',
help='The time the file needs to be old before deleting it')
option_parser.add_option(
- '--test', dest='test', default=False,
+ '-t', '--test', dest='test', default=False,
action='store_true',
help="Don't actually delete any files; just log that we would")
View
@@ -43,7 +43,7 @@ def add_mock_s3_data(mock_s3_fs, data):
"""Update mock_s3_fs (which is just a dictionary mapping bucket to
key to contents) with a map from bucket name to key name to data and
time last modified."""
- time_modified = to_iso8601(datetime.datetime.now())
+ time_modified = to_iso8601(datetime.datetime.utcnow())
for bucket_name, key_name_to_bytes in data.iteritems():
mock_s3_fs.setdefault(bucket_name, {'keys':{}, 'location': ''})
bucket = mock_s3_fs[bucket_name]
@@ -109,7 +109,7 @@ def mock_state(self):
def new_key(self, key_name):
if key_name not in self.mock_state():
self.mock_state()[key_name] = ('',
- to_iso8601(datetime.datetime.now()))
+ to_iso8601(datetime.datetime.utcnow()))
return MockKey(bucket=self, name=key_name)
def get_key(self, key_name):
@@ -149,7 +149,7 @@ def read_mock_data(self):
def write_mock_data(self, data):
if self.name in self.bucket.mock_state():
self.bucket.mock_state()[self.name] = (data,
- to_iso8601(datetime.datetime.now()))
+ to_iso8601(datetime.datetime.utcnow()))
else:
raise boto.exception.S3ResponseError(404, 'Not Found')
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Yelp
+# Copyright 2011 Yelp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -73,14 +73,14 @@ def test_cleanup(self):
assert isinstance(key_bar, MockKey)
assert isinstance(key_qux, MockKey)
- S3_cleanup(remote_input_path, timedelta(days=30), dry_run=True)
+ s3_cleanup(remote_input_path, timedelta(days=30), dry_run=True)
# dry-run shouldn't delete anything
assert isinstance(key_foo, MockKey)
assert isinstance(key_bar, MockKey)
assert isinstance(key_qux, MockKey)
- S3_cleanup(remote_input_path, timedelta(days=30))
+ s3_cleanup(remote_input_path, timedelta(days=30))
key_foo = bucket.get_key('data/foo')
key_bar = bucket.get_key('data/bar')
@@ -91,7 +91,7 @@ def test_cleanup(self):
assert_equal(key_bar, None)
assert isinstance(key_qux, MockKey)
- S3_cleanup(remote_input_path, timedelta(hours=48))
+ s3_cleanup(remote_input_path, timedelta(hours=48))
key_foo = bucket.get_key('data/foo')
key_bar = bucket.get_key('data/bar')

0 comments on commit 63d56d2

Please sign in to comment.