Skip to content
This repository has been archived by the owner on May 14, 2024. It is now read-only.

Commit

Permalink
Merge pull request #79 from ukaea/feature/add-metadata-to-archive-files/
Browse files Browse the repository at this point in the history
#8

Feature/add metadata to archive files/#8
  • Loading branch information
robert-clegg-tessella committed Dec 4, 2018
2 parents c3e5ffc + c4af482 commit c3220f1
Show file tree
Hide file tree
Showing 16 changed files with 429 additions and 48 deletions.
4 changes: 3 additions & 1 deletion ditto_web_api/DittoWebApi/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from DittoWebApi.src.services.security.config_security_service import ConfigSecurityService
from DittoWebApi.src.utils.configurations import Configuration
from DittoWebApi.src.utils.file_system.files_system_helpers import FileSystemHelper
from DittoWebApi.src.utils.file_read_write_helper import FileReadWriteHelper
from DittoWebApi.src.utils.route_helper import format_route_specification


Expand Down Expand Up @@ -52,8 +53,9 @@ def setup_logger(log_file_location, level):

# Set up services
S3_ADAPTER = BotoAdapter(CONFIGURATION, LOGGER)
FILE_READ_WRITE_HELPER = FileReadWriteHelper()
FILE_SYSTEM_HELPER = FileSystemHelper()
ARCHIVER = Archiver(FILE_SYSTEM_HELPER, LOGGER)
ARCHIVER = Archiver(FILE_READ_WRITE_HELPER, FILE_SYSTEM_HELPER, LOGGER)
EXTERNAL_DATA_SERVICE = ExternalDataService(CONFIGURATION, FILE_SYSTEM_HELPER, LOGGER, S3_ADAPTER)
INTERNAL_DATA_SERVICE = InternalDataService(ARCHIVER, CONFIGURATION, FILE_SYSTEM_HELPER, LOGGER)
STORAGE_DIFFERENCE_PROCESSOR = StorageDifferenceProcessor(LOGGER)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def copy_dir(self, bucket_name, dir_path):

file_summary = self._storage_difference_processor.return_difference_comparison([], files_in_directory)
transfer_summary = self._external_data_service.perform_transfer(bucket_name, file_summary)
self._internal_data_service.archive_file_transfer(dir_path, file_summary=file_summary)
self._internal_data_service.archive_file_transfer(file_summary=file_summary)
return return_transfer_summary(**transfer_summary)

def create_bucket(self, bucket_name):
Expand Down Expand Up @@ -123,7 +123,7 @@ def copy_new(self, bucket_name, dir_path):
return return_transfer_summary(message=message,
files_skipped=len(files_in_directory))
transfer_summary = self._external_data_service.perform_transfer(bucket_name, files_summary)
self._internal_data_service.archive_file_transfer(dir_path, file_summary=files_summary)
self._internal_data_service.archive_file_transfer(file_summary=files_summary)
return return_transfer_summary(**transfer_summary)

def copy_new_and_update(self, bucket_name, dir_path):
Expand Down Expand Up @@ -151,5 +151,5 @@ def copy_new_and_update(self, bucket_name, dir_path):
return return_transfer_summary(message=message,
files_skipped=len(files_summary.files_to_be_skipped()))
transfer_summary = self._external_data_service.perform_transfer(bucket_name, files_summary)
self._internal_data_service.archive_file_transfer(dir_path, file_summary=files_summary)
self._internal_data_service.archive_file_transfer(file_summary=files_summary)
return return_transfer_summary(**transfer_summary)
53 changes: 43 additions & 10 deletions ditto_web_api/DittoWebApi/src/services/internal/archiver.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,55 @@
from DittoWebApi.src.utils.system_helper import current_time_in_utc


class Archiver:
    """Write and update JSON archive files that record transferred files.

    Each archive maps a file name to a record holding the file name, its size,
    the time of the transfer and the type of transfer ("new upload" or
    "file update").
    """

    def __init__(self, file_read_write_helper, file_system_helper, logger):
        """Store the collaborators used for JSON I/O, file-system access and logging."""
        self._logger = logger
        self._file_system_helper = file_system_helper
        self._file_read_write_helper = file_read_write_helper

    def write_archive(self, archive_file_path, file_summary):
        """Create a new archive file at ``archive_file_path`` describing ``file_summary``.

        Any exception is logged and re-raised. The file is closed if, and only
        if, it was successfully opened.
        """
        # Bound before the try block so the finally clause can never hit an
        # unbound name (and mask the real error) when opening the file fails.
        new_archive_file = None
        try:
            content = {}
            new_archive_file = self._file_system_helper.create_and_open_file_for_writing(archive_file_path)

            self._archive_file_summary(file_summary, content)

            self._file_read_write_helper.write_json_to_file(new_archive_file, content)
        except Exception as exception:
            self._logger.error(f"Exception found here: {exception}")
            raise
        finally:
            if new_archive_file is not None:
                self._file_system_helper.close_file(new_archive_file)
        self._logger.debug(f"Archive file created: {archive_file_path}")

    def update_archive(self, archive_file_path, file_summary):
        """Merge ``file_summary`` into the existing archive at ``archive_file_path``.

        The current archive content is read, entries for the new and updated
        files are added or overwritten, and the merged content is written back.
        Any exception is logged and re-raised. The file is closed if, and only
        if, it was successfully opened.
        """
        archived_file = None
        try:
            archived_file = self._file_system_helper.open_file_for_reading_and_writing(archive_file_path)
            content = self._file_read_write_helper.read_file_as_json(archived_file)

            # Build the merged content BEFORE clearing the file, so a failure
            # while collecting file details leaves the existing archive intact.
            self._archive_file_summary(file_summary, content)

            self._file_read_write_helper.clear_file(archived_file)
            self._file_read_write_helper.write_json_to_file(archived_file, content)
        except Exception as exception:
            self._logger.error(f"Exception found: {exception}")
            raise
        finally:
            if archived_file is not None:
                self._file_system_helper.close_file(archived_file)
        self._logger.debug(f"Archive file updated: {archive_file_path}")

    def _archive_file(self, file, time_of_transfer, type_of_transfer):
        """Return the archive record (a dict) for a single transferred file."""
        size = self._file_system_helper.file_size(file.abs_path)
        return {"file": file.file_name,
                "size": size,
                "latest update": time_of_transfer,
                "type of transfer": type_of_transfer}

    def _archive_file_summary(self, file_summary, content):
        """Add a record to ``content`` (in place) for every new and updated file.

        Records are keyed by file name, so an existing record for the same
        name is overwritten. All records share one transfer time stamp.
        """
        time_of_transfer = str(current_time_in_utc())
        for file in file_summary.new_files:
            content[file.file_name] = self._archive_file(file, time_of_transfer, "new upload")

        for file in file_summary.updated_files:
            content[file.file_name] = self._archive_file(file, time_of_transfer, "file update")
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from DittoWebApi.src.models.file_information import FileInformation
from DittoWebApi.src.models.file_storage_summary import FilesStorageSummary


class InternalDataService:
Expand Down Expand Up @@ -27,10 +28,37 @@ def build_file_information(self, file_path):
file_name = self._file_system_helper.file_name(abs_path_to_file)
return FileInformation(abs_path_to_file, rel_path_to_file, file_name)

def archive_file_transfer(self, dir_path, file_summary):
full_dir_path = self._file_system_helper.join_paths(self._root_dir, dir_path) if dir_path else self._root_dir
file_path = self._file_system_helper.join_paths(full_dir_path, self._archive_file_name)
if self._file_system_helper.does_file_exist(file_path):
self._archiver.update_archive(file_path, file_summary)
else:
self._archiver.write_archive(file_path, file_summary)
def archive_file_transfer(self, file_summary):
    """Record the transfer described by ``file_summary`` in per-directory archive files.

    The summary is split by sub-directory. For each one, the archive file in
    that directory (or in the root directory when the sub-directory is empty)
    is updated if it already exists and created otherwise.
    """
    summaries_by_sub_dir = self._split_file_summary_by_sub_dir(file_summary)

    for sub_dir, sub_dir_file_summary in summaries_by_sub_dir.items():
        # An empty relative directory means the files live directly in the root.
        if sub_dir:
            target_dir = self._file_system_helper.join_paths(self._root_dir, sub_dir)
        else:
            target_dir = self._root_dir
        self._logger.debug(f"Writing archive file in {target_dir}")
        archive_file_path = self._file_system_helper.join_paths(target_dir, self._archive_file_name)

        archive_exists = self._file_system_helper.does_file_exist(archive_file_path)
        record = self._archiver.update_archive if archive_exists else self._archiver.write_archive
        record(archive_file_path, sub_dir_file_summary)

def _split_file_summary_by_sub_dir(self, file_summary):
dict_of_sub_dir_summaries = {}

self._add_file_to_sub_dir_file_summary(
file_summary.new_files, dict_of_sub_dir_summaries, lambda path: dict_of_sub_dir_summaries[path].new_files)

self._add_file_to_sub_dir_file_summary(
file_summary.updated_files,
dict_of_sub_dir_summaries,
lambda path: dict_of_sub_dir_summaries[path].updated_files)

return dict_of_sub_dir_summaries

def _add_file_to_sub_dir_file_summary(self, files, dict_of_sub_dir_summaries, summary_selector):
for file in files:
directory_rel_path = self._file_system_helper.file_directory(file.rel_path)
if directory_rel_path not in dict_of_sub_dir_summaries:
dict_of_sub_dir_summaries[directory_rel_path] = FilesStorageSummary(None)
summary_selector(directory_rel_path).append(file)
16 changes: 16 additions & 0 deletions ditto_web_api/DittoWebApi/src/utils/file_read_write_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import json


class FileReadWriteHelper:
    """Stateless helpers for reading and writing JSON through already-open file objects."""

    @staticmethod
    def write_json_to_file(open_file, json_content):
        """Serialise ``json_content`` as JSON into ``open_file``."""
        json.dump(json_content, fp=open_file)

    @staticmethod
    def read_file_as_json(open_file):
        """Parse the content read from ``open_file`` as JSON and return the result."""
        parsed_content = json.load(open_file)
        return parsed_content

    @staticmethod
    def clear_file(open_file):
        """Rewind ``open_file`` to its start and discard everything from there on."""
        # truncate() cuts at the current position, so the rewind must come first.
        open_file.seek(0)
        open_file.truncate()
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,18 @@ def last_modified(self, file_path):
def file_size(self, abs_file_path):
    """Return the size in bytes that ``os.stat`` reports for ``abs_file_path``."""
    stat_result = os.stat(abs_file_path)
    return stat_result.st_size

def create_file(self, file_path, content):
with open(file_path, "w") as file:
file.write(content)
def create_and_open_file_for_writing(self, file_path):
    """Open ``file_path`` in text write mode ("w") and return the open file object.

    The caller is responsible for closing the returned file.
    """
    write_handle = open(file_path, mode="w")
    return write_handle

def does_file_exist(self, file_path):
    """Return whether ``os.path.exists`` reports that ``file_path`` exists."""
    path_is_present = os.path.exists(file_path)
    return path_is_present

def open_file(self, file_path):
return open(file_path, 'w')
def open_file_for_reading_and_writing(self, file_path):
    """Open the existing file ``file_path`` in text read/write mode ('r+').

    The caller is responsible for closing the returned file.
    """
    read_write_handle = open(file_path, mode='r+')
    return read_write_handle

def close_file(self, open_file):
    """Close ``open_file`` unless it is already closed."""
    if open_file.closed:
        return
    open_file.close()

def file_directory(self, file_rel_path):
    """Return the directory part of ``file_rel_path`` ('' when it has none)."""
    directory, _file_name = os.path.split(file_rel_path)
    return directory
11 changes: 11 additions & 0 deletions ditto_web_api/DittoWebApi/src/utils/system_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import time
from datetime import datetime


def current_time():
    """Return the current time in seconds since the epoch, as given by ``time.time()``."""
    return time.time()


def current_time_in_utc():
    """Return the current UTC time as a naive ``datetime`` (``tzinfo`` is ``None``).

    The result is deliberately naive so that ``str()`` of it keeps the
    ``YYYY-MM-DD HH:MM:SS`` form without a UTC offset suffix.
    """
    # Imported here so this function does not rely on the module-level imports.
    from datetime import timezone

    time_stamp = current_time()
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; convert via
    # an aware datetime and strip the tzinfo to get the same naive UTC value.
    aware_utc = datetime.fromtimestamp(time_stamp, tz=timezone.utc)
    return aware_utc.replace(tzinfo=None)
115 changes: 115 additions & 0 deletions ditto_web_api/DittoWebApi/tests/services/archiver_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# pylint: disable=W0201
import logging
import unittest
import pytest
import mock

from DittoWebApi.src.utils.file_system.files_system_helpers import FileSystemHelper
from DittoWebApi.src.models.file_storage_summary import FilesStorageSummary
from DittoWebApi.src.models.file_information import FileInformation
from DittoWebApi.src.services.internal.archiver import Archiver
from DittoWebApi.src.utils.file_read_write_helper import FileReadWriteHelper


class TestArchive(unittest.TestCase):
    """Tests for Archiver using autospecced helpers and a patched ``time.time``."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Create an Archiver wired to mocked collaborators before every test."""
        self._file_system_helper = mock.create_autospec(FileSystemHelper)
        self._logger = mock.create_autospec(logging.Logger)
        self._file_read_write_helper = mock.create_autospec(FileReadWriteHelper)
        self.test_archiver = Archiver(self._file_read_write_helper, self._file_system_helper, self._logger)

        # Both ways of opening an archive return the same stand-in file object.
        self.mock_open_file = mock.Mock()
        self._file_system_helper.create_and_open_file_for_writing.return_value = self.mock_open_file
        self._file_system_helper.open_file_for_reading_and_writing.return_value = self.mock_open_file

        self.mock_file_1 = mock.create_autospec(FileInformation)
        self.mock_file_1.file_name = "file_1.txt"
        self.mock_file_2 = mock.create_autospec(FileInformation)
        self.mock_file_2.file_name = "file_2.txt"

        self.mock_file_summary = mock.create_autospec(FilesStorageSummary)

        # Sizes are handed out in call order: first file archived, then second.
        self._file_system_helper.file_size.side_effect = [100, 50]

    @staticmethod
    def _entry(mock_file, size, type_of_transfer):
        """Build the archive record expected for ``mock_file`` at the patched time."""
        return {
            'file': mock_file.file_name,
            'size': size,
            'latest update': '1970-01-01 03:25:45',
            'type of transfer': type_of_transfer,
        }

    @mock.patch('DittoWebApi.src.utils.system_helper.time.time', return_value=12345)
    def test_write_archive_creates_archive_file(self, _patched_time):
        # Arrange
        self.mock_file_summary.new_files = [self.mock_file_1]
        self.mock_file_summary.updated_files = [self.mock_file_2]
        expected_content = {
            self.mock_file_1.file_name: self._entry(self.mock_file_1, 100, 'new upload'),
            self.mock_file_2.file_name: self._entry(self.mock_file_2, 50, 'file update'),
        }
        # Act
        self.test_archiver.write_archive("some_file_path", self.mock_file_summary)
        # Assert
        self._logger.debug.assert_called_with("Archive file created: some_file_path")
        self._file_read_write_helper.write_json_to_file.assert_called_once_with(
            self.mock_open_file, expected_content)

    @mock.patch('DittoWebApi.src.utils.system_helper.time.time', return_value=12345)
    def test_update_archive_updates_an_archive_file(self, _patched_time):
        # Arrange
        self.mock_file_summary.new_files = [self.mock_file_1]
        self.mock_file_summary.updated_files = [self.mock_file_2]
        self._file_read_write_helper.read_file_as_json.return_value = {
            self.mock_file_2.file_name: self._entry(self.mock_file_2, 50, 'file update'),
        }
        expected_content = {
            self.mock_file_2.file_name: self._entry(self.mock_file_2, 50, 'file update'),
            self.mock_file_1.file_name: self._entry(self.mock_file_1, 100, 'new upload'),
        }
        # Act
        self.test_archiver.update_archive("some_file_path", self.mock_file_summary)
        # Assert
        self._logger.debug.assert_called_with("Archive file updated: some_file_path")
        self._file_read_write_helper.write_json_to_file.assert_called_once_with(
            self.mock_open_file, expected_content)

    @mock.patch('DittoWebApi.src.utils.system_helper.time.time', return_value=12345)
    def test_update_archive_updates_an_archive_file_when_no_new_files(self, _patched_time):
        # Arrange
        self.mock_file_summary.new_files = []
        self.mock_file_summary.updated_files = [self.mock_file_1, self.mock_file_2]
        self._file_read_write_helper.read_file_as_json.return_value = {
            self.mock_file_2.file_name: self._entry(self.mock_file_2, 50, 'new upload'),
        }
        expected_content = {
            self.mock_file_1.file_name: self._entry(self.mock_file_1, 100, 'file update'),
            self.mock_file_2.file_name: self._entry(self.mock_file_2, 50, 'file update'),
        }
        # Act
        self.test_archiver.update_archive("some_file_path", self.mock_file_summary)
        # Assert
        self._logger.debug.assert_called_with("Archive file updated: some_file_path")
        self._file_read_write_helper.write_json_to_file.assert_called_once_with(
            self.mock_open_file, expected_content)
Loading

0 comments on commit c3220f1

Please sign in to comment.