Skip to content

Commit

Permalink
Export posts to CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonmule committed Apr 13, 2016
1 parent 958372b commit 34a20e3
Show file tree
Hide file tree
Showing 12 changed files with 443 additions and 8 deletions.
38 changes: 38 additions & 0 deletions application/classes/Controller/Api/Posts/Export.php
@@ -0,0 +1,38 @@
<?php defined('SYSPATH') OR die('No direct access allowed.');

/**
* Ushahidi API Posts Exports Controller
*
* @author Ushahidi Team <team@ushahidi.com>
* @package Ushahidi\Application\Controllers
* @copyright 2013 Ushahidi
* @license https://www.gnu.org/licenses/agpl-3.0.html GNU Affero General Public License Version 3 (AGPL3)
*/

class Controller_Api_Posts_Export extends Ushahidi_Rest
{
	/**
	 * Scope name reported for this controller.
	 *
	 * NOTE(review): presumably consumed by Ushahidi_Rest for access checks;
	 * confirm against the parent class.
	 *
	 * @return string
	 */
	protected function _scope()
	{
		return 'posts';
	}

	/**
	 * Resource name handed to the usecase factory in
	 * action_get_index_collection().
	 *
	 * @return string
	 */
	protected function _resource()
	{
		return 'posts_export';
	}

	/**
	 * Prepares the 'export' usecase for the posts_export resource.
	 *
	 * The usecase keeps whatever formatter the factory gave it unless the
	 * request carries a `format` query parameter naming something other than
	 * csv, in which case the matching "formatter.entity.post.<format>" service
	 * replaces it.
	 *
	 * @return void
	 */
	public function action_get_index_collection()
	{
		// Build the usecase with its default formatter and the request filters
		$usecase = service('factory.usecase')
			->get($this->_resource(), 'export')
			->setFilters($this->_filters());

		$this->_usecase = $usecase;

		// A missing parameter must normalise to '' rather than be passed
		// through as NULL.
		$format = strtolower((string) $this->request->query('format'));

		// Only swap the formatter when a non-default format was actually
		// requested. Without the empty check, a request with no `format`
		// parameter would look up the service "formatter.entity.post."
		// (trailing dot), which is not a real formatter name.
		if ($format !== '' && $format !== 'csv')
		{
			$this->_usecase->setFormatter(service("formatter.entity.post.$format"));
		}
	}
}
8 changes: 8 additions & 0 deletions application/classes/Ushahidi/Core.php
Expand Up @@ -83,6 +83,11 @@ public static function init()
return Kohana::$config->load('features.data-import.enabled');
});

// Data export config settings
$di->set('data-export.enabled', function() use ($di) {
return Kohana::$config->load('features.data-export.enabled');
});

$di->set('tool.uploader.prefix', function() use ($di) {
// Is this a multisite install?
$multisite = Kohana::$config->load('multisite.enabled');
Expand Down Expand Up @@ -291,6 +296,8 @@ public static function init()
'csv' => $di->lazyNew('Ushahidi_Formatter_CSV'),
'roles' => $di->lazyNew('Ushahidi_Formatter_Role'),
'permissions' => $di->lazyNew('Ushahidi_Formatter_Permission'),
// Formatter for post exports. Defaults to CSV export
'posts_export' => $di->lazyNew('Ushahidi_Formatter_Post_CSV'),
];

// Formatter parameters
Expand Down Expand Up @@ -381,6 +388,7 @@ public static function init()

$di->set('formatter.output.json', $di->lazyNew('Ushahidi_Formatter_JSON'));
$di->set('formatter.output.jsonp', $di->lazyNew('Ushahidi_Formatter_JSONP'));
$di->set('formatter.output.csv', $di->lazyNew('Ushahidi_Formatter_Export_CSV'));

// Formatter parameters
$di->setter['Ushahidi_Formatter_JSONP']['setCallback'] = function() {
Expand Down
29 changes: 29 additions & 0 deletions application/classes/Ushahidi/Formatter/Export/CSV.php
@@ -0,0 +1,29 @@
<?php defined('SYSPATH') OR die('No direct access allowed.');

/**
* Ushahidi CSV Formatter
*
* @author Ushahidi Team <team@ushahidi.com>
* @package Ushahidi\Application
* @copyright 2014 Ushahidi
* @license https://www.gnu.org/licenses/agpl-3.0.html GNU Affero General Public License Version 3 (AGPL3)
*/

use Ushahidi\Core\Tool\Formatter;
use Ushahidi\Core\Tool\OutputFormatter;

class Ushahidi_Formatter_Export_CSV implements Formatter, OutputFormatter
{
// Formatter
public function __invoke($input)
{
// CSV output

This comment has been minimized.

Copy link
@rjmackay

rjmackay Apr 18, 2016

Contributor

Definitely look at using League CSV or some other library to handle real CSV output

}

// OutputFormatter
/**
 * MIME type to advertise for CSV output.
 *
 * Uses "text/csv", the media type registered for CSV by RFC 4180, instead
 * of the unregistered "application/csv".
 *
 * @return string
 */
public function getMimeType()
{
	return 'text/csv';
}
}

198 changes: 198 additions & 0 deletions application/classes/Ushahidi/Formatter/Post/CSV.php
@@ -0,0 +1,198 @@
<?php defined('SYSPATH') OR die('No direct access allowed.');

/**
* Ushahidi API Formatter for CSV export
*
* @author Ushahidi Team <team@ushahidi.com>
* @package Ushahidi\Application
* @copyright 2014 Ushahidi
* @license https://www.gnu.org/licenses/agpl-3.0.html GNU Affero General Public License Version 3 (AGPL3)
*/

use Ushahidi\Core\SearchData;
use Ushahidi\Core\Tool\Formatter;

class Ushahidi_Formatter_Post_CSV implements Formatter

This comment has been minimized.

Copy link
@rjmackay

rjmackay Apr 18, 2016

Contributor

Would any of this be more easily handled by using: http://csv.thephpleague.com/ ??

{
/**
* @var SearchData
*/
protected $search;

// Formatter
/**
 * Formatter entry point: turns the given post records into CSV-ready rows
 * by delegating to generateCSVRecords().
 *
 * @param array $records
 *
 * @return array
 */
public function __invoke($records)
{
	$csv_records = $this->generateCSVRecords($records);

	return $csv_records;
}

/**
* Generates records that are suitable to save in CSV format.
* Records are padded with missing column headings as keys.
*
* @param array $records
*
* @return array
*/
protected function generateCSVRecords($records)
{
$csv_records = [];

// Get CSV heading
$heading = $this->getCSVHeading($records);

This comment has been minimized.

Copy link
@rjmackay

rjmackay Apr 18, 2016

Contributor

do we need to do this explicitly or could we just infer headings from array keys later on?


// Sort the columns from the heading so that they match with the record keys
sort($heading);

// Add heading
array_push($csv_records, $heading);

foreach ($records as $record)
{
$record = $record->asArray();

foreach ($record as $key => $val)
{
// Are these form values?
if ($key === 'values')
{
// Remove 'values' column
unset($record['values']);

foreach ($val as $key => $val)
{
// XXX: Is this always a single value array?
$val = $val[0];

// Is it a location?
if ($this->isLocation($val))

This comment has been minimized.

Copy link
@rjmackay

rjmackay Apr 18, 2016

Contributor

You should be able to infer this from the values array format, rather than specifically checking for locations.
ie. the format looks like

[
  'somefield' => [["value"]],
  'somefield2' => [[
   'complex' => 'value'
]],
]

We should really just output any array value as multiple fields..

{
// then create separate lat and lon fields
$record[$key.'.lat'] = $val['lat'];
$record[$key.'.lon'] = $val['lon'];
}

// else assign value as single string or csv string
else {
$record[$key] = $this->valueToString($val);
}
}
}

// If not form values then assign value as single string or CSV string
else
{
$record[$key] = $this->valueToString($val);
}
}

// Pad record with missing column headings as keys
$missing_keys = array_diff($heading, array_keys($record));
$record = array_merge($record, array_fill_keys($missing_keys, null));

// Sort the keys so that they match with columns from the CSV heading
ksort($record);

array_push($csv_records, $record);

This comment has been minimized.

Copy link
@rjmackay

rjmackay Apr 18, 2016

Contributor

This seems like we're likely to hit memory limits pretty fast. Could we do anything to mitigate that? ie. write the CSV directly to output? or into a temp file first? (if the answer is no we can figure this out when it becomes a real problem, not a hypothetical one)

}

return $csv_records;
}

/**
 * Collects the column names needed to represent every record.
 *
 * Each top-level key of a record becomes a column as-is. Entries under the
 * 'values' key are expanded per form field: a field whose first value is a
 * location (see isLocation()) contributes "<field>.lat" and "<field>.lon",
 * any other field contributes a single column named after the field.
 *
 * @param array $records records exposing asArray()
 *
 * @return array unique column names in first-seen order; array_unique()
 *               keeps the original indexes, so they may have gaps
 */
protected function getCSVHeading($records)
{
	$columns = [];

	foreach ($records as $record)
	{
		foreach ($record->asArray() as $key => $val)
		{
			// Anything that is not the form values container is a plain column
			if ($key !== 'values')
			{
				$columns[] = $key;
				continue;
			}

			// Distinct loop variables: the outer $key/$val are not reused here
			foreach ($val as $field => $field_values)
			{
				// Only the first value of a field decides its column layout.
				// An empty (or non-array) value list is treated as "no value"
				// instead of triggering an undefined offset on [0].
				$first = (is_array($field_values) && isset($field_values[0]))
					? $field_values[0]
					: null;

				if ($this->isLocation($first))
				{
					// Locations are split into separate lat and lon columns
					$columns[] = $field.'.lat';
					$columns[] = $field.'.lon';
				}
				else
				{
					$columns[] = $field;
				}
			}
		}
	}

	// The same column usually appears in many posts; keep one of each
	return array_unique($columns);
}

/**
* Converts post values to strings
*
* @param mixed $value
*
* @return string
*/

protected function valueToString($value)

This comment has been minimized.

Copy link
@jasonmule

jasonmule Apr 14, 2016

Author Contributor

Multiple values should probably be mapped to multiple columns with the same name.

This comment has been minimized.

Copy link
@willdoran

willdoran Apr 15, 2016

Contributor

I was looking at csv formats and apparently the following is acceptable:
Title, multivalue, description
"Test", val1 val2 val3, "test desc"
or
Title, multivalue, description
"test","val1,va2,val3", "test desc"

Would that work? I can't find a definite standard format, some other systems use ; as a separator for multivalue

This comment has been minimized.

Copy link
@jasonmule

jasonmule Apr 15, 2016

Author Contributor

It's better than using multiple columns. The second one is what is used at the moment, but I was wondering whether this could sometimes get confusing if the column is simply a string and not a multi-value column...

This comment has been minimized.

Copy link
@willdoran

willdoran Apr 15, 2016

Contributor

From what I could find the second format is what excel uses - it would be nice if there was a really clear standard.

I think a case where it could be confusing is if a string "test, something that happens" was being imported to a multivalue field but I don't think we can protect against that very much :(

This comment has been minimized.

Copy link
@jasonmule

jasonmule Apr 15, 2016

Author Contributor

Thanks. Let me try to work with that to see how it imports.

This comment has been minimized.

Copy link
@rjmackay

rjmackay Apr 18, 2016

Contributor

I'm curious what other software does (ie. excel, google sheets) .. do they even have multi value fields?
I don't want to break our import behaviour for the majority (ie. single value fields) just to support multiple value fields.
At the least we should use something less common than , like | or ;

I think ideally we could figure out the max values per field and add extra columns to the CSV to account for that.. That has the advantage of importing cleanly into most spreadsheets and being easier to distinguish when we import it back to Ushahidi

This comment has been minimized.

Copy link
@rjmackay

rjmackay via email Apr 18, 2016

Contributor
{
// Convert array to csv string
if (is_array($value)) {
return implode(',', $value);

This comment has been minimized.

Copy link
@rjmackay

rjmackay Apr 18, 2016

Contributor

need to be escaping values if we do this..

}

// or return value as string
return (string) $value;
}

/**
 * Tells whether a value is a location, i.e. an array that carries both a
 * 'lat' and a 'lon' key (the keys may hold null).
 *
 * @param mixed $value
 *
 * @return bool
 */
protected function isLocation($value)
{
	// Scalars, objects and null can never be locations
	if ( ! is_array($value))
	{
		return false;
	}

	return array_key_exists('lat', $value) && array_key_exists('lon', $value);
}

/**
* Stores the search parameters on this formatter.
*
* The value is kept in $this->search; nothing else in this class reads it
* in the code shown here.
*
* @param SearchData $search search parameters to remember
* @return $this for method chaining
*/
public function setSearch(SearchData $search)
{
$this->search = $search;
return $this;
}
}
7 changes: 7 additions & 0 deletions application/classes/Ushahidi/Rest.php
Expand Up @@ -494,6 +494,13 @@ protected function _prepare_response()
{
$format = service("formatter.output.$type");

if ($type === 'csv')
{
// Format for export
$format($this->_response_payload);
return;
}

$body = $format($this->_response_payload);
$mime = $format->getMimeType();

Expand Down
5 changes: 5 additions & 0 deletions application/config/features.php
Expand Up @@ -54,4 +54,9 @@
'data-import' => [
'enabled' => TRUE,
],

// Data export
'data-export' => [
'enabled' => TRUE,
],
];
14 changes: 7 additions & 7 deletions application/routes/default.php
Expand Up @@ -67,15 +67,15 @@
/**
* Export Posts API SubRoute
*/
// Route::set('export', $apiBase . 'posts/export')
// ->defaults(array(
// 'action' => 'index',
// 'controller' => 'Export',
// 'directory' => 'Api/Posts'
// ));
Route::set('export', $apiBase . 'posts/export')
->defaults(array(
'action' => 'index',
'controller' => 'Export',
'directory' => 'Api/Posts'
));

/**
* Export Posts API SubRoute
* Stats Posts API SubRoute
*/
Route::set('post-stats', $apiBase . 'posts/stats')
->defaults(array(
Expand Down
11 changes: 11 additions & 0 deletions application/tests/features/api.posts.feature
Expand Up @@ -668,6 +668,17 @@ Feature: Testing the Posts API
And the "count" property equals "2"
Then the guzzle status code should be 200

# todo: Add steps to test export
@resetFixture @csvexport
Scenario: Search All Posts and export the results
Given that I want to get all "Posts"
And that the request "query string" is:
"""
q=Searching&format=csv
"""
When I request "/posts/export"
Then the guzzle status code should be 200

@resetFixture @search
Scenario: Search All Posts by locale
Given that I want to get all "Posts"
Expand Down

1 comment on commit 34a20e3

@jasonmule
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rjmackay @willdoran Could you take a look when you have some time?

Please sign in to comment.