Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
295 changes: 295 additions & 0 deletions src/Migration/Destinations/CSV.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
<?php

namespace Utopia\Migration\Destinations;

use Utopia\CLI\Console;
use Utopia\Database\Exception\Authorization;
use Utopia\Database\Exception\Conflict;
use Utopia\Database\Exception\Structure;
use Utopia\Migration\Destination;
use Utopia\Migration\Resource;
use Utopia\Migration\Resources\Database\Row;
use Utopia\Migration\Transfer;
use Utopia\Storage\Device;
use Utopia\Storage\Device\Local;

class CSV extends Destination
{
// Device the finished CSV file is transferred to in shutdown().
protected Device $deviceForFiles;
// Identifier of the exported resource; only used in the "No data to export" error message.
protected string $resourceId;
// Directory on $deviceForFiles under which the CSV file is placed.
protected string $directory;
// Sanitized output filename without extension; '.csv' is appended in import() and shutdown().
protected string $outputFile;
// Local scratch device (a unique directory under the system temp dir) where the CSV is built.
protected Local $local;

// Lookup map of column name => true; an empty map means every column is exported.
protected array $allowedColumns = [];

/**
 * Set up a CSV export that is staged in a local scratch directory.
 *
 * @param Device $deviceForFiles Device that receives the finished file in shutdown().
 * @param string $resourceId Identifier of the exported resource (used in error messages).
 * @param string $directory Directory on $deviceForFiles where the file is placed.
 * @param string $filename Desired file name without extension; it is sanitized before use.
 * @param array $allowedColumns Column names to export; an empty list exports every column.
 * @param string $delimiter Field delimiter passed to fputcsv().
 * @param string $enclosure Field enclosure passed to fputcsv().
 * @param string $escape Escape character passed to fputcsv().
 * @param bool $includeHeaders Whether a header row is written.
 * @throws \Exception If the scratch directory cannot be created.
 */
public function __construct(
    Device $deviceForFiles,
    string $resourceId,
    string $directory,
    string $filename,
    array $allowedColumns = [],
    private readonly string $delimiter = ',',
    private readonly string $enclosure = '"',
    private readonly string $escape = '\\',
    private readonly bool $includeHeaders = true,
) {
    // Build the scratch device first; it lives in a uniquely named temp directory.
    $scratch = new Local(\sys_get_temp_dir() . '/csv_export_' . uniqid());
    $scratch->setTransferChunkSize(Transfer::STORAGE_MAX_CHUNK_SIZE);

    $this->deviceForFiles = $deviceForFiles;
    $this->resourceId = $resourceId;
    $this->directory = $directory;
    $this->outputFile = $this->sanitizeFilename($filename);
    $this->local = $scratch;

    $this->createDirectory($scratch->getRoot());

    // Store the allow-list as a key map so membership checks are a single isset().
    foreach ($allowedColumns as $column) {
        $this->allowedColumns[$column] = true;
    }
}

/**
 * Name identifying this destination.
 */
public static function getName(): string
{
return 'CSV';
}

/**
 * Resource types this destination accepts: rows only.
 *
 * @return array<string>
 */
public static function getSupportedResources(): array
{
return [
Resource::TYPE_ROW,
];
}

/**
 * Always returns an empty report; $resources is not inspected.
 */
public function report(array $resources = []): array
{
return [];
}

/**
 * Append one batch of rows to the temporary CSV file, then report the batch back.
 *
 * Resources that are not Row instances are skipped. Lines are buffered in
 * memory and written with fputcsv() once the approximate buffered size reaches
 * 1MB, and again at the end of the batch, so nothing stays buffered between calls.
 *
 * The file is opened in append mode, so this method may run several times for
 * one export. The header row is therefore only written while the file is still
 * missing or empty; a per-call flag would repeat the header in the middle of
 * the data on every later call.
 *
 * NOTE: the header is taken from the keys of the first row written to the
 * file; later rows are written positionally and are not realigned to it.
 *
 * @param array<Row> $resources
 * @param callable $callback Invoked with $resources once the batch has been written.
 * @throws \Exception If the CSV file cannot be opened or a line cannot be written.
 */
protected function import(array $resources, callable $callback): void
{
    $log = $this->local->getRoot() . '/' . $this->outputFile . '.csv';
    $bufferBytes = 1024 * 1024; // 1MB flush threshold (approximate)
    $buffer = ['lines' => [], 'size' => 0]; // Buffer for batching writes
    $handle = null; // file handle, opened lazily on first flush

    // Everything buffered by an earlier call has been flushed by the time we
    // get here, so the on-disk size reliably tells us whether a header exists.
    \clearstatcache(true, $log);
    $needsHeader = $this->includeHeaders
        && (!\is_file($log) || \filesize($log) === 0);

    $flushBuffer = function () use ($log, &$handle, &$buffer): void {
        if (empty($buffer['lines'])) {
            return;
        }

        if (!\is_resource($handle)) {
            // Keep the fopen() result in a local so a failed open never leaves
            // `false` in $handle (fclose(false) would raise a TypeError and
            // hide the real error).
            $opened = \fopen($log, 'a');
            if ($opened === false) {
                throw new \Exception("Failed to open file for writing: $log");
            }
            $handle = $opened;
        }

        foreach ($buffer['lines'] as $line) {
            if (\fputcsv($handle, $line, $this->delimiter, $this->enclosure, $this->escape) === false) {
                throw new \Exception("Failed to write CSV line to file: $log");
            }
        }

        $buffer = ['lines' => [], 'size' => 0];
    };

    try {
        foreach ($resources as $resource) {
            if (!($resource instanceof Row)) {
                continue;
            }

            $csvData = $this->resourceToCSVData($resource);

            // Write headers only if this is the first row of the file
            if ($needsHeader) {
                $headers = \array_keys($csvData);
                $buffer['lines'][] = $headers;
                $buffer['size'] += \strlen(\implode($this->delimiter, $headers)) + 2; // Approximate size
                $needsHeader = false;
            }

            $dataValues = \array_values($csvData);
            $buffer['lines'][] = $dataValues;
            $buffer['size'] += \strlen(\implode($this->delimiter, $dataValues)) + 2; // Approximate size

            if ($buffer['size'] >= $bufferBytes) {
                $flushBuffer();
            }

            $resource->setStatus(Resource::STATUS_SUCCESS);
            if (isset($this->cache)) {
                $this->cache->update($resource);
            }
        }

        // Flush any remaining buffered lines
        $flushBuffer();
    } finally {
        // Single place where the handle is released, on success and on failure.
        if (\is_resource($handle)) {
            \fclose($handle);
        }
    }

    $callback($resources);
}

/**
 * Move the finished CSV file to the destination device and remove the
 * temporary working directory.
 *
 * The cleanup runs on every path, including when no CSV file was produced,
 * so the scratch directory created by the constructor is not left behind.
 *
 * @throws \Exception If no CSV file was written, the transfer fails, or the
 *                    file is missing on the destination afterwards.
 */
public function shutdown(): void
{
    $filename = $this->outputFile . '.csv';

    try {
        $sourcePath = $this->local->getPath($filename);
        $destPath = $this->deviceForFiles->getPath($this->directory . '/' . $filename);

        // Check if the CSV file was actually created
        if (!$this->local->exists($sourcePath)) {
            throw new \Exception("No data to export for resource: $this->resourceId");
        }

        // Transfer expects absolute paths within each device
        $result = $this->local->transfer(
            $sourcePath,
            $destPath,
            $this->deviceForFiles
        );
        if ($result === false) {
            // Report the path that was actually targeted (it includes the directory).
            throw new \Exception('Error transferring to ' . $destPath);
        }
        if (!$this->deviceForFiles->exists($destPath)) {
            throw new \Exception('File not found on destination: ' . $destPath);
        }
    } finally {
        // Clean up the temporary directory; a failure here is only logged
        if (!$this->local->deletePath('') || $this->local->exists($this->local->getRoot())) {
            Console::error('Error cleaning up: ' . $this->local->getRoot());
        }
    }
}

/**
 * Ensure a directory exists at $path, creating missing parents as needed.
 *
 * @param string $path Directory to create.
 * @throws \Exception If $path exists but is not a directory, or it cannot be created.
 */
protected function createDirectory(string $path): void
{
    if (\is_dir($path)) {
        return;
    }

    // A regular file at this path must not be mistaken for the directory.
    if (\file_exists($path)) {
        throw new \Exception('Error creating directory: ' . $path);
    }

    // Re-check after a failed mkdir(): the directory may have been created
    // concurrently, which is not an error for the caller.
    if (!\mkdir($path, 0755, true) && !\is_dir($path)) {
        throw new \Exception('Error creating directory: ' . $path);
    }
}

/**
 * Sanitize a filename to make it filesystem-safe.
 *
 * Reserved characters (: \ / < > " | * ?) and every byte outside printable
 * ASCII are replaced with underscores, then surrounding whitespace is trimmed.
 *
 * @param string $filename Requested file name (without extension).
 * @return string The sanitized name, or 'export' when nothing usable remains.
 */
protected function sanitizeFilename(string $filename): string
{
    // A literal list avoids regex escaping mistakes: the backslash in
    // particular must be replaced as well as the forward slash.
    $reserved = [':', '\\', '/', '<', '>', '"', '|', '*', '?'];
    $sanitized = \str_replace($reserved, '_', $filename);

    // Byte-wise on purpose: each byte of a multi-byte character becomes '_'.
    $sanitized = \preg_replace('/[^\x20-\x7E]/', '_', $sanitized) ?? '';
    $sanitized = \trim($sanitized);

    // Compare against '' explicitly; empty() would also reject the valid name "0".
    return $sanitized === '' ? 'export' : $sanitized;
}
Comment thread
abnegate marked this conversation as resolved.

/**
 * Convert a row into an ordered map of column name => CSV string value.
 *
 * The system fields ($id, $permissions, $createdAt, $updatedAt) always come
 * first. Row data follows: every column when no allow-list is configured,
 * otherwise only the allowed ones.
 *
 * Columns are copied key by key rather than with array_merge(), which would
 * renumber integer-like column names (a column called "123" would become 0).
 *
 * @param Row $resource Row to convert.
 * @return array<string|int, string> Column name => value already converted for CSV output.
 */
protected function resourceToCSVData(Row $resource): array
{
    $data = [
        '$id' => $resource->getId(),
        '$permissions' => $resource->getPermissions(),
        '$createdAt' => $resource->getCreatedAt(),
        '$updatedAt' => $resource->getUpdatedAt(),
    ];

    $exportAll = empty($this->allowedColumns);

    foreach ($resource->getData() as $key => $value) {
        if ($exportAll || isset($this->allowedColumns[$key])) {
            $data[$key] = $value;
        }
    }

    foreach ($data as $key => $value) {
        $data[$key] = $this->convertValueToCSV($value);
    }

    return $data;
}

/**
 * Turn a single value into the string written to its CSV cell.
 *
 * null becomes the literal 'null', booleans become 'true'/'false', arrays and
 * objects are delegated to their dedicated converters, and anything else is
 * cast to string.
 *
 * @param mixed $value Value to convert.
 * @return string CSV cell content.
 */
protected function convertValueToCSV(mixed $value): string
{
    return match (true) {
        $value === null => 'null',
        \is_bool($value) => $value ? 'true' : 'false',
        \is_array($value) => $this->convertArrayToCSV($value),
        \is_object($value) => $this->convertObjectToCSV($value),
        default => (string)$value,
    };
}

/**
 * Convert an array to the string written to its CSV cell.
 *
 * An empty array becomes an empty string. An array carrying a scalar '$id'
 * entry is reduced to that id. Everything else is JSON-encoded.
 *
 * Invalid UTF-8 sequences are substituted with U+FFFD instead of making
 * json_encode() return false, which would violate the string return type or
 * silently empty the cell. For valid input the output is unchanged.
 *
 * @param array $value Array to convert.
 * @return string CSV cell content; '' if the value cannot be encoded at all.
 */
protected function convertArrayToCSV(array $value): string
{
    if (empty($value)) {
        return '';
    }

    // Only a scalar id can be returned as a string; anything else falls
    // through to JSON so the declared return type always holds.
    if (isset($value['$id']) && \is_scalar($value['$id'])) {
        return (string) $value['$id'];
    }

    $json = \json_encode($value, JSON_INVALID_UTF8_SUBSTITUTE);

    return $json === false ? '' : $json;
}
Comment on lines +273 to +282
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

json_encode() may return false → declared string return type will throw; harden JSON encoding

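When arrays/objects contain invalid UTF‑8, json_encode returns false. convertArrayToCSV()/convertObjectToCSV() declare string return types, so under strict_types that false causes a runtime TypeError; without strict_types it is silently coerced to an empty string and the field's data is lost.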

     protected function convertArrayToCSV(array $value): string
     {
         if (empty($value)) {
             return '';
         }
         if (isset($value['$id'])) {
             return $value['$id'];
         }
-        return \json_encode($value);
+        return $this->safeJsonEncode($value);
     }
@@
     protected function convertObjectToCSV($value): string
     {
         if ($value instanceof Row) {
             return $value->getId();
         }
-        return \json_encode($value);
+        return $this->safeJsonEncode($value);
     }
+
+    /**
+     * Robust JSON encoding for CSV fields.
+     */
+    private function safeJsonEncode(mixed $value): string
+    {
+        $json = \json_encode(
+            $value,
+            JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
+        );
+        return $json === false ? '' : $json;
+    }

Please confirm the minimum supported PHP version includes JSON_INVALID_UTF8_SUBSTITUTE (PHP ≥7.2). If not, I can fall back to a different strategy.

Also applies to: 283-289


🏁 Script executed:

#!/bin/bash
set -euo pipefail
echo "PWD: $(pwd)"
echo
# Find composer.json files
composer_files=$(fd -t f -H composer.json || true)
if [ -z "$composer_files" ]; then
  echo "NO_COMPOSER_JSON"
  exit 0
fi
echo "FOUND_COMPOSER_JSON"
echo "$composer_files"
first=$(echo "$composer_files" | head -n1)
echo "---- BEGIN $first ----"
sed -n '1,240p' "$first" || true
echo "---- END $first ----"
echo
# Try jq extraction
if command -v jq >/dev/null 2>&1; then
  echo "require.php: $(jq -r '.require.php // empty' "$first")"
  echo "config.platform.php: $(jq -r '.config.platform.php // empty' "$first")"
  echo "require-dev php: $(jq -r '.["require-dev"].php // empty' "$first")"
else
  echo "jq not available; grepping for php requirement lines"
  rg -n --hidden --no-ignore '"php"\s*:\s*".+"' "$first" || true
  rg -n --hidden --no-ignore '"platform"\s*:\s*{\s*' "$first" || true
fi

# Search CI for php-version or setup-php
echo
echo "CI files mentioning php version (searching .github and common CI files):"
rg -n --hidden --no-ignore -S 'php-version|setup-php|matrix:.*php|php:' .github || true
rg -n --hidden --no-ignore -S 'php:' Dockerfile* || true
rg -n --hidden --no-ignore -S 'composer config platform.php' || true

# Search repo for explicit minimum php mentions
echo
echo "Search README or docs for 'PHP' version mentions:"
rg -n --hidden --no-ignore -S 'PHP [0-9]\.[0-9]' README* || true

Length of output: 1855


Harden JSON encoding — avoid json_encode returning false → TypeError

Confirmed: composer.json requires PHP >=8.1 (platform "php": "8.1"), so JSON_INVALID_UTF8_SUBSTITUTE is available. Replace json_encode(...) with a robust helper and add the helper below. Applies to src/Migration/Destinations/CSV.php — convertArrayToCSV (≈269–278) and convertObjectToCSV (≈283–289).

     protected function convertArrayToCSV(array $value): string
     {
         if (empty($value)) {
             return '';
         }
         if (isset($value['$id'])) {
             return $value['$id'];
         }
-        return \json_encode($value);
+        return $this->safeJsonEncode($value);
     }
@@
     protected function convertObjectToCSV($value): string
     {
         if ($value instanceof Row) {
             return $value->getId();
         }
-        return \json_encode($value);
+        return $this->safeJsonEncode($value);
     }
+
+    /**
+     * Robust JSON encoding for CSV fields.
+     */
+    private function safeJsonEncode(mixed $value): string
+    {
+        $json = \json_encode(
+            $value,
+            JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
+        );
+        return $json === false ? '' : $json;
+    }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
protected function convertArrayToCSV(array $value): string
{
if (empty($value)) {
return '';
}
if (isset($value['$id'])) {
return $value['$id'];
}
return \json_encode($value);
}
protected function convertArrayToCSV(array $value): string
{
if (empty($value)) {
return '';
}
if (isset($value['$id'])) {
return $value['$id'];
}
return $this->safeJsonEncode($value);
}
protected function convertObjectToCSV($value): string
{
if ($value instanceof Row) {
return $value->getId();
}
return $this->safeJsonEncode($value);
}
/**
* Robust JSON encoding for CSV fields.
*/
private function safeJsonEncode(mixed $value): string
{
$json = \json_encode(
$value,
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
);
return $json === false ? '' : $json;
}
🤖 Prompt for AI Agents
In src/Migration/Destinations/CSV.php around lines 269–278 (and similarly for
convertObjectToCSV around 283–289), replace direct json_encode(...) calls with a
safeJsonEncode helper to avoid json_encode returning false and causing a
TypeError; implement safeJsonEncode(mixed $value): string below the class
methods that calls json_encode($value,
JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE|JSON_INVALID_UTF8_SUBSTITUTE)
inside a try/catch (or checks json_last_error()), and if encoding fails return
an empty string (or a safe fallback) instead of allowing false to propagate;
update convertArrayToCSV and convertObjectToCSV to call safeJsonEncode($value)
where json_encode was used.


/**
 * Convert an object to the string written to its CSV cell.
 *
 * A Row is reduced to its id; any other object is JSON-encoded.
 *
 * Invalid UTF-8 sequences are substituted with U+FFFD instead of making
 * json_encode() return false, which would violate the string return type or
 * silently empty the cell. For valid input the output is unchanged.
 *
 * @param mixed $value Object to convert.
 * @return string CSV cell content; '' if the value cannot be encoded at all.
 */
protected function convertObjectToCSV($value): string
{
    if ($value instanceof Row) {
        return $value->getId();
    }

    $json = \json_encode($value, JSON_INVALID_UTF8_SUBSTITUTE);

    return $json === false ? '' : $json;
}

}
4 changes: 2 additions & 2 deletions src/Migration/Resources/Database/Row.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ class Row extends Resource
* @param array<string> $permissions
*/
public function __construct(
string $id,
string $id,
private readonly Table $table,
private readonly array $data = [],
array $permissions = []
array $permissions = []
) {
$this->id = $id;
$this->permissions = $permissions;
Expand Down
45 changes: 37 additions & 8 deletions src/Migration/Sources/Appwrite.php
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,17 @@ private function exportDatabases(int $batchSize): void
$queries = [$this->database->queryLimit($batchSize)];

if ($this->rootResourceId !== '' && $this->rootResourceType === Resource::TYPE_DATABASE) {
$queries[] = $this->database->queryEqual('$id', [$this->rootResourceId]);
$targetDatabaseId = $this->rootResourceId;

// Handle database:collection format - extract database ID
if (\str_contains($this->rootResourceId, ':')) {
$parts = \explode(':', $this->rootResourceId, 2);
if (\count($parts) === 2) {
$targetDatabaseId = $parts[0];
}
}

$queries[] = $this->database->queryEqual('$id', [$targetDatabaseId]);
$queries[] = $this->database->queryLimit(1);
}

Expand Down Expand Up @@ -738,11 +748,11 @@ private function exportDatabases(int $batchSize): void
break;
}

$lastDatabase = $databases[count($databases) - 1];
$lastDatabase = $databases[\count($databases) - 1];

$this->callback($databases);

if (count($databases) < $batchSize) {
if (\count($databases) < $batchSize) {
break;
}
}
Expand All @@ -757,14 +767,33 @@ private function exportTables(int $batchSize): void
$databases = $this->cache->get(Database::getName());

foreach ($databases as $database) {
/** @var Database $database */
$lastTable = null;

/** @var Database $database */
while (true) {
$queries = [$this->database->queryLimit($batchSize)];
$tables = [];

if ($lastTable) {
// Filter to specific table if rootResourceType is database with database:collection format
if (
$this->rootResourceId !== '' &&
$this->rootResourceType === Resource::TYPE_DATABASE &&
\str_contains($this->rootResourceId, ':')
) {
$parts = \explode(':', $this->rootResourceId, 2);
if (\count($parts) === 2) {
$targetTableId = $parts[1]; // table ID
$queries[] = $this->database->queryEqual('$id', [$targetTableId]);
$queries[] = $this->database->queryLimit(1);
}
} elseif (
$this->rootResourceId !== '' &&
$this->rootResourceType === Resource::TYPE_TABLE
) {
$targetTableId = $this->rootResourceId;
$queries[] = $this->database->queryEqual('$id', [$targetTableId]);
$queries[] = $this->database->queryLimit(1);
} elseif ($lastTable) {
$queries[] = $this->database->queryCursorAfter($lastTable);
}

Expand All @@ -790,9 +819,9 @@ private function exportTables(int $batchSize): void

$this->callback($tables);

$lastTable = $tables[count($tables) - 1];
$lastTable = $tables[\count($tables) - 1];

if (count($tables) < $batchSize) {
if (\count($tables) < $batchSize) {
break;
}
}
Expand All @@ -807,7 +836,7 @@ private function exportColumns(int $batchSize): void
{
$tables = $this->cache->get(Table::getName());

/** @var Table[] $tables */
/** @var array<Table> $tables */
foreach ($tables as $table) {
$lastColumn = null;

Expand Down
Loading