Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F117764449
D5142.1775226247.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Authored By
Unknown
Size
9 KB
Referenced Files
None
Subscribers
None
D5142.1775226247.diff
View Options
diff --git a/src/app/DataMigrator/Driver/Takeout.php b/src/app/DataMigrator/Driver/Takeout.php
--- a/src/app/DataMigrator/Driver/Takeout.php
+++ b/src/app/DataMigrator/Driver/Takeout.php
@@ -131,6 +131,11 @@
$headline = '';
while (($line = fgets($fp)) !== false) {
+ // Make sure every line ends with the correct end-of-line sequence (CRLF)
+ if (substr($line, -2) != "\r\n") {
+ $line = substr($line, 0, -1) . "\r\n";
+ }
+
if (str_starts_with($line, 'From ') && preg_match('/^From [^\s]+ [a-zA-Z]{3} [a-zA-Z]{3}/', $line)) {
$this->mailItemHandler($folder, $headline, $msg, $existing, $callback);
$msg = '';
@@ -155,52 +160,67 @@
}
// Read iCalendar file line by line
- // Note: We assume that event exceptions are always in order after the master event
+ // We can't do a single pass over the stream because events and event exceptions can be
+ // spread across the whole file, not necessarily one after another.
$fp = fopen("{$this->location}/Calendar/{$foldername}.ics", 'r');
$event = '';
- $previous = '';
$head = '';
$got_head = false;
+ $events = [];
+ $pos = 0;
+ $start = null;
+
+ $add_vevent_block = function ($start_pos, $end_pos) use (&$event, &$events) {
+ // Get the UID which will be the array key
+ if (preg_match('/\nUID:(.[^\r\n]+(\r\n[\s\t][^\r\n]+)*)/', $event, $matches)) {
+ $uid = str_replace(["\r\n ", "\r\n "], '', $matches[1]);
+ // Remember position in the stream, we don't want to copy the whole content into memory
+ $chunk = $start_pos . ':' . $end_pos;
+ $events[$uid] = isset($events[$uid]) ? array_merge($events[$uid], [$chunk]) : [$chunk];
+ }
+
+ $event = '';
+ };
while (($line = fgets($fp)) !== false) {
+ $pos += strlen($line);
+
if (str_starts_with($line, 'BEGIN:VEVENT')) {
$got_head = true;
- if (strlen($event)) {
- if (strpos($event, "\nRECURRENCE-ID")) {
- $previous .= $event;
- } else {
- if (strlen($previous)) {
- $_event = $head . $previous . "END:VCALENDAR\r\n";
- $this->eventItemHandler($folder, $_event, $existing, $callback);
- }
- $previous = $event;
- }
- $event = '';
+ if ($start) {
+ $add_vevent_block($start, $pos - strlen($line));
}
+
+ $start = $pos - strlen($line);
} elseif (!$got_head) {
$head .= $line;
continue;
}
- // TODO: Probably stream_get_contents() once per event would be faster than concatenating lines
- if (!str_starts_with($line, 'END:VCALENDAR')) {
- $event .= $line;
+ if (str_starts_with($line, 'END:VCALENDAR')) {
+ $pos -= strlen($line);
+ break;
}
+
+ $event .= $line;
}
- fclose($fp);
+ if ($start) {
+ $add_vevent_block($start, $pos);
+ }
- if (strlen($event)) {
- if (strpos($event, "\nRECURRENCE-ID")) {
- $previous .= $event;
- } else {
- $this->eventItemHandler($folder, $head . $event . "END:VCALENDAR\r\n", $existing, $callback);
+ // Handle the events one by one (joining multiple VEVENT blocks for the same event)
+ foreach ($events as $chunks) {
+ $event = '';
+ foreach ($chunks as $pos) {
+ [$start, $end] = explode(':', $pos);
+ $event .= stream_get_contents($fp, intval($end) - intval($start), intval($start));
}
- }
- if (strlen($previous)) {
- $this->eventItemHandler($folder, $head . $previous . "END:VCALENDAR\r\n", $existing, $callback);
+
+ $this->eventItemHandler($folder, $head . $event . "END:VCALENDAR\r\n", $existing, $callback);
}
+ fclose($fp);
return;
}
diff --git a/src/tests/Unit/DataMigrator/Driver/TakeoutTest.php b/src/tests/Unit/DataMigrator/Driver/TakeoutTest.php
new file mode 100644
--- /dev/null
+++ b/src/tests/Unit/DataMigrator/Driver/TakeoutTest.php
@@ -0,0 +1,101 @@
+<?php
+
+namespace Tests\Unit\DataMigrator\Driver;
+
+use App\DataMigrator\Account;
+use App\DataMigrator\Driver\Takeout;
+use App\DataMigrator\Driver\Test;
+use App\DataMigrator\Engine;
+use App\DataMigrator\Interface\Folder;
+use Tests\TestCase;
+
+class TakeoutTest extends TestCase
+{
+ /**
+ * {@inheritDoc}
+ */
+ public function tearDown(): void
+ {
+ exec('rm -rf ' . storage_path('export/unit@gmail.com'));
+
+ parent::tearDown();
+ }
+
+ /**
+ * Test processing content of an mbox file from Takeout archive
+ */
+ public function testMboxParsing(): void
+ {
+ $folder = Folder::fromArray(['fullname' => 'Trash', 'type' => Engine::TYPE_MAIL]);
+ [$takeout, $importer] = $this->init();
+
+ $result = [];
+ $callback = function ($item) use (&$result) {
+ // Note: Small items don't use temp files, so we can just read the content
+ // Keep the content unmodified, so the end-of-line sequences can be verified
+ $result[$item->id] = $item->content;
+ };
+
+ // Parse "All mail Including Spam and Trash.mbox" file from tests/data/takeout-unit.zip
+ $takeout->fetchItemList($folder, $callback, $importer);
+
+ $this->assertCount(1, $result);
+ $this->assertSame(0, preg_match('/[^\r]\n/', $result['<2@google.com>']));
+ $this->assertTrue(str_starts_with($result['<2@google.com>'], 'X-GM-THRID:'));
+ $this->assertTrue(str_ends_with($result['<2@google.com>'], "Message 2\r\n\r\n"));
+ }
+
+ /**
+ * Test processing content of an ics file from Takeout archive
+ */
+ public function testVCalendarParsing(): void
+ {
+ $folder = Folder::fromArray(['fullname' => 'Test', 'type' => Engine::TYPE_EVENT]);
+ [$takeout, $importer] = $this->init();
+
+ $result = [];
+ $callback = function ($item) use (&$result) {
+ // Note: Small items don't use temp files, so we can just read the content
+ // Remove line-wrapping for easier testing
+ $content = str_replace(["\r\n ", "\r\n "], '', $item->content);
+ $result[preg_replace('/\.ics$/', '', $item->filename)] = $content;
+ };
+
+ // Parse Test.ics file from tests/data/takeout-unit.zip
+ $takeout->fetchItemList($folder, $callback, $importer);
+
+ $this->assertCount(4, $result);
+
+ foreach (['1111', '2222', '3333'] as $uid) {
+ $this->assertStringContainsString("UID:{$uid}", $result[$uid], "UID:{$uid}"); // @phpstan-ignore-line
+ $this->assertSame(1, preg_match_all('/BEGIN:VEVENT/', $result[$uid]), "UID:{$uid}"); // @phpstan-ignore-line
+ $this->assertTrue(str_starts_with($result[$uid], 'BEGIN:VCALENDAR'), "UID:{$uid}"); // @phpstan-ignore-line
+ $this->assertTrue(str_ends_with($result[$uid], "END:VCALENDAR\r\n"), "UID:{$uid}"); // @phpstan-ignore-line
+ }
+
+ $this->assertSame(3, preg_match_all('/UID:recur/', $result['recur']));
+ $this->assertSame(3, preg_match_all('/BEGIN:VEVENT/', $result['recur']));
+ $this->assertStringContainsString('RECURRENCE-ID;VALUE=DATE:20250410', $result['recur']);
+ $this->assertStringContainsString('RECURRENCE-ID;VALUE=DATE:20250510', $result['recur']);
+ $this->assertTrue(str_starts_with($result['recur'], 'BEGIN:VCALENDAR'));
+ $this->assertTrue(str_ends_with($result['recur'], "END:VCALENDAR\r\n"));
+
+ // TODO: We could also use App\Backends\DAV\Vevent to parse the output and do more assertions
+ }
+
+ /**
+ * Init common objects for tests
+ */
+ private function init()
+ {
+ $source = new Account('takeout://' . self::BASE_DIR . '/data/takeout-unit.zip?user=unit@gmail.com');
+ $destination = new Account('test://test%40kolab.org:test@test');
+ $folder = Folder::fromArray(['fullname' => 'Trash', 'type' => Engine::TYPE_MAIL]);
+ $engine = new Engine();
+
+ return [
+ new Takeout($source, $engine),
+ new Test($destination, $engine),
+ ];
+ }
+}
diff --git a/src/tests/data/takeout-unit.zip b/src/tests/data/takeout-unit.zip
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Apr 3, 2:24 PM (6 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
18824305
Default Alt Text
D5142.1775226247.diff (9 KB)
Attached To
Mode
D5142: DataMigrator: Fix handling of "spread event blocks" in the Takeout's ics parser
Attached
Detach File
Event Timeline