Page MenuHomePhorge

D5142.1775226247.diff
No OneTemporary

Authored By
Unknown
Size
9 KB
Referenced Files
None
Subscribers
None

D5142.1775226247.diff

diff --git a/src/app/DataMigrator/Driver/Takeout.php b/src/app/DataMigrator/Driver/Takeout.php
--- a/src/app/DataMigrator/Driver/Takeout.php
+++ b/src/app/DataMigrator/Driver/Takeout.php
@@ -131,6 +131,11 @@
$headline = '';
while (($line = fgets($fp)) !== false) {
+ // Make sure every line ends with the correct end-of-line sequence (CRLF)
+ if (substr($line, -2) != "\r\n") {
+ $line = substr($line, 0, -1) . "\r\n";
+ }
+
if (str_starts_with($line, 'From ') && preg_match('/^From [^\s]+ [a-zA-Z]{3} [a-zA-Z]{3}/', $line)) {
$this->mailItemHandler($folder, $headline, $msg, $existing, $callback);
$msg = '';
@@ -155,52 +160,67 @@
}
// Read iCalendar file line by line
- // Note: We assume that event exceptions are always in order after the master event
+ // We can't do a single pass over the stream because events and event exceptions can be
+ // spread across the whole file, not necessarily one after another.
$fp = fopen("{$this->location}/Calendar/{$foldername}.ics", 'r');
$event = '';
- $previous = '';
$head = '';
$got_head = false;
+ $events = [];
+ $pos = 0;
+ $start = null;
+
+ $add_vevent_block = function ($start_pos, $end_pos) use (&$event, &$events) {
+ // Get the UID which will be the array key
+ if (preg_match('/\nUID:(.[^\r\n]+(\r\n[\s\t][^\r\n]+)*)/', $event, $matches)) {
+ $uid = str_replace(["\r\n ", "\r\n "], '', $matches[1]);
+ // Remember position in the stream, we don't want to copy the whole content into memory
+ $chunk = $start_pos . ':' . $end_pos;
+ $events[$uid] = isset($events[$uid]) ? array_merge($events[$uid], [$chunk]) : [$chunk];
+ }
+
+ $event = '';
+ };
while (($line = fgets($fp)) !== false) {
+ $pos += strlen($line);
+
if (str_starts_with($line, 'BEGIN:VEVENT')) {
$got_head = true;
- if (strlen($event)) {
- if (strpos($event, "\nRECURRENCE-ID")) {
- $previous .= $event;
- } else {
- if (strlen($previous)) {
- $_event = $head . $previous . "END:VCALENDAR\r\n";
- $this->eventItemHandler($folder, $_event, $existing, $callback);
- }
- $previous = $event;
- }
- $event = '';
+ if ($start) {
+ $add_vevent_block($start, $pos - strlen($line));
}
+
+ $start = $pos - strlen($line);
} elseif (!$got_head) {
$head .= $line;
continue;
}
- // TODO: Probably stream_get_contents() once per event would be faster than concatenating lines
- if (!str_starts_with($line, 'END:VCALENDAR')) {
- $event .= $line;
+ if (str_starts_with($line, 'END:VCALENDAR')) {
+ $pos -= strlen($line);
+ break;
}
+
+ $event .= $line;
}
- fclose($fp);
+ if ($start) {
+ $add_vevent_block($start, $pos);
+ }
- if (strlen($event)) {
- if (strpos($event, "\nRECURRENCE-ID")) {
- $previous .= $event;
- } else {
- $this->eventItemHandler($folder, $head . $event . "END:VCALENDAR\r\n", $existing, $callback);
+ // Handle the events one by one (joining multiple VEVENT blocks for the same event)
+ foreach ($events as $chunks) {
+ $event = '';
+ foreach ($chunks as $pos) {
+ [$start, $end] = explode(':', $pos);
+ $event .= stream_get_contents($fp, intval($end) - intval($start), intval($start));
}
- }
- if (strlen($previous)) {
- $this->eventItemHandler($folder, $head . $previous . "END:VCALENDAR\r\n", $existing, $callback);
+
+ $this->eventItemHandler($folder, $head . $event . "END:VCALENDAR\r\n", $existing, $callback);
}
+ fclose($fp);
return;
}
diff --git a/src/tests/Unit/DataMigrator/Driver/TakeoutTest.php b/src/tests/Unit/DataMigrator/Driver/TakeoutTest.php
new file mode 100644
--- /dev/null
+++ b/src/tests/Unit/DataMigrator/Driver/TakeoutTest.php
@@ -0,0 +1,101 @@
+<?php
+
+namespace Tests\Unit\DataMigrator\Driver;
+
+use App\DataMigrator\Account;
+use App\DataMigrator\Driver\Takeout;
+use App\DataMigrator\Driver\Test;
+use App\DataMigrator\Engine;
+use App\DataMigrator\Interface\Folder;
+use Tests\TestCase;
+
+class TakeoutTest extends TestCase
+{
+ /**
+ * {@inheritDoc}
+ */
+ public function tearDown(): void
+ {
+ exec('rm -rf ' . storage_path('export/unit@gmail.com'));
+
+ parent::tearDown();
+ }
+
+ /**
+ * Test processing content of an mbox file from Takeout archive
+ */
+ public function testMboxParsing(): void
+ {
+ $folder = Folder::fromArray(['fullname' => 'Trash', 'type' => Engine::TYPE_MAIL]);
+ [$takeout, $importer] = $this->init();
+
+ $result = [];
+ $callback = function ($item) use (&$result) {
+ // Note: Small items don't use temp files, so we can just read the content
+ // Keep the content unmodified, so that the line endings can be verified below
+ $result[$item->id] = $item->content;
+ };
+
+ // Parse "All mail Including Spam and Trash.mbox" file from tests/data/takeout-unit.zip
+ $takeout->fetchItemList($folder, $callback, $importer);
+
+ $this->assertCount(1, $result);
+ $this->assertSame(0, preg_match('/[^\r]\n/', $result['<2@google.com>']));
+ $this->assertTrue(str_starts_with($result['<2@google.com>'], 'X-GM-THRID:'));
+ $this->assertTrue(str_ends_with($result['<2@google.com>'], "Message 2\r\n\r\n"));
+ }
+
+ /**
+ * Test processing content of an ics file from Takeout archive
+ */
+ public function testVCalendarParsing(): void
+ {
+ $folder = Folder::fromArray(['fullname' => 'Test', 'type' => Engine::TYPE_EVENT]);
+ [$takeout, $importer] = $this->init();
+
+ $result = [];
+ $callback = function ($item) use (&$result) {
+ // Note: Small items don't use temp files, so we can just read the content
+ // Remove line-wrapping for easier testing
+ $content = str_replace(["\r\n ", "\r\n "], '', $item->content);
+ $result[preg_replace('/\.ics$/', '', $item->filename)] = $content;
+ };
+
+ // Parse Test.ics file from tests/data/takeout-unit.zip
+ $takeout->fetchItemList($folder, $callback, $importer);
+
+ $this->assertCount(4, $result);
+
+ foreach (['1111', '2222', '3333'] as $uid) {
+ $this->assertStringContainsString("UID:{$uid}", $result[$uid], "UID:{$uid}"); // @phpstan-ignore-line
+ $this->assertSame(1, preg_match_all('/BEGIN:VEVENT/', $result[$uid]), "UID:{$uid}"); // @phpstan-ignore-line
+ $this->assertTrue(str_starts_with($result[$uid], 'BEGIN:VCALENDAR'), "UID:{$uid}"); // @phpstan-ignore-line
+ $this->assertTrue(str_ends_with($result[$uid], "END:VCALENDAR\r\n"), "UID:{$uid}"); // @phpstan-ignore-line
+ }
+
+ $this->assertSame(3, preg_match_all('/UID:recur/', $result['recur']));
+ $this->assertSame(3, preg_match_all('/BEGIN:VEVENT/', $result['recur']));
+ $this->assertStringContainsString('RECURRENCE-ID;VALUE=DATE:20250410', $result['recur']);
+ $this->assertStringContainsString('RECURRENCE-ID;VALUE=DATE:20250510', $result['recur']);
+ $this->assertTrue(str_starts_with($result['recur'], 'BEGIN:VCALENDAR'));
+ $this->assertTrue(str_ends_with($result['recur'], "END:VCALENDAR\r\n"));
+
+ // TODO: We could also use App\Backends\DAV\Vevent to parse the output and do more assertions
+ }
+
+ /**
+ * Init common objects for tests
+ */
+ private function init()
+ {
+ $source = new Account('takeout://' . self::BASE_DIR . '/data/takeout-unit.zip?user=unit@gmail.com');
+ $destination = new Account('test://test%40kolab.org:test@test');
+ $folder = Folder::fromArray(['fullname' => 'Trash', 'type' => Engine::TYPE_MAIL]);
+ $engine = new Engine();
+
+ return [
+ new Takeout($source, $engine),
+ new Test($destination, $engine),
+ ];
+ }
+}
diff --git a/src/tests/data/takeout-unit.zip b/src/tests/data/takeout-unit.zip
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001

File Metadata

Mime Type
text/plain
Expires
Fri, Apr 3, 2:24 PM (6 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
18824305
Default Alt Text
D5142.1775226247.diff (9 KB)

Event Timeline