diff --git a/src/app/Console/Commands/Data/MigrateCommand.php b/src/app/Console/Commands/Data/MigrateCommand.php
--- a/src/app/Console/Commands/Data/MigrateCommand.php
+++ b/src/app/Console/Commands/Data/MigrateCommand.php
@@ -28,6 +28,7 @@
                                 {src : Source account}
                                 {dst : Destination account}
                                 {--type= : Object type(s)}
+                                {--sync : Execute migration synchronously}
                                 {--force : Force existing queue removal}';
                                 // {--export-only : Only export data}
                                 // {--import-only : Only import previously exported data}';
@@ -51,6 +52,7 @@
         $options = [
             'type' => $this->option('type'),
             'force' => $this->option('force'),
+            'sync' => $this->option('sync'),
             'stdout' => true,
         ];
 
diff --git a/src/app/DataMigrator/Account.php b/src/app/DataMigrator/Account.php
--- a/src/app/DataMigrator/Account.php
+++ b/src/app/DataMigrator/Account.php
@@ -69,9 +69,14 @@
             $this->scheme = strtolower($url['scheme']);
         }
 
+        if (isset($url['port'])) {
+            $this->port = $url['port'];
+        }
+
         if (isset($url['host'])) {
             $this->host = $url['host'];
-            $this->uri = $this->scheme . '://' . $url['host'] . ($url['path'] ?? '');
+            // The port is optional and belongs right after the host, before the path
+            $this->uri = $this->scheme . '://' . $url['host'] . (isset($url['port']) ? ':' . $url['port'] : '') . ($url['path'] ?? '');
         }
 
         if (!empty($url['query'])) {
diff --git a/src/app/DataMigrator/Engine.php b/src/app/DataMigrator/Engine.php
--- a/src/app/DataMigrator/Engine.php
+++ b/src/app/DataMigrator/Engine.php
@@ -98,6 +98,8 @@
 
         $folders = $this->exporter->getFolders($types);
         $count = 0;
+        // Folder jobs are queued unless synchronous execution was requested
+        $async = empty($options['sync']);
 
         foreach ($folders as $folder) {
             $this->debug("Processing folder {$folder->fullname}...");
@@ -105,12 +107,18 @@
             $folder->queueId = $queue_id;
             $folder->location = $location;
 
-            // Dispatch the job (for async execution)
-            Jobs\FolderJob::dispatch($folder);
-            $count++;
+            if ($async) {
+                // Dispatch the job (for async execution)
+                Jobs\FolderJob::dispatch($folder);
+                $count++;
+            } else {
+                $this->processFolder($folder);
+            }
         }
 
-        $this->queue->bumpJobsStarted($count);
+        if ($count) {
+            $this->queue->bumpJobsStarted($count);
+        }
 
         $this->debug(sprintf('Done. %d %s created in queue: %s.', $count, Str::plural('job', $count), $queue_id));
     }
@@ -126,20 +134,25 @@
         // Create the folder on the destination server
         $this->importer->createFolder($folder);
 
-        $count = 0;
+        // Let the exporter drive the folder migration if it can
+        if (method_exists($this->exporter, "processFolder")) {
+            $this->exporter->processFolder($folder, $this->importer);
+        } else {
+            $count = 0;
+
+            // Fetch items from the source
+            $this->exporter->fetchItemList(
+                $folder,
+                function (Item $item) use (&$count) {
+                    // Dispatch the job (for async execution)
+                    Jobs\ItemJob::dispatch($item);
+                    $count++;
+                }
+            );
 
-        // Fetch items from the source
-        $this->exporter->fetchItemList(
-            $folder,
-            function (Item $item) use (&$count) {
-                // Dispatch the job (for async execution)
-                Jobs\ItemJob::dispatch($item);
-                $count++;
+            if ($count) {
+                $this->queue->bumpJobsStarted($count);
             }
-        );
-
-        if ($count) {
-            $this->queue->bumpJobsStarted($count);
         }
 
         $this->queue->bumpJobsFinished();
@@ -244,6 +257,10 @@
                 $driver = new EWS($account, $this);
                 break;
 
+            case 'kolab':
+                $driver = new Kolab($account, $this);
+                break;
+
case 'dav': case 'davs': $driver = new DAV($account, $this); diff --git a/src/app/DataMigrator/Kolab.php b/src/app/DataMigrator/Kolab.php new file mode 100644 --- /dev/null +++ b/src/app/DataMigrator/Kolab.php @@ -0,0 +1,226 @@ +account = $account; + $this->engine = $engine; + + $config = self::getConfig($account->username, $account->password, $account->uri); + $this->imap = self::initIMAP($config); + } + + /** + * Create a folder. + * + * @param Folder $folder Folder data + * + * @throws \Exception on error + */ + public function createFolder(Folder $folder): void + { + if ($folder->type == "mail") { + if (!$this->imap->createFolder($folder->fullname)) { + // This may fail if the folder already exists + // throw new \Exception("Failed to create an IMAP folder {$folder->fullname}"); + } + } else { + //TODO create via dav + } + } + + private static function initIMAP(array $config, string $login_as = null) + { + $imap = new \rcube_imap_generic(); + + if (\config('app.debug')) { + $imap->setDebug(true, 'App\Backends\IMAP::logDebug'); + } + + if ($login_as) { + $config['options']['auth_cid'] = $config['user']; + $config['options']['auth_pw'] = $config['password']; + $config['options']['auth_type'] = 'PLAIN'; + $config['user'] = $login_as; + } + + $imap->connect($config['host'], $config['user'], $config['password'], $config['options']); + + if (!$imap->connected()) { + $message = sprintf("Login failed for %s against %s. 
%s", $config['user'], $config['host'], $imap->error); + + \Log::error($message); + + throw new \Exception("Connection to IMAP failed"); + } + + return $imap; + } + + /** + * Get LDAP configuration for specified access level + */ + private static function getConfig($user, $password, $uri) + { + $uri = \parse_url($uri); + $default_port = 143; + $ssl_mode = null; + + if (isset($uri['scheme'])) { + if (preg_match('/^(ssl|imaps)/', $uri['scheme'])) { + $default_port = 993; + $ssl_mode = 'ssl'; + } elseif ($uri['scheme'] === 'tls') { + $ssl_mode = 'tls'; + } + } + + $config = [ + 'host' => $uri['host'], + 'user' => $user, + 'password' => $password, + 'options' => [ + 'port' => !empty($uri['port']) ? $uri['port'] : $default_port, + 'ssl_mode' => $ssl_mode, + 'socket_options' => [ + 'ssl' => [ + 'verify_peer' => \config('imap.verify_peer'), + 'verify_peer_name' => \config('imap.verify_peer'), + 'verify_host' => \config('imap.verify_host') + ], + ], + ], + ]; + + return $config; + } + + /** + * Authenticate + */ + public function authenticate() + { + } + + /** + * Get folders hierarchy + */ + public function getFolders($types = []): array + { + $folders = $this->imap->listMailboxes('', ""); + \Log::debug("folders:" . var_export($folders, true)); + + $result = []; + foreach ($folders as $folder) { + //TODO migrate metadata? + //TODO skip groupware folders? 
+ $result[$folder] = Folder::fromArray([ + 'fullname' => $folder, + 'type' => "mail" + ]); + } + + //TODO list dav folders + + return $result; + } + + public function fetchItemList(Folder $folder, $callback): void + { + throw new \Exception("Not implemented"); + } + + public function fetchItem(Item $item): string + { + throw new \Exception("Not implemented"); + } + + public function createItemFromFile(string $filename, Folder $folder): void + { + throw new \Exception("Not implemented"); + } + + public function processFolder(Folder $folder, $importer): void + { + // Job processing - initialize environment + $this->initEnv($this->engine->queue); + + $mailbox = $folder->fullname; + + $this->imap->select($mailbox); + $importer->imap->select($mailbox); + + $sourceHeaders = $this->imap->fetchHeaders($mailbox, "1:*", true, false, ['Message-Id']); + if (empty($sourceHeaders)) { + \Log::debug("nothing to migrate for {$mailbox}"); + return; + } + $targetHeaders = $importer->imap->fetchHeaders($mailbox, "1:*", true, false, ['Message-Id']); + $targetMessageIds = array_map(function ($headers) { + return $headers->messageID; + }, $targetHeaders); + + $errorCount = 0; + foreach ($sourceHeaders as $message) { + if ($message->messageID && in_array($message->messageID, $targetMessageIds)) { + \Log::debug("skipping message based on message-id header: {$message->uid} {$message->messageID}"); + } else { + //TODO do some chunking instead of each message individually + $sourceMessages = $this->imap->fetch($mailbox, $message->uid, true, ["RFC822", "FLAGS"]); + foreach ($sourceMessages as $sourceMessage) { + if (!$importer->imap->append($mailbox, $sourceMessage->body, array_keys($sourceMessage->flags), $sourceMessage->internaldate)) { + $errorCount++; + \Log::debug("Failed to append message: {$message->uid}"); + } else { + \Log::debug("Appended message: {$message->uid}"); + } + } + } + } + if ($errorCount) { + \Log::debug("Encountered {$errorCount} errors on folder {$mailbox}"); + } + } + 
+ /** + * Initialize environment for job execution + * + * @param Queue $queue Queue + */ + protected function initEnv(Queue $queue): void + { + // $ews = $queue->data['options']['ews']; + // $this->source = new Account($this->queue->data['source']); + // $this->destination = new Account($this->queue->data['destination']); + // $this->options = $this->queue->data['options']; + // $this->importer = new DAVClient($this->destination); + } + +} diff --git a/src/include/rcube_charset.php b/src/include/rcube_charset.php new file mode 100644 --- /dev/null +++ b/src/include/rcube_charset.php @@ -0,0 +1,570 @@ + | + | | + | Licensed under the GNU General Public License version 3 or | + | any later version with exceptions for skins & plugins. | + | See the README file for a full license statement. | + | | + | PURPOSE: | + | Provide charset conversion functionality | + +-----------------------------------------------------------------------+ + | Author: Thomas Bruederli | + | Author: Aleksander Machniak | + | Author: Edmund Grimley Evans | + +-----------------------------------------------------------------------+ +*/ + +/** + * Character sets conversion functionality + * + * @package Framework + * @subpackage Core + */ +class rcube_charset +{ + /** + * Character set aliases (some of them from HTML5 spec.) 
+ * + * @var array + */ + static public $aliases = [ + 'USASCII' => 'WINDOWS-1252', + 'ANSIX31101983' => 'WINDOWS-1252', + 'ANSIX341968' => 'WINDOWS-1252', + 'UNKNOWN8BIT' => 'ISO-8859-15', + 'UNKNOWN' => 'ISO-8859-15', + 'USERDEFINED' => 'ISO-8859-15', + 'KSC56011987' => 'EUC-KR', + 'GB2312' => 'GBK', + 'GB231280' => 'GBK', + 'UNICODE' => 'UTF-8', + 'UTF7IMAP' => 'UTF7-IMAP', + 'TIS620' => 'WINDOWS-874', + 'ISO88599' => 'WINDOWS-1254', + 'ISO885911' => 'WINDOWS-874', + 'MACROMAN' => 'MACINTOSH', + '77' => 'MAC', + '128' => 'SHIFT-JIS', + '129' => 'CP949', + '130' => 'CP1361', + '134' => 'GBK', + '136' => 'BIG5', + '161' => 'WINDOWS-1253', + '162' => 'WINDOWS-1254', + '163' => 'WINDOWS-1258', + '177' => 'WINDOWS-1255', + '178' => 'WINDOWS-1256', + '186' => 'WINDOWS-1257', + '204' => 'WINDOWS-1251', + '222' => 'WINDOWS-874', + '238' => 'WINDOWS-1250', + 'MS950' => 'CP950', + 'WINDOWS949' => 'UHC', + 'WINDOWS1257' => 'ISO-8859-13', + 'ISO2022JP' => 'ISO-2022-JP-MS', + ]; + + /** + * Windows codepages + * + * @var array + */ + static public $windows_codepages = [ + 37 => 'IBM037', // IBM EBCDIC US-Canada + 437 => 'IBM437', // OEM United States + 500 => 'IBM500', // IBM EBCDIC International + 708 => 'ASMO-708', // Arabic (ASMO 708) + 720 => 'DOS-720', // Arabic (Transparent ASMO); Arabic (DOS) + 737 => 'IBM737', // OEM Greek (formerly 437G); Greek (DOS) + 775 => 'IBM775', // OEM Baltic; Baltic (DOS) + 850 => 'IBM850', // OEM Multilingual Latin 1; Western European (DOS) + 852 => 'IBM852', // OEM Latin 2; Central European (DOS) + 855 => 'IBM855', // OEM Cyrillic (primarily Russian) + 857 => 'IBM857', // OEM Turkish; Turkish (DOS) + 858 => 'IBM00858', // OEM Multilingual Latin 1 + Euro symbol + 860 => 'IBM860', // OEM Portuguese; Portuguese (DOS) + 861 => 'IBM861', // OEM Icelandic; Icelandic (DOS) + 862 => 'DOS-862', // OEM Hebrew; Hebrew (DOS) + 863 => 'IBM863', // OEM French Canadian; French Canadian (DOS) + 864 => 'IBM864', // OEM Arabic; Arabic (864) + 865 => 
'IBM865', // OEM Nordic; Nordic (DOS) + 866 => 'cp866', // OEM Russian; Cyrillic (DOS) + 869 => 'IBM869', // OEM Modern Greek; Greek, Modern (DOS) + 870 => 'IBM870', // IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 + 874 => 'windows-874', // ANSI/OEM Thai (ISO 8859-11); Thai (Windows) + 875 => 'cp875', // IBM EBCDIC Greek Modern + 932 => 'shift_jis', // ANSI/OEM Japanese; Japanese (Shift-JIS) + 936 => 'gb2312', // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) + 950 => 'big5', // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) + 1026 => 'IBM1026', // IBM EBCDIC Turkish (Latin 5) + 1047 => 'IBM01047', // IBM EBCDIC Latin 1/Open System + 1140 => 'IBM01140', // IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) + 1141 => 'IBM01141', // IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) + 1142 => 'IBM01142', // IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) + 1143 => 'IBM01143', // IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) + 1144 => 'IBM01144', // IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) + 1145 => 'IBM01145', // IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) + 1146 => 'IBM01146', // IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) + 1147 => 'IBM01147', // IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) + 1148 => 'IBM01148', // IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) + 1149 => 'IBM01149', // IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) + 1200 => 'UTF-16', // Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications + 1201 => 'UTF-16BE', // Unicode UTF-16, big endian byte order; available only to managed applications + 1250 => 'windows-1250', // ANSI Central European; Central 
European (Windows) + 1251 => 'windows-1251', // ANSI Cyrillic; Cyrillic (Windows) + 1252 => 'windows-1252', // ANSI Latin 1; Western European (Windows) + 1253 => 'windows-1253', // ANSI Greek; Greek (Windows) + 1254 => 'windows-1254', // ANSI Turkish; Turkish (Windows) + 1255 => 'windows-1255', // ANSI Hebrew; Hebrew (Windows) + 1256 => 'windows-1256', // ANSI Arabic; Arabic (Windows) + 1257 => 'windows-1257', // ANSI Baltic; Baltic (Windows) + 1258 => 'windows-1258', // ANSI/OEM Vietnamese; Vietnamese (Windows) + 10000 => 'macintosh', // MAC Roman; Western European (Mac) + 12000 => 'UTF-32', // Unicode UTF-32, little endian byte order; available only to managed applications + 12001 => 'UTF-32BE', // Unicode UTF-32, big endian byte order; available only to managed applications + 20127 => 'US-ASCII', // US-ASCII (7-bit) + 20273 => 'IBM273', // IBM EBCDIC Germany + 20277 => 'IBM277', // IBM EBCDIC Denmark-Norway + 20278 => 'IBM278', // IBM EBCDIC Finland-Sweden + 20280 => 'IBM280', // IBM EBCDIC Italy + 20284 => 'IBM284', // IBM EBCDIC Latin America-Spain + 20285 => 'IBM285', // IBM EBCDIC United Kingdom + 20290 => 'IBM290', // IBM EBCDIC Japanese Katakana Extended + 20297 => 'IBM297', // IBM EBCDIC France + 20420 => 'IBM420', // IBM EBCDIC Arabic + 20423 => 'IBM423', // IBM EBCDIC Greek + 20424 => 'IBM424', // IBM EBCDIC Hebrew + 20838 => 'IBM-Thai', // IBM EBCDIC Thai + 20866 => 'koi8-r', // Russian (KOI8-R); Cyrillic (KOI8-R) + 20871 => 'IBM871', // IBM EBCDIC Icelandic + 20880 => 'IBM880', // IBM EBCDIC Cyrillic Russian + 20905 => 'IBM905', // IBM EBCDIC Turkish + 20924 => 'IBM00924', // IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) + 20932 => 'EUC-JP', // Japanese (JIS 0208-1990 and 0212-1990) + 20936 => 'cp20936', // Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) + 20949 => 'cp20949', // Korean Wansung + 21025 => 'cp1025', // IBM EBCDIC Cyrillic Serbian-Bulgarian + 21866 => 'koi8-u', // Ukrainian (KOI8-U); Cyrillic (KOI8-U) + 28591 => 
'iso-8859-1', // ISO 8859-1 Latin 1; Western European (ISO) + 28592 => 'iso-8859-2', // ISO 8859-2 Central European; Central European (ISO) + 28593 => 'iso-8859-3', // ISO 8859-3 Latin 3 + 28594 => 'iso-8859-4', // ISO 8859-4 Baltic + 28595 => 'iso-8859-5', // ISO 8859-5 Cyrillic + 28596 => 'iso-8859-6', // ISO 8859-6 Arabic + 28597 => 'iso-8859-7', // ISO 8859-7 Greek + 28598 => 'iso-8859-8', // ISO 8859-8 Hebrew; Hebrew (ISO-Visual) + 28599 => 'iso-8859-9', // ISO 8859-9 Turkish + 28603 => 'iso-8859-13', // ISO 8859-13 Estonian + 28605 => 'iso-8859-15', // ISO 8859-15 Latin 9 + 38598 => 'iso-8859-8-i', // ISO 8859-8 Hebrew; Hebrew (ISO-Logical) + 50220 => 'iso-2022-jp', // ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) + 50221 => 'csISO2022JP', // ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) + 50222 => 'iso-2022-jp', // ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) + 50225 => 'iso-2022-kr', // ISO 2022 Korean + 51932 => 'EUC-JP', // EUC Japanese + 51936 => 'EUC-CN', // EUC Simplified Chinese; Chinese Simplified (EUC) + 51949 => 'EUC-KR', // EUC Korean + 52936 => 'hz-gb-2312', // HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) + 54936 => 'GB18030', // Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) + 65000 => 'UTF-7', + 65001 => 'UTF-8', + ]; + + /** + * Validate character set identifier. + * + * @param string $input Character set identifier + * + * @return bool True if valid, False if not valid + */ + public static function is_valid($input) + { + return is_string($input) && preg_match('|^[a-zA-Z0-9_./:#-]{2,32}$|', $input) > 0; + } + + /** + * Parse and validate charset name string. 
+ * Sometimes charset string is malformed, there are also charset aliases, + * but we need strict names for charset conversion (specially utf8 class) + * + * @param string $input Input charset name + * + * @return string The validated charset name + */ + public static function parse_charset($input) + { + static $charsets = []; + + $charset = strtoupper($input); + + if (isset($charsets[$input])) { + return $charsets[$input]; + } + + $charset = preg_replace([ + '/^[^0-9A-Z]+/', // e.g. _ISO-8859-JP$SIO + '/\$.*$/', // e.g. _ISO-8859-JP$SIO + '/UNICODE-1-1-*/', // RFC1641/1642 + '/^X-/', // X- prefix (e.g. X-ROMAN8 => ROMAN8) + '/\*.*$/' // lang code according to RFC 2231.5 + ], '', $charset); + + if ($charset == 'BINARY') { + return $charsets[$input] = null; + } + + // allow A-Z and 0-9 only + $str = preg_replace('/[^A-Z0-9]/', '', $charset); + + $result = $charset; + + if (isset(self::$aliases[$str])) { + $result = self::$aliases[$str]; + } + // UTF + else if (preg_match('/U[A-Z][A-Z](7|8|16|32)(BE|LE)*/', $str, $m)) { + $result = 'UTF-' . $m[1] . (!empty($m[2]) ? $m[2] : ''); + } + // ISO-8859 + else if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) { + $iso = 'ISO-8859-' . ($m[1] ?: 1); + // some clients sends windows-1252 text as latin1, + // it is safe to use windows-1252 for all latin1 + $result = $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso; + } + // handle broken charset names e.g. WINDOWS-1250HTTP-EQUIVCONTENT-TYPE + else if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) { + $result = 'WINDOWS-' . 
$m[2]; + } + // LATIN + else if (preg_match('/LATIN(.*)/', $str, $m)) { + $aliases = ['2' => 2, '3' => 3, '4' => 4, '5' => 9, '6' => 10, + '7' => 13, '8' => 14, '9' => 15, '10' => 16, + 'ARABIC' => 6, 'CYRILLIC' => 5, 'GREEK' => 7, 'GREEK1' => 7, 'HEBREW' => 8 + ]; + + // some clients sends windows-1252 text as latin1, + // it is safe to use windows-1252 for all latin1 + if ($m[1] == 1) { + $result = 'WINDOWS-1252'; + } + // we need ISO labels + else if (!empty($aliases[$m[1]])) { + $result = 'ISO-8859-'.$aliases[$m[1]]; + } + } + + $charsets[$input] = $result; + + return $result; + } + + /** + * Convert a string from one charset to another. + * + * @param string $str Input string + * @param string $from Suspected charset of the input string + * @param string $to Target charset to convert to; defaults to RCUBE_CHARSET + * + * @return string Converted string + */ + public static function convert($str, $from, $to = null) + { + static $iconv_options; + + $to = empty($to) ? RCUBE_CHARSET : self::parse_charset($to); + $from = self::parse_charset($from); + + // It is a common case when UTF-16 charset is used with US-ASCII content (#1488654) + // In that case we can just skip the conversion (use UTF-8) + if ($from == 'UTF-16' && !preg_match('/[^\x00-\x7F]/', $str)) { + $from = 'UTF-8'; + } + + if ($from == $to || empty($str) || empty($from)) { + return $str; + } + + $out = false; + $error_handler = function() { throw new \Exception(); }; + + // Ignore invalid characters + $mbstring_sc = mb_substitute_character(); + mb_substitute_character('none'); + + // If mbstring reports an illegal character in input via E_WARNING. + // FIXME: Is this really true with substitute character 'none'? 
+ // A warning is thrown in PHP<8 also on unsupported encoding, in PHP>=8 ValueError + // is thrown instead (therefore we catch Throwable below) + set_error_handler($error_handler, E_WARNING); + + try { + $out = mb_convert_encoding($str, $to, $from); + } + catch (Throwable $e) { + $out = false; + } + catch (Exception $e) { + $out = false; + } + + restore_error_handler(); + mb_substitute_character($mbstring_sc); + + if ($out !== false) { + return $out; + } + + if ($iconv_options === null) { + if (function_exists('iconv')) { + // ignore characters not available in output charset + $iconv_options = '//IGNORE'; + if (iconv('', $iconv_options, '') === false) { + // iconv implementation does not support options + $iconv_options = ''; + } + } + else { + $iconv_options = false; + } + } + + // Fallback to iconv module, it is slower, but supports much more charsets than mbstring + if ($iconv_options !== false && $from != 'UTF7-IMAP' && $to != 'UTF7-IMAP' + && $from !== 'ISO-2022-JP' + ) { + // If iconv reports an illegal character in input it means that input string + // has been truncated. It's reported as E_NOTICE. + // PHP8 will also throw E_WARNING on unsupported encoding. + set_error_handler($error_handler, E_NOTICE | E_WARNING); + + try { + $out = iconv($from, $to . $iconv_options, $str); + } + catch (Throwable $e) { + $out = false; + } + catch (Exception $e) { + $out = false; + } + + restore_error_handler(); + + if ($out !== false) { + return $out; + } + } + + // return the original string + return $str; + } + + /** + * Converts string from standard UTF-7 (RFC 2152) to UTF-8. 
+ * + * @param string $str Input string (UTF-7) + * + * @return string Converted string (UTF-8) + * @deprecated use self::convert() + */ + public static function utf7_to_utf8($str) + { + return self::convert($str, 'UTF-7', 'UTF-8'); + } + + /** + * Converts string from UTF-16 to UTF-8 (helper for utf-7 to utf-8 conversion) + * + * @param string $str Input string + * + * @return string The converted string + * @deprecated use self::convert() + */ + public static function utf16_to_utf8($str) + { + return self::convert($str, 'UTF-16BE', 'UTF-8'); + } + + /** + * Convert the data ($str) from RFC 2060's UTF-7 to UTF-8. + * If input data is invalid, return the original input string. + * RFC 2060 obviously intends the encoding to be unique (see + * point 5 in section 5.1.3), so we reject any non-canonical + * form, such as &ACY- (instead of &-) or &AMA-&AMA- (instead + * of &AMAAwA-). + * + * @param string $str Input string (UTF7-IMAP) + * + * @return string Output string (UTF-8) + * @deprecated use self::convert() + */ + public static function utf7imap_to_utf8($str) + { + return self::convert($str, 'UTF7-IMAP', 'UTF-8'); + } + + /** + * Convert the data ($str) from UTF-8 to RFC 2060's UTF-7. + * Unicode characters above U+FFFF are replaced by U+FFFE. + * If input data is invalid, return an empty string. + * + * @param string $str Input string (UTF-8) + * + * @return string Output string (UTF7-IMAP) + * @deprecated use self::convert() + */ + public static function utf8_to_utf7imap($str) + { + return self::convert($str, 'UTF-8', 'UTF7-IMAP'); + } + + /** + * A method to guess character set of a string. 
+ * + * @param string $string String + * @param string $failover Default result for failover + * @param string $language User language + * + * @return string Charset name + */ + public static function detect($string, $failover = null, $language = null) + { + if (substr($string, 0, 4) == "\0\0\xFE\xFF") return 'UTF-32BE'; // Big Endian + if (substr($string, 0, 4) == "\xFF\xFE\0\0") return 'UTF-32LE'; // Little Endian + if (substr($string, 0, 2) == "\xFE\xFF") return 'UTF-16BE'; // Big Endian + if (substr($string, 0, 2) == "\xFF\xFE") return 'UTF-16LE'; // Little Endian + if (substr($string, 0, 3) == "\xEF\xBB\xBF") return 'UTF-8'; + + // heuristics + if (strlen($string) >= 4) { + if ($string[0] == "\0" && $string[1] == "\0" && $string[2] == "\0" && $string[3] != "\0") return 'UTF-32BE'; + if ($string[0] != "\0" && $string[1] == "\0" && $string[2] == "\0" && $string[3] == "\0") return 'UTF-32LE'; + if ($string[0] == "\0" && $string[1] != "\0" && $string[2] == "\0" && $string[3] != "\0") return 'UTF-16BE'; + if ($string[0] != "\0" && $string[1] == "\0" && $string[2] != "\0" && $string[3] == "\0") return 'UTF-16LE'; + } + + if (empty($language)) { + $rcube = rcube::get_instance(); + $language = $rcube->get_user_language(); + } + + // Prioritize charsets according to current language (#1485669) + $prio = null; + switch ($language) { + case 'ja_JP': + $prio = ['ISO-2022-JP', 'JIS', 'UTF-8', 'EUC-JP', 'eucJP-win', 'SJIS', 'SJIS-win']; + break; + + case 'zh_CN': + case 'zh_TW': + $prio = ['UTF-8', 'BIG-5', 'GB2312', 'EUC-TW']; + break; + + case 'ko_KR': + $prio = ['UTF-8', 'EUC-KR', 'ISO-2022-KR']; + break; + + case 'ru_RU': + $prio = ['UTF-8', 'WINDOWS-1251', 'KOI8-R']; + break; + + case 'tr_TR': + $prio = ['UTF-8', 'ISO-8859-9', 'WINDOWS-1254']; + break; + } + + // mb_detect_encoding() is not reliable for some charsets (#1490135) + // use mb_check_encoding() to make charset priority lists really working + if (!empty($prio) && function_exists('mb_check_encoding')) { + 
foreach ($prio as $encoding) { + if (mb_check_encoding($string, $encoding)) { + return $encoding; + } + } + } + + if (function_exists('mb_detect_encoding')) { + if (empty($prio)) { + $prio = ['UTF-8', 'SJIS', 'GB2312', + 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', + 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', + 'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16', + 'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R', 'BIG-5', + 'ISO-2022-KR', 'ISO-2022-JP', + ]; + } + + $encodings = array_unique(array_merge($prio, mb_list_encodings())); + + if ($encoding = mb_detect_encoding($string, $encodings)) { + return $encoding; + } + } + + // No match, check for UTF-8 + // from http://w3.org/International/questions/qa-forms-utf-8.html + if (preg_match('/\A( + [\x09\x0A\x0D\x20-\x7E] + | [\xC2-\xDF][\x80-\xBF] + | \xE0[\xA0-\xBF][\x80-\xBF] + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} + | \xED[\x80-\x9F][\x80-\xBF] + | \xF0[\x90-\xBF][\x80-\xBF]{2} + | [\xF1-\xF3][\x80-\xBF]{3} + | \xF4[\x80-\x8F][\x80-\xBF]{2} + )*\z/xs', substr($string, 0, 2048)) + ) { + return 'UTF-8'; + } + + return $failover; + } + + /** + * Removes non-unicode characters from input. + * If the input is an array, both values and keys will be cleaned up. + * + * @param mixed $input String or array. + * + * @return mixed String or array + */ + public static function clean($input) + { + // handle input of type array + if (is_array($input)) { + foreach (array_keys($input) as $key) { + $k = is_string($key) ? 
self::clean($key) : $key; + $v = self::clean($input[$key]); + + if ($k !== $key) { + unset($input[$key]); + if (!array_key_exists($k, $input)) { + $input[$k] = $v; + } + } + else { + $input[$k] = $v; + } + } + return $input; + } + + if (!is_string($input) || $input == '') { + return $input; + } + + $msch = mb_substitute_character(); + mb_substitute_character('none'); + $res = mb_convert_encoding($input, 'UTF-8', 'UTF-8'); + mb_substitute_character($msch); + + return $res; + } +} diff --git a/src/include/rcube_imap_generic.php b/src/include/rcube_imap_generic.php --- a/src/include/rcube_imap_generic.php +++ b/src/include/rcube_imap_generic.php @@ -24,6 +24,14 @@ +-----------------------------------------------------------------------+ */ +define('RCUBE_CHARSET', 'UTF-8'); + +require_once "rcube_utils.php"; +require_once "rcube_result_index.php"; +require_once "rcube_message_header.php"; +require_once "rcube_mime.php"; +require_once "rcube_charset.php"; + /** * PHP based wrapper class to connect to an IMAP server */ diff --git a/src/include/rcube_message_header.php b/src/include/rcube_message_header.php new file mode 100644 --- /dev/null +++ b/src/include/rcube_message_header.php @@ -0,0 +1,338 @@ + | + +-----------------------------------------------------------------------+ +*/ + +/** + * Struct representing an e-mail message header + * + * @package Framework + * @subpackage Storage + */ +class rcube_message_header +{ + /** + * Message sequence number + * + * @var int + */ + public $id; + + /** + * Message unique identifier + * + * @var int + */ + public $uid; + + /** + * Message subject + * + * @var string + */ + public $subject; + + /** + * Message sender (From) + * + * @var string + */ + public $from; + + /** + * Message recipient (To) + * + * @var string + */ + public $to; + + /** + * Message additional recipients (Cc) + * + * @var string + */ + public $cc; + + /** + * Message Reply-To header + * + * @var string + */ + public $replyto; + + /** + * Message 
In-Reply-To header + * + * @var string + */ + public $in_reply_to; + + /** + * Message date (Date) + * + * @var string + */ + public $date; + + /** + * Message identifier (Message-ID) + * + * @var string + */ + public $messageID; + + /** + * Message size + * + * @var int + */ + public $size; + + /** + * Message encoding + * + * @var string + */ + public $encoding; + + /** + * Message charset + * + * @var string + */ + public $charset; + + /** + * Message Content-type + * + * @var string + */ + public $ctype; + + /** + * Message timestamp (based on message date) + * + * @var int + */ + public $timestamp; + + /** + * IMAP bodystructure string + * + * @var string + */ + public $bodystructure; + + /** + * IMAP internal date + * + * @var string + */ + public $internaldate; + + /** + * Message References header + * + * @var string + */ + public $references; + + /** + * Message priority (X-Priority) + * + * @var int + */ + public $priority; + + /** + * Message receipt recipient + * + * @var string + */ + public $mdn_to; + + /** + * IMAP folder this message is stored in + * + * @var string + */ + public $folder; + + /** + * Other message headers + * + * @var array + */ + public $others = []; + + /** + * Message flags + * + * @var array + */ + public $flags = []; + + /** + * Header name to rcube_message_header object property map + * + * @var array + */ + private $obj_headers = [ + 'date' => 'date', + 'from' => 'from', + 'to' => 'to', + 'subject' => 'subject', + 'reply-to' => 'replyto', + 'cc' => 'cc', + 'bcc' => 'bcc', + 'mbox' => 'folder', + 'folder' => 'folder', + 'content-transfer-encoding' => 'encoding', + 'in-reply-to' => 'in_reply_to', + 'content-type' => 'ctype', + 'charset' => 'charset', + 'references' => 'references', + 'disposition-notification-to' => 'mdn_to', + 'x-confirm-reading-to' => 'mdn_to', + 'message-id' => 'messageID', + 'x-priority' => 'priority', + ]; + + /** + * Returns header value + * + * @param string $name Header name + * @param bool $decode 
Decode the header content + * + * @param string|null Header content + */ + public function get($name, $decode = true) + { + $name = strtolower($name); + $value = null; + + if (isset($this->obj_headers[$name]) && isset($this->{$this->obj_headers[$name]})) { + $value = $this->{$this->obj_headers[$name]}; + } + else if (isset($this->others[$name])) { + $value = $this->others[$name]; + } + + if ($decode && $value !== null) { + if (is_array($value)) { + foreach ($value as $key => $val) { + $val = rcube_mime::decode_header($val, $this->charset); + $value[$key] = rcube_charset::clean($val); + } + } + else { + $value = rcube_mime::decode_header($value, $this->charset); + $value = rcube_charset::clean($value); + } + } + + return $value; + } + + /** + * Sets header value + * + * @param string $name Header name + * @param string $value Header content + */ + public function set($name, $value) + { + $name = strtolower($name); + + if (isset($this->obj_headers[$name])) { + $this->{$this->obj_headers[$name]} = $value; + } + else { + $this->others[$name] = $value; + } + } + + /** + * Factory method to instantiate headers from a data array + * + * @param array $arr Hash array with header values + * + * @return rcube_message_header instance filled with headers values + */ + public static function from_array($arr) + { + $obj = new rcube_message_header; + foreach ($arr as $k => $v) { + $obj->set($k, $v); + } + + return $obj; + } +} + + +/** + * Class for sorting an array of rcube_message_header objects in a predetermined order. + * + * @package Framework + * @subpackage Storage + */ +class rcube_message_header_sorter +{ + /** @var array Message UIDs */ + private $uids = []; + + + /** + * Set the predetermined sort order. 
+ * + * @param array $index Numerically indexed array of IMAP UIDs + */ + function set_index($index) + { + $index = array_flip($index); + + $this->uids = $index; + } + + /** + * Sort the array of header objects + * + * @param array $headers Array of rcube_message_header objects indexed by UID + */ + function sort_headers(&$headers) + { + uksort($headers, [$this, "compare_uids"]); + } + + /** + * Sort method called by uksort() + * + * @param int $a Array key (UID) + * @param int $b Array key (UID) + */ + function compare_uids($a, $b) + { + // then find each sequence number in my ordered list + $posa = isset($this->uids[$a]) ? intval($this->uids[$a]) : -1; + $posb = isset($this->uids[$b]) ? intval($this->uids[$b]) : -1; + + // return the relative position as the comparison value + return $posa - $posb; + } +} diff --git a/src/include/rcube_mime.php b/src/include/rcube_mime.php new file mode 100644 --- /dev/null +++ b/src/include/rcube_mime.php @@ -0,0 +1,992 @@ + | + | Author: Aleksander Machniak | + +-----------------------------------------------------------------------+ +*/ + +/** + * Class for parsing MIME messages + * + * @package Framework + * @subpackage Storage + */ +class rcube_mime +{ + private static $default_charset; + + + /** + * Object constructor. + */ + function __construct($default_charset = null) + { + self::$default_charset = $default_charset; + } + + /** + * Returns message/object character set name + * + * @return string Character set name + */ + public static function get_charset() + { + if (self::$default_charset) { + return self::$default_charset; + } + + if ($charset = rcube::get_instance()->config->get('default_charset')) { + return $charset; + } + + return RCUBE_CHARSET; + } + + /** + * Parse the given raw message source and return a structure + * of rcube_message_part objects. 
+ * + * It makes use of the rcube_mime_decode library + * + * @param string $raw_body The message source + * + * @return object rcube_message_part The message structure + */ + public static function parse_message($raw_body) + { + $conf = [ + 'include_bodies' => true, + 'decode_bodies' => true, + 'decode_headers' => false, + 'default_charset' => self::get_charset(), + ]; + + $mime = new rcube_mime_decode($conf); + + return $mime->decode($raw_body); + } + + /** + * Split an address list into a structured array list + * + * @param string|array $input Input string (or list of strings) + * @param int $max List only this number of addresses + * @param bool $decode Decode address strings + * @param string $fallback Fallback charset if none specified + * @param bool $addronly Return flat array with e-mail addresses only + * + * @return array Indexed list of addresses + */ + static function decode_address_list($input, $max = null, $decode = true, $fallback = null, $addronly = false) + { + // A common case when the same header is used many times in a mail message + if (is_array($input)) { + $input = implode(', ', $input); + } + + $a = self::parse_address_list($input, $decode, $fallback); + $out = []; + $j = 0; + + // Special chars as defined by RFC 822 need to in quoted string (or escaped). + $special_chars = '[\(\)\<\>\\\.\[\]@,;:"]'; + + if (!is_array($a)) { + return $out; + } + + foreach ($a as $val) { + $j++; + $address = trim($val['address']); + + if ($addronly) { + $out[$j] = $address; + } + else { + $name = trim($val['name']); + $string = ''; + + if ($name && $address && $name != $address) { + $string = sprintf('%s <%s>', preg_match("/$special_chars/", $name) ? 
'"'.addcslashes($name, '"').'"' : $name, $address); + } + else if ($address) { + $string = $address; + } + else if ($name) { + $string = $name; + } + + $out[$j] = ['name' => $name, 'mailto' => $address, 'string' => $string]; + } + + if ($max && $j == $max) { + break; + } + } + + return $out; + } + + /** + * Decode a message header value + * + * @param string $input Header value + * @param string $fallback Fallback charset if none specified + * + * @return string Decoded string + */ + public static function decode_header($input, $fallback = null) + { + $str = self::decode_mime_string((string)$input, $fallback); + + return $str; + } + + /** + * Decode a mime-encoded string to internal charset + * + * @param string $input Header value + * @param string $fallback Fallback charset if none specified + * + * @return string Decoded string + */ + public static function decode_mime_string($input, $fallback = null) + { + $default_charset = $fallback ?: self::get_charset(); + + // rfc: all line breaks or other characters not found + // in the Base64 Alphabet must be ignored by decoding software + // delete all blanks between MIME-lines, differently we can + // receive unnecessary blanks and broken utf-8 symbols + $input = preg_replace("/\?=\s+=\?/", '?==?', $input); + + // encoded-word regexp + $re = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/'; + + // Find all RFC2047's encoded words + if (preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) { + // Initialize variables + $tmp = []; + $out = ''; + $start = 0; + + foreach ($matches as $idx => $m) { + $pos = $m[0][1]; + $charset = $m[1][0]; + $encoding = $m[2][0]; + $text = $m[3][0]; + $length = strlen($m[0][0]); + + // Append everything that is before the text to be decoded + if ($start != $pos) { + $substr = substr($input, $start, $pos-$start); + $out .= rcube_charset::convert($substr, $default_charset); + $start = $pos; + } + $start += $length; + + // Per RFC2047, each string part "MUST represent an integral 
number + // of characters . A multi-octet character may not be split across + // adjacent encoded-words." However, some mailers break this, so we + // try to handle characters spanned across parts anyway by iterating + // through and aggregating sequential encoded parts with the same + // character set and encoding, then perform the decoding on the + // aggregation as a whole. + + $tmp[] = $text; + if (!empty($matches[$idx+1]) && ($next_match = $matches[$idx+1])) { + if ($next_match[0][1] == $start + && $next_match[1][0] == $charset + && $next_match[2][0] == $encoding + ) { + continue; + } + } + + $count = count($tmp); + $text = ''; + + // Decode and join encoded-word's chunks + if ($encoding == 'B' || $encoding == 'b') { + $rest = ''; + // base64 must be decoded a segment at a time. + // However, there are broken implementations that continue + // in the following word, we'll handle that (#6048) + for ($i=0; $i<$count; $i++) { + $chunk = $rest . $tmp[$i]; + $length = strlen($chunk); + if ($length % 4) { + $length = floor($length / 4) * 4; + $rest = substr($chunk, $length); + $chunk = substr($chunk, 0, $length); + } + + $text .= base64_decode($chunk); + } + } + else { // if ($encoding == 'Q' || $encoding == 'q') { + // quoted printable can be combined and processed at once + for ($i=0; $i<$count; $i++) { + $text .= $tmp[$i]; + } + + $text = str_replace('_', ' ', $text); + $text = quoted_printable_decode($text); + } + + $out .= rcube_charset::convert($text, $charset); + $tmp = []; + } + + // add the last part of the input string + if ($start != strlen($input)) { + $out .= rcube_charset::convert(substr($input, $start), $default_charset); + } + + // return the results + return $out; + } + + // no encoding information, use fallback + return rcube_charset::convert($input, $default_charset); + } + + /** + * Decode a mime part + * + * @param string $input Input string + * @param string $encoding Part encoding + * + * @return string Decoded string + */ + public static 
function decode($input, $encoding = '7bit') + { + switch (strtolower($encoding)) { + case 'quoted-printable': + return quoted_printable_decode($input); + case 'base64': + return base64_decode($input); + case 'x-uuencode': + case 'x-uue': + case 'uue': + case 'uuencode': + return convert_uudecode($input); + case '7bit': + default: + return $input; + } + } + + /** + * Split RFC822 header string into an associative array + */ + public static function parse_headers($headers) + { + $result = []; + $headers = preg_replace('/\r?\n(\t| )+/', ' ', $headers); + $lines = explode("\n", $headers); + $count = count($lines); + + for ($i=0; $i<$count; $i++) { + if ($p = strpos($lines[$i], ': ')) { + $field = strtolower(substr($lines[$i], 0, $p)); + $value = trim(substr($lines[$i], $p+1)); + if (!empty($value)) { + $result[$field] = $value; + } + } + } + + return $result; + } + + /** + * E-mail address list parser + */ + private static function parse_address_list($str, $decode = true, $fallback = null) + { + // remove any newlines and carriage returns before + $str = $str === null ? 
null : preg_replace('/\r?\n(\s|\t)?/', ' ', $str); + + // extract list items, remove comments + $str = self::explode_header_string(',;', $str, true); + + // simplified regexp, supporting quoted local part + $email_rx = '([^\s:]+|("\s*(?:[^"\f\n\r\t\v\b\s]+\s*)+"))@\S+'; + + $result = []; + + foreach ($str as $key => $val) { + $name = ''; + $address = ''; + $val = trim($val); + + // First token might be a group name, ignore it + $tokens = self::explode_header_string(' ', $val); + if (isset($tokens[0]) && $tokens[0][strlen($tokens[0])-1] == ':') { + $val = substr($val, strlen($tokens[0])); + } + + if (preg_match('/(.*)<('.$email_rx.')$/', $val, $m)) { + // Note: There are cases like "Test'); + $name = trim($m[1]); + } + else if (preg_match('/^('.$email_rx.')$/', $val, $m)) { + $address = $m[1]; + $name = ''; + } + // special case (#1489092) + else if (preg_match('/(\s*)$/', $val, $m)) { + $address = 'MAILER-DAEMON'; + $name = substr($val, 0, -strlen($m[1])); + } + else if (preg_match('/('.$email_rx.')/', $val, $m)) { + $name = $m[1]; + } + else { + $name = $val; + } + + // unquote and/or decode name + if ($name) { + // An unquoted name ending with colon is a address group name, ignore it + if ($name[strlen($name)-1] == ':') { + $name = ''; + } + + if (strlen($name) > 1 && $name[0] == '"' && $name[strlen($name)-1] == '"') { + $name = substr($name, 1, -1); + $name = stripslashes($name); + } + + if ($decode) { + $name = self::decode_header($name, $fallback); + // some clients encode addressee name with quotes around it + if (strlen($name) > 1 && $name[0] == '"' && $name[strlen($name)-1] == '"') { + $name = substr($name, 1, -1); + } + } + } + + if (!$address && $name) { + $address = $name; + $name = ''; + } + + if ($address) { + $address = self::fix_email($address); + $result[$key] = ['name' => $name, 'address' => $address]; + } + } + + return $result; + } + + /** + * Explodes header (e.g. 
address-list) string into array of strings + * using specified separator characters with proper handling + * of quoted-strings and comments (RFC2822) + * + * @param string $separator String containing separator characters + * @param string $str Header string + * @param bool $remove_comments Enable to remove comments + * + * @return array Header items + */ + public static function explode_header_string($separator, $str, $remove_comments = false) + { + $length = strlen($str); + $result = []; + $quoted = false; + $comment = 0; + $out = ''; + + for ($i=0; $i<$length; $i++) { + // we're inside a quoted string + if ($quoted) { + if ($str[$i] == '"') { + $quoted = false; + } + else if ($str[$i] == "\\") { + if ($comment <= 0) { + $out .= "\\"; + } + $i++; + } + } + // we are inside a comment string + else if ($comment > 0) { + if ($str[$i] == ')') { + $comment--; + } + else if ($str[$i] == '(') { + $comment++; + } + else if ($str[$i] == "\\") { + $i++; + } + continue; + } + // separator, add to result array + else if (strpos($separator, $str[$i]) !== false) { + if ($out) { + $result[] = $out; + } + $out = ''; + continue; + } + // start of quoted string + else if ($str[$i] == '"') { + $quoted = true; + } + // start of comment + else if ($remove_comments && $str[$i] == '(') { + $comment++; + } + + if ($comment <= 0) { + $out .= $str[$i]; + } + } + + if ($out && $comment <= 0) { + $result[] = $out; + } + + return $result; + } + + /** + * Interpret a format=flowed message body according to RFC 2646 + * + * @param string $text Raw body formatted as flowed text + * @param string $mark Mark each flowed line with specified character + * @param bool $delsp Remove the trailing space of each flowed line + * + * @return string Interpreted text with unwrapped lines and stuffed space removed + */ + public static function unfold_flowed($text, $mark = null, $delsp = false) + { + $text = preg_split('/\r?\n/', $text); + $last = -1; + $q_level = 0; + $marks = []; + + foreach ($text as $idx 
=> $line) { + if ($q = strspn($line, '>')) { + // remove quote chars + $line = substr($line, $q); + // remove (optional) space-staffing + if (isset($line[0]) && $line[0] === ' ') { + $line = substr($line, 1); + } + + // The same paragraph (We join current line with the previous one) when: + // - the same level of quoting + // - previous line was flowed + // - previous line contains more than only one single space (and quote char(s)) + if ($q == $q_level + && isset($text[$last]) && $text[$last][strlen($text[$last])-1] == ' ' + && !preg_match('/^>+ {0,1}$/', $text[$last]) + ) { + if ($delsp) { + $text[$last] = substr($text[$last], 0, -1); + } + $text[$last] .= $line; + unset($text[$idx]); + + if ($mark) { + $marks[$last] = true; + } + } + else { + $last = $idx; + } + } + else { + if ($line == '-- ') { + $last = $idx; + } + else { + // remove space-stuffing + if (isset($line[0]) && $line[0] === ' ') { + $line = substr($line, 1); + } + + $last_len = isset($text[$last]) ? strlen($text[$last]) : 0; + + if ( + $last_len && $line && !$q_level && $text[$last] != '-- ' + && isset($text[$last][$last_len-1]) && $text[$last][$last_len-1] == ' ' + ) { + if ($delsp) { + $text[$last] = substr($text[$last], 0, -1); + } + $text[$last] .= $line; + unset($text[$idx]); + + if ($mark) { + $marks[$last] = true; + } + } + else { + $text[$idx] = $line; + $last = $idx; + } + } + } + $q_level = $q; + } + + if (!empty($marks)) { + foreach (array_keys($marks) as $mk) { + $text[$mk] = $mark . 
$text[$mk]; + } + } + + return implode("\r\n", $text); + } + + /** + * Wrap the given text to comply with RFC 2646 + * + * @param string $text Text to wrap + * @param int $length Length + * @param string $charset Character encoding of $text + * + * @return string Wrapped text + */ + public static function format_flowed($text, $length = 72, $charset = null) + { + $text = preg_split('/\r?\n/', $text); + + foreach ($text as $idx => $line) { + if ($line != '-- ') { + if ($level = strspn($line, '>')) { + // remove quote chars + $line = substr($line, $level); + // remove (optional) space-staffing and spaces before the line end + $line = rtrim($line, ' '); + if (isset($line[0]) && $line[0] === ' ') { + $line = substr($line, 1); + } + + $prefix = str_repeat('>', $level) . ' '; + $line = $prefix . self::wordwrap($line, $length - $level - 2, " \r\n$prefix", false, $charset); + } + else if ($line) { + $line = self::wordwrap(rtrim($line), $length - 2, " \r\n", false, $charset); + // space-stuffing + $line = preg_replace('/(^|\r\n)(From| |>)/', '\\1 \\2', $line); + } + + $text[$idx] = $line; + } + } + + return implode("\r\n", $text); + } + + /** + * Improved wordwrap function with multibyte support. + * The code is based on Zend_Text_MultiByte::wordWrap(). 
+ * + * @param string $string Text to wrap + * @param int $width Line width + * @param string $break Line separator + * @param bool $cut Enable to cut word + * @param string $charset Charset of $string + * @param bool $wrap_quoted When enabled quoted lines will not be wrapped + * + * @return string Text + */ + public static function wordwrap($string, $width = 75, $break = "\n", $cut = false, $charset = null, $wrap_quoted = true) + { + // Note: Never try to use iconv instead of mbstring functions here + // Iconv's substr/strlen are 100x slower (#1489113) + + if ($charset && $charset != RCUBE_CHARSET) { + $charset = rcube_charset::parse_charset($charset); + mb_internal_encoding($charset); + } + + // Convert \r\n to \n, this is our line-separator + $string = str_replace("\r\n", "\n", $string); + $separator = "\n"; // must be 1 character length + $result = []; + + while (($stringLength = mb_strlen($string)) > 0) { + $breakPos = mb_strpos($string, $separator, 0); + + // quoted line (do not wrap) + if ($wrap_quoted && $string[0] == '>') { + if ($breakPos === $stringLength - 1 || $breakPos === false) { + $subString = $string; + $cutLength = null; + } + else { + $subString = mb_substr($string, 0, $breakPos); + $cutLength = $breakPos + 1; + } + } + // next line found and current line is shorter than the limit + else if ($breakPos !== false && $breakPos < $width) { + if ($breakPos === $stringLength - 1) { + $subString = $string; + $cutLength = null; + } + else { + $subString = mb_substr($string, 0, $breakPos); + $cutLength = $breakPos + 1; + } + } + else { + $subString = mb_substr($string, 0, $width); + + // last line + if ($breakPos === false && $subString === $string) { + $cutLength = null; + } + else { + $nextChar = mb_substr($string, $width, 1); + + if ($nextChar === ' ' || $nextChar === $separator) { + $afterNextChar = mb_substr($string, $width + 1, 1); + + // Note: mb_substr() does never return False + if ($afterNextChar === false || $afterNextChar === '') { + 
$subString .= $nextChar; + } + + $cutLength = mb_strlen($subString) + 1; + } + else { + $spacePos = mb_strrpos($subString, ' ', 0); + + if ($spacePos !== false) { + $subString = mb_substr($subString, 0, $spacePos); + $cutLength = $spacePos + 1; + } + else if ($cut === false) { + $spacePos = mb_strpos($string, ' ', 0); + + if ($spacePos !== false && ($breakPos === false || $spacePos < $breakPos)) { + $subString = mb_substr($string, 0, $spacePos); + $cutLength = $spacePos + 1; + } + else if ($breakPos === false) { + $subString = $string; + $cutLength = null; + } + else { + $subString = mb_substr($string, 0, $breakPos); + $cutLength = $breakPos + 1; + } + } + else { + $cutLength = $width; + } + } + } + } + + $result[] = $subString; + + if ($cutLength !== null) { + $string = mb_substr($string, $cutLength, ($stringLength - $cutLength)); + } + else { + break; + } + } + + if ($charset && $charset != RCUBE_CHARSET) { + mb_internal_encoding(RCUBE_CHARSET); + } + + return implode($break, $result); + } + + /** + * A method to guess the mime_type of an attachment. 
+ * + * @param string $path Path to the file or file contents + * @param string $name File name (with suffix) + * @param string $failover Mime type supplied for failover + * @param bool $is_stream Set to True if $path contains file contents + * @param bool $skip_suffix Set to True if the config/mimetypes.php map should be ignored + * + * @return string + * @author Till Klampaeckel + * @see http://de2.php.net/manual/en/ref.fileinfo.php + * @see http://de2.php.net/mime_content_type + */ + public static function file_content_type($path, $name, $failover = 'application/octet-stream', $is_stream = false, $skip_suffix = false) + { + $mime_type = null; + $config = rcube::get_instance()->config; + + // Detect mimetype using filename extension + if (!$skip_suffix) { + $mime_type = self::file_ext_type($name); + } + + // try fileinfo extension if available + if (!$mime_type && function_exists('finfo_open')) { + $mime_magic = $config->get('mime_magic'); + // null as a 2nd argument should be the same as no argument + // this however is not true on all systems/versions + if ($mime_magic) { + $finfo = finfo_open(FILEINFO_MIME, $mime_magic); + } + else { + $finfo = finfo_open(FILEINFO_MIME); + } + + if ($finfo) { + $func = $is_stream ? 'finfo_buffer' : 'finfo_file'; + $mime_type = $func($finfo, $path, FILEINFO_MIME_TYPE); + finfo_close($finfo); + } + } + + // try PHP's mime_content_type + if (!$mime_type && !$is_stream && function_exists('mime_content_type')) { + $mime_type = @mime_content_type($path); + } + + // fall back to user-submitted string + if (!$mime_type) { + $mime_type = $failover; + } + + return $mime_type; + } + + /** + * File type detection based on file name only. 
+ * + * @param string $filename Path to the file or file contents + * + * @return string|null Mimetype label + */ + public static function file_ext_type($filename) + { + static $mime_ext = []; + + if (empty($mime_ext)) { + foreach (rcube::get_instance()->config->resolve_paths('mimetypes.php') as $fpath) { + $mime_ext = array_merge($mime_ext, (array) @include($fpath)); + } + } + + // use file name suffix with hard-coded mime-type map + if (!empty($mime_ext) && $filename) { + $ext = strtolower(pathinfo($filename, PATHINFO_EXTENSION)); + if ($ext && !empty($mime_ext[$ext])) { + return $mime_ext[$ext]; + } + } + } + + /** + * Get mimetype => file extension mapping + * + * @param string Mime-Type to get extensions for + * + * @return array List of extensions matching the given mimetype or a hash array + * with ext -> mimetype mappings if $mimetype is not given + */ + public static function get_mime_extensions($mimetype = null) + { + static $mime_types, $mime_extensions; + + // return cached data + if (is_array($mime_types)) { + return $mimetype ? (isset($mime_types[$mimetype]) ? 
$mime_types[$mimetype] : []) : $mime_extensions; + } + + // load mapping file + $file_paths = []; + + if ($mime_types = rcube::get_instance()->config->get('mime_types')) { + $file_paths[] = $mime_types; + } + + // try common locations + if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') { + $file_paths[] = 'C:/xampp/apache/conf/mime.types.'; + } + else { + $file_paths[] = '/etc/mime.types'; + $file_paths[] = '/etc/httpd/mime.types'; + $file_paths[] = '/etc/httpd2/mime.types'; + $file_paths[] = '/etc/apache/mime.types'; + $file_paths[] = '/etc/apache2/mime.types'; + $file_paths[] = '/etc/nginx/mime.types'; + $file_paths[] = '/usr/local/etc/httpd/conf/mime.types'; + $file_paths[] = '/usr/local/etc/apache/conf/mime.types'; + $file_paths[] = '/usr/local/etc/apache24/mime.types'; + } + + $mime_types = []; + $mime_extensions = []; + $lines = []; + $regex = "/([\w\+\-\.\/]+)\s+([\w\s]+)/i"; + + foreach ($file_paths as $fp) { + if (@is_readable($fp)) { + $lines = file($fp, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + break; + } + } + + foreach ($lines as $line) { + // skip comments or mime types w/o any extensions + if ($line[0] == '#' || !preg_match($regex, $line, $matches)) { + continue; + } + + $mime = $matches[1]; + + foreach (explode(' ', $matches[2]) as $ext) { + $ext = trim($ext); + $mime_types[$mime][] = $ext; + $mime_extensions[$ext] = $mime; + } + } + + // fallback to some well-known types most important for daily emails + if (empty($mime_types)) { + foreach (rcube::get_instance()->config->resolve_paths('mimetypes.php') as $fpath) { + $mime_extensions = array_merge($mime_extensions, (array) @include($fpath)); + } + + foreach ($mime_extensions as $ext => $mime) { + $mime_types[$mime][] = $ext; + } + } + + // Add some known aliases that aren't included by some mime.types (#1488891) + // the order is important here so standard extensions have higher prio + $aliases = [ + 'image/gif' => ['gif'], + 'image/png' => ['png'], + 'image/x-png' => ['png'], + 'image/jpeg' => 
['jpg', 'jpeg', 'jpe'], + 'image/jpg' => ['jpg', 'jpeg', 'jpe'], + 'image/pjpeg' => ['jpg', 'jpeg', 'jpe'], + 'image/tiff' => ['tif'], + 'image/bmp' => ['bmp'], + 'image/x-ms-bmp' => ['bmp'], + 'message/rfc822' => ['eml'], + 'text/x-mail' => ['eml'], + ]; + + foreach ($aliases as $mime => $exts) { + if (isset($mime_types[$mime])) { + $mime_types[$mime] = array_unique(array_merge((array) $mime_types[$mime], $exts)); + } + else { + $mime_types[$mime] = $exts; + } + + foreach ($exts as $ext) { + if (!isset($mime_extensions[$ext])) { + $mime_extensions[$ext] = $mime; + } + } + } + + if ($mimetype) { + return !empty($mime_types[$mimetype]) ? $mime_types[$mimetype] : []; + } + + return $mime_extensions; + } + + /** + * Detect image type of the given binary data by checking magic numbers. + * + * @param string $data Binary file content + * + * @return string Detected mime-type or jpeg as fallback + */ + public static function image_content_type($data) + { + $type = 'jpeg'; + if (preg_match('/^\x89\x50\x4E\x47/', $data)) $type = 'png'; + else if (preg_match('/^\x47\x49\x46\x38/', $data)) $type = 'gif'; + else if (preg_match('/^\x00\x00\x01\x00/', $data)) $type = 'ico'; + // else if (preg_match('/^\xFF\xD8\xFF\xE0/', $data)) $type = 'jpeg'; + + return 'image/' . $type; + } + + /** + * Try to fix invalid email addresses + * + * Splits the address on "@" and strips the quotes from every part that is + * a quoted string made only of characters that need no quoting. + * + * @param string $email E-mail address + * + * @return string E-mail address with redundant quoting removed + */ + public static function fix_email($email) + { + $parts = rcube_utils::explode_quoted_string('@', $email); + + foreach ($parts as $idx => $part) { + // remove redundant quoting (#1490040) + // isset() skips empty parts: reading offset 0 of an empty string raises a warning + if (isset($part[0]) && $part[0] == '"' && preg_match('/^"([a-zA-Z0-9._+=-]+)"$/', $part, $m)) { + $parts[$idx] = $m[1]; + } + } + + return implode('@', $parts); + } + + /** + * Fix mimetype name. 
+ * + * @param string $type Mimetype + * + * @return string Mimetype + */ + public static function fix_mimetype($type) + { + $type = strtolower(trim($type)); + $aliases = [ + 'image/x-ms-bmp' => 'image/bmp', // #4771 + 'pdf' => 'application/pdf', // #6816 + ]; + + if (!empty($aliases[$type])) { + return $aliases[$type]; + } + + // Some versions of Outlook create garbage Content-Type: + // application/pdf.A520491B_3BF7_494D_8855_7FAC2C6C0608 + if (preg_match('/^application\/pdf.+/', $type)) { + return 'application/pdf'; + } + + // treat image/pjpeg (image/pjpg, image/jpg) as image/jpeg (#4196) + if (preg_match('/^image\/p?jpe?g$/', $type)) { + return 'image/jpeg'; + } + + return $type; + } +} diff --git a/src/include/rcube_result_index.php b/src/include/rcube_result_index.php new file mode 100644 --- /dev/null +++ b/src/include/rcube_result_index.php @@ -0,0 +1,463 @@ + | + | Author: Aleksander Machniak | + +-----------------------------------------------------------------------+ +*/ + +/** + * Get first element from an array + * + * Declared only when no array_first() function exists yet, so the file + * still loads on PHP versions that ship a native array_first() (PHP 8.5+). + * + * @param array $array Input array + * + * @return mixed First element if found, Null otherwise + */ +if (!function_exists('array_first')) { + function array_first($array) + { + if (is_array($array)) { + reset($array); + foreach ($array as $element) { + return $element; + } + } + + // not an array, or an empty one + return null; + } +} + +/** + * Class for accessing IMAP's SORT/SEARCH/ESEARCH result + * + * @package Framework + * @subpackage Storage + */ +class rcube_result_index +{ + public $incomplete = false; + + protected $raw_data; + protected $mailbox; + protected $meta = []; + protected $params = []; + protected $order = 'ASC'; + + const SEPARATOR_ELEMENT = ' '; + + + /** + * Object constructor. + */ + public function __construct($mailbox = null, $data = null, $order = null) + { + $this->mailbox = $mailbox; + $this->order = $order == 'DESC' ? 
'DESC' : 'ASC'; + $this->init($data); + } + + /** + * Initializes object with SORT command response + * + * @param string $data IMAP response string + */ + public function init($data = null) + { + $this->meta = []; + + $data = explode('*', (string)$data); + + // ...skip unilateral untagged server responses + for ($i=0, $len=count($data); $i<$len; $i++) { + $data_item = &$data[$i]; + if (preg_match('/^ SORT/i', $data_item)) { + // valid response, initialize raw_data for is_error() + $this->raw_data = ''; + $data_item = substr($data_item, 5); + break; + } + else if (preg_match('/^ (E?SEARCH)/i', $data_item, $m)) { + // valid response, initialize raw_data for is_error() + $this->raw_data = ''; + $data_item = substr($data_item, strlen($m[0])); + + if (strtoupper($m[1]) == 'ESEARCH') { + $data_item = trim($data_item); + // remove MODSEQ response + if (preg_match('/\(MODSEQ ([0-9]+)\)$/i', $data_item, $m)) { + $data_item = substr($data_item, 0, -strlen($m[0])); + $this->params['MODSEQ'] = $m[1]; + } + // remove TAG response part + if (preg_match('/^\(TAG ["a-z0-9]+\)\s*/i', $data_item, $m)) { + $data_item = substr($data_item, strlen($m[0])); + } + // remove UID + $data_item = preg_replace('/^UID\s*/i', '', $data_item); + + // ESEARCH parameters + while (preg_match('/^([a-z]+) ([0-9:,]+)\s*/i', $data_item, $m)) { + $param = strtoupper($m[1]); + $value = $m[2]; + + $this->params[$param] = $value; + $data_item = substr($data_item, strlen($m[0])); + + if (in_array($param, ['COUNT', 'MIN', 'MAX'])) { + $this->meta[strtolower($param)] = (int) $value; + } + } + +// @TODO: Implement compression using compressMessageSet() in __sleep() and __wakeup() ? +// @TODO: work with compressed result?! 
+ if (isset($this->params['ALL'])) { + $data_item = implode(self::SEPARATOR_ELEMENT, + rcube_imap_generic::uncompressMessageSet($this->params['ALL'])); + } + } + + break; + } + + unset($data[$i]); + } + + $data = array_filter($data); + + if (empty($data)) { + return; + } + + $data = array_first($data); + $data = trim($data); + $data = preg_replace('/[\r\n]/', '', $data); + $data = preg_replace('/\s+/', ' ', $data); + + $this->raw_data = $data; + } + + /** + * Checks the result from IMAP command + * + * @return bool True if the result is an error, False otherwise + */ + public function is_error() + { + return $this->raw_data === null; + } + + /** + * Checks if the result is empty + * + * @return bool True if the result is empty, False otherwise + */ + public function is_empty() + { + return empty($this->raw_data) + && empty($this->meta['max']) && empty($this->meta['min']) && empty($this->meta['count']); + } + + /** + * Returns number of elements in the result + * + * @return int Number of elements + */ + public function count() + { + if (isset($this->meta['count'])) { + return $this->meta['count']; + } + + if (empty($this->raw_data)) { + $this->meta['count'] = 0; + $this->meta['length'] = 0; + } + else { + $this->meta['count'] = 1 + substr_count($this->raw_data, self::SEPARATOR_ELEMENT); + } + + return $this->meta['count']; + } + + /** + * Returns number of elements in the result. 
+ * Alias for count() for compatibility with rcube_result_thread + * + * @return int Number of elements + */ + public function count_messages() + { + return $this->count(); + } + + /** + * Returns maximal message identifier in the result + * + * @return int|null Maximal message identifier + */ + public function max() + { + if ($this->is_empty()) { + return null; + } + + if (!isset($this->meta['max'])) { + $this->meta['max'] = null; + $all = $this->get(); + if (!empty($all)) { + $this->meta['max'] = (int) max($all); + } + } + + return $this->meta['max']; + } + + /** + * Returns minimal message identifier in the result + * + * @return int|null Minimal message identifier + */ + public function min() + { + if ($this->is_empty()) { + return null; + } + + if (!isset($this->meta['min'])) { + $this->meta['min'] = null; + $all = $this->get(); + if (!empty($all)) { + $this->meta['min'] = (int) min($all); + } + } + + return $this->meta['min']; + } + + /** + * Slices data set. + * + * @param int $offset Offset (as for PHP's array_slice()) + * @param int $length Number of elements (as for PHP's array_slice()) + */ + public function slice($offset, $length) + { + $data = $this->get(); + $data = array_slice($data, $offset, $length); + + $this->meta = []; + $this->meta['count'] = count($data); + $this->raw_data = implode(self::SEPARATOR_ELEMENT, $data); + } + + /** + * Filters data set. Removes elements not listed in $ids list. + * + * @param array $ids List of IDs to remove. + */ + public function filter($ids = []) + { + $data = $this->get(); + $data = array_intersect($data, $ids); + + $this->meta = []; + $this->meta['count'] = count($data); + $this->raw_data = implode(self::SEPARATOR_ELEMENT, $data); + } + + /** + * Reverts order of elements in the result + */ + public function revert() + { + $this->order = $this->order == 'ASC' ? 
'DESC' : 'ASC'; + + if (empty($this->raw_data)) { + return; + } + + $data = $this->get(); + $data = array_reverse($data); + $this->raw_data = implode(self::SEPARATOR_ELEMENT, $data); + + $this->meta['pos'] = []; + } + + /** + * Check if the given message ID exists in the object + * + * @param int $msgid Message ID + * @param bool $get_index When enabled element's index will be returned. + * Elements are indexed starting with 0 + * + * @return mixed False if message ID doesn't exist, True if exists or + * index of the element if $get_index=true + */ + public function exists($msgid, $get_index = false) + { + if (empty($this->raw_data)) { + return false; + } + + $msgid = (int) $msgid; + $begin = implode('|', ['^', preg_quote(self::SEPARATOR_ELEMENT, '/')]); + $end = implode('|', ['$', preg_quote(self::SEPARATOR_ELEMENT, '/')]); + + // the flags argument must be an int (0 = no flags), passing null is deprecated since PHP 8.1 + if (preg_match("/($begin)$msgid($end)/", $this->raw_data, $m, + $get_index ? PREG_OFFSET_CAPTURE : 0) + ) { + if ($get_index) { + $idx = 0; + // a match at offset 0 is the first element, otherwise count the separators before it + if (!empty($m[0][1])) { + $idx = 1 + substr_count($this->raw_data, self::SEPARATOR_ELEMENT, 0, $m[0][1]); + } + // cache position of this element, so we can use it in get_element() + $this->meta['pos'][$idx] = (int)$m[0][1]; + + return $idx; + } + + return true; + } + + return false; + } + + /** + * Return all messages in the result. + * + * @return array List of message IDs + */ + public function get() + { + if (empty($this->raw_data)) { + return []; + } + + return explode(self::SEPARATOR_ELEMENT, $this->raw_data); + } + + /** + * Return all messages in the result. 
*
 * @return string Empty string for an empty result, otherwise the value
 *                returned by rcube_imap_generic::compressMessageSet()
 */
public function get_compressed()
{
    if (empty($this->raw_data)) {
        return '';
    }

    return rcube_imap_generic::compressMessageSet($this->get());
}

/**
 * Return result element at specified index
 *
 * @param int|string $index Element's index or "FIRST" or "LAST"
 *
 * @return int|null Element value, null when the result is empty
 *                  or the index does not exist
 */
public function get_element($index)
{
    if (empty($this->raw_data)) {
        return null;
    }

    $count = $this->count();

    // first element
    if ($index === 0 || $index === '0' || $index === 'FIRST') {
        $pos = strpos($this->raw_data, self::SEPARATOR_ELEMENT);
        if ($pos === false) {
            $result = (int) $this->raw_data;
        }
        else {
            $result = (int) substr($this->raw_data, 0, $pos);
        }

        return $result;
    }

    // last element
    if ($index === 'LAST' || $index == $count-1) {
        $pos = strrpos($this->raw_data, self::SEPARATOR_ELEMENT);
        if ($pos === false) {
            $result = (int) $this->raw_data;
        }
        else {
            // skip the separator itself so the cast does not depend on
            // the separator being something (int) silently ignores
            $result = (int) substr($this->raw_data, $pos + strlen(self::SEPARATOR_ELEMENT));
        }

        return $result;
    }

    // do we know the position of the element or the neighbour of it?
    if (!empty($this->meta['pos'])) {
        if (isset($this->meta['pos'][$index])) {
            $pos = $this->meta['pos'][$index];
        }
        else if (isset($this->meta['pos'][$index-1])) {
            $pos = strpos($this->raw_data, self::SEPARATOR_ELEMENT,
                $this->meta['pos'][$index-1] + 1);
        }
        else if (isset($this->meta['pos'][$index+1])) {
            $pos = strrpos($this->raw_data, self::SEPARATOR_ELEMENT,
                $this->meta['pos'][$index+1] - $this->length() - 1);
        }

        // $pos may be false when no separator was found; do not let it
        // degrade to offset 0. Flags must be an integer (0 = none).
        if (isset($pos) && $pos !== false
            && preg_match('/([0-9]+)/', $this->raw_data, $m, 0, $pos)
        ) {
            return (int) $m[1];
        }
    }

    // Finally use less effective method
    $data = explode(self::SEPARATOR_ELEMENT, $this->raw_data);

    return isset($data[$index]) ? (int) $data[$index] : null;
}

/**
 * Returns response parameters, e.g. ESEARCH's MIN/MAX/COUNT/ALL/MODSEQ
 * or internal data e.g. 
MAILBOX, ORDER + * + * @param string $param Parameter name + * + * @return array|string Response parameters or parameter value + */ + public function get_parameters($param=null) + { + $params = $this->params; + $params['MAILBOX'] = $this->mailbox; + $params['ORDER'] = $this->order; + + if ($param !== null) { + return $params[$param]; + } + + return $params; + } + + /** + * Returns length of internal data representation + * + * @return int Data length + */ + protected function length() + { + if (!isset($this->meta['length'])) { + $this->meta['length'] = strlen($this->raw_data); + } + + return $this->meta['length']; + } +} diff --git a/src/include/rcube_utils.php b/src/include/rcube_utils.php new file mode 100644 --- /dev/null +++ b/src/include/rcube_utils.php @@ -0,0 +1,1715 @@ + | + | Author: Aleksander Machniak | + +-----------------------------------------------------------------------+ +*/ + +/** + * Utility class providing common functions + * + * @package Framework + * @subpackage Utils + */ +class rcube_utils +{ + // define constants for input reading + const INPUT_GET = 1; + const INPUT_POST = 2; + const INPUT_COOKIE = 4; + const INPUT_GP = 3; // GET + POST + const INPUT_GPC = 7; // GET + POST + COOKIE + + + /** + * A wrapper for PHP's explode() that does not throw a warning + * when the separator does not exist in the string + * + * @param string $separator Separator string + * @param string $string The string to explode + * + * @return array Exploded string. 
Still an array if there's no separator in the string
 */
public static function explode($separator, $string)
{
    // An empty separator can never be "found"; PHP 8's strpos() reports
    // a match at offset 0 for it and explode() then throws a ValueError,
    // so treat it the same as "separator not present".
    if ((string) $separator !== '' && strpos($string, $separator) !== false) {
        return explode($separator, $string);
    }

    // Always two elements, so list($a, $b) = self::explode(...) is safe
    return [$string, null];
}

/**
 * Helper method to set a cookie with the current path and host settings
 *
 * Does nothing when the response headers have already been sent.
 * Path/domain/etc. are taken from session_get_cookie_params(); the
 * 'secure' flag is additionally forced when self::https_check() is true.
 *
 * @param string $name      Cookie name
 * @param string $value     Cookie value
 * @param int    $exp       Expiration time
 * @param bool   $http_only HTTP Only
 */
public static function setcookie($name, $value, $exp = 0, $http_only = true)
{
    if (headers_sent()) {
        return;
    }

    $attrib = session_get_cookie_params();
    $attrib['expires']  = $exp;
    $attrib['secure']   = $attrib['secure'] || self::https_check();
    $attrib['httponly'] = $http_only;

    // session_get_cookie_params() return includes 'lifetime' but setcookie() does not use it, instead it uses 'expires'
    unset($attrib['lifetime']);

    if (version_compare(PHP_VERSION, '7.3.0', '>=')) {
        // An alternative signature for setcookie supporting an options array added in PHP 7.3.0
        setcookie($name, $value, $attrib);
    }
    else {
        setcookie($name, $value, $attrib['expires'], $attrib['path'], $attrib['domain'], $attrib['secure'], $attrib['httponly']);
    }
}

/**
 * E-mail address validation. 
+ * + * @param string $email Email address + * @param bool $dns_check True to check dns + * + * @return bool True on success, False if address is invalid + */ + public static function check_email($email, $dns_check = true) + { + // Check for invalid (control) characters + if (preg_match('/\p{Cc}/u', $email)) { + return false; + } + + // Check for length limit specified by RFC 5321 (#1486453) + if (strlen($email) > 254) { + return false; + } + + $pos = strrpos($email, '@'); + if (!$pos) { + return false; + } + + $domain_part = substr($email, $pos + 1); + $local_part = substr($email, 0, $pos); + + // quoted-string, make sure all backslashes and quotes are + // escaped + if (substr($local_part, 0, 1) == '"') { + $local_quoted = preg_replace('/\\\\(\\\\|\")/','', substr($local_part, 1, -1)); + if (preg_match('/\\\\|"/', $local_quoted)) { + return false; + } + } + // dot-atom portion, make sure there's no prohibited characters + else if (preg_match('/(^\.|\.\.|\.$)/', $local_part) + || preg_match('/[\\ ",:;<>@]/', $local_part) + ) { + return false; + } + + // Validate domain part + if (preg_match('/^\[((IPv6:[0-9a-f:.]+)|([0-9.]+))\]$/i', $domain_part, $matches)) { + return self::check_ip(preg_replace('/^IPv6:/i', '', $matches[1])); // valid IPv4 or IPv6 address + } + else { + // If not an IP address + $domain_array = explode('.', $domain_part); + // Not enough parts to be a valid domain + if (count($domain_array) < 2) { + return false; + } + + foreach ($domain_array as $part) { + if (!preg_match('/^((xn--)?([A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])|([A-Za-z0-9]))$/', $part)) { + return false; + } + } + + // last domain part (allow extended TLD) + $last_part = array_pop($domain_array); + if (strpos($last_part, 'xn--') !== 0 + && (preg_match('/[^a-zA-Z0-9]/', $last_part) || preg_match('/^[0-9]+$/', $last_part)) + ) { + return false; + } + + $rcube = rcube::get_instance(); + + if (!$dns_check || !function_exists('checkdnsrr') || !$rcube->config->get('email_dns_check')) { 
return true;
        }

        // Check DNS record(s)
        // Note: We can't use ANY (#6581)
        foreach (['A', 'MX', 'CNAME', 'AAAA'] as $type) {
            if (checkdnsrr($domain_part, $type)) {
                return true;
            }
        }
    }

    return false;
}

/**
 * Validates IPv4 or IPv6 address
 *
 * @param string $ip IP address in v4 or v6 format
 *
 * @return bool True if the address is valid
 */
public static function check_ip($ip)
{
    return filter_var($ip, FILTER_VALIDATE_IP) !== false;
}

/**
 * Replacing specials characters to a specific encoding type
 *
 * Non-string input is converted with strval() first. An unknown or
 * empty $enctype returns the string unchanged.
 *
 * @param string $str      Input string
 * @param string $enctype  Encoding type: text|html|xml|js|url
 * @param string $mode     Replace mode for tags: show|remove|strict
 * @param bool   $newlines Convert newlines (html only, via nl2br())
 *
 * @return string The quoted string
 */
public static function rep_specialchars_output($str, $enctype = '', $mode = '', $newlines = true)
{
    static $html_encode_arr = false;
    static $js_rep_table    = false;
    static $xml_rep_table   = false;

    if (!is_string($str)) {
        $str = strval($str);
    }

    // encode for HTML output
    if ($enctype == 'html') {
        if (!$html_encode_arr) {
            $html_encode_arr = get_html_translation_table(HTML_SPECIALCHARS);
            unset($html_encode_arr['?']);
        }

        $encode_arr = $html_encode_arr;

        if ($mode == 'remove') {
            $str = strip_tags($str);
        }
        else if ($mode != 'strict') {
            // don't replace quotes and html tags
            $ltpos = strpos($str, '<');
            if ($ltpos !== false && strpos($str, '>', $ltpos) !== false) {
                unset($encode_arr['"']);
                unset($encode_arr['<']);
                unset($encode_arr['>']);
                unset($encode_arr['&']);
            }
        }

        $out = strtr($str, $encode_arr);

        return $newlines ? nl2br($out) : $out;
    }

    // if the replace tables for XML and JS are not yet defined
    if ($js_rep_table === false) {
        $js_rep_table = $xml_rep_table = [];
        $xml_rep_table['&'] = '&amp;';

        // can be increased to support more charsets
        // NOTE(review): keys are single bytes 0xA0-0xFF; this looks like it
        // assumes a single-byte charset - confirm behaviour for UTF-8 input
        for ($c=160; $c<256; $c++) {
            $xml_rep_table[chr($c)] = "&#$c;";
        }

        $xml_rep_table['"'] = '&quot;';
        $js_rep_table['"']  = '\\"';
        $js_rep_table["'"]  = "\\'";
        $js_rep_table["\\"] = "\\\\";
        // Unicode line and paragraph separators (#1486310):
        // UTF-8 bytes of U+2028 / U+2029 are replaced with numeric entities
        $js_rep_table[chr(hexdec('E2')).chr(hexdec('80')).chr(hexdec('A8'))] = '&#8232;';
        $js_rep_table[chr(hexdec('E2')).chr(hexdec('80')).chr(hexdec('A9'))] = '&#8233;';
    }

    // encode for javascript use
    if ($enctype == 'js') {
        return preg_replace(["/\r?\n/", "/\r/", '/<\\//'], ['\n', '\n', '<\\/'], strtr($str, $js_rep_table));
    }

    // encode for plaintext
    if ($enctype == 'text') {
        return str_replace("\r\n", "\n", $mode == 'remove' ? strip_tags($str) : $str);
    }

    if ($enctype == 'url') {
        return rawurlencode($str);
    }

    // encode for XML
    if ($enctype == 'xml') {
        return strtr($str, $xml_rep_table);
    }

    // no encoding given -> return original string
    return $str;
}

/**
 * Read input value and make sure it is a string.
 *
 * @param string $fname      Field name to read
 * @param int    $source     Source to get value from (see self::INPUT_*)
 * @param bool   $allow_html Allow HTML tags in field value
 * @param string $charset    Charset to convert into
 *
 * @return string Request parameter value
 * @see self::get_input_value()
 */
public static function get_input_string($fname, $source, $allow_html = false, $charset = null)
{
    $value = self::get_input_value($fname, $source, $allow_html, $charset);

    return is_string($value) ? 
$value : ''; + } + + /** + * Read request parameter value and convert it for internal use + * Performs stripslashes() and charset conversion if necessary + * + * @param string $fname Field name to read + * @param int $source Source to get value from (see self::INPUT_*) + * @param bool $allow_html Allow HTML tags in field value + * @param string $charset Charset to convert into + * + * @return string|array|null Request parameter value or NULL if not set + */ + public static function get_input_value($fname, $source, $allow_html = false, $charset = null) + { + $value = null; + + if (($source & self::INPUT_GET) && isset($_GET[$fname])) { + $value = $_GET[$fname]; + } + + if (($source & self::INPUT_POST) && isset($_POST[$fname])) { + $value = $_POST[$fname]; + } + + if (($source & self::INPUT_COOKIE) && isset($_COOKIE[$fname])) { + $value = $_COOKIE[$fname]; + } + + return self::parse_input_value($value, $allow_html, $charset); + } + + /** + * Parse/validate input value. See self::get_input_value() + * Performs stripslashes() and charset conversion if necessary + * + * @param string $value Input value + * @param bool $allow_html Allow HTML tags in field value + * @param string $charset Charset to convert into + * + * @return string Parsed value + */ + public static function parse_input_value($value, $allow_html = false, $charset = null) + { + if (empty($value)) { + return $value; + } + + if (is_array($value)) { + foreach ($value as $idx => $val) { + $value[$idx] = self::parse_input_value($val, $allow_html, $charset); + } + + return $value; + } + + // remove HTML tags if not allowed + if (!$allow_html) { + $value = strip_tags($value); + } + + $rcube = rcube::get_instance(); + $output_charset = is_object($rcube->output) ? 
$rcube->output->get_charset() : null; + + // remove invalid characters (#1488124) + if ($output_charset == 'UTF-8') { + $value = rcube_charset::clean($value); + } + + // convert to internal charset + if ($charset && $output_charset) { + $value = rcube_charset::convert($value, $output_charset, $charset); + } + + return $value; + } + + /** + * Convert array of request parameters (prefixed with _) + * to a regular array with non-prefixed keys. + * + * @param int $mode Source to get value from (GPC) + * @param string $ignore PCRE expression to skip parameters by name + * @param bool $allow_html Allow HTML tags in field value + * + * @return array Hash array with all request parameters + */ + public static function request2param($mode = null, $ignore = 'task|action', $allow_html = false) + { + $out = []; + $src = $mode == self::INPUT_GET ? $_GET : ($mode == self::INPUT_POST ? $_POST : $_REQUEST); + + foreach (array_keys($src) as $key) { + $fname = $key[0] == '_' ? substr($key, 1) : $key; + if ($ignore && !preg_match('/^(' . $ignore . 
')$/', $fname)) { + $out[$fname] = self::get_input_value($key, $mode, $allow_html); + } + } + + return $out; + } + + /** + * Convert the given string into a valid HTML identifier + * Same functionality as done in app.js with rcube_webmail.html_identifier() + * + * @param string $str String input + * @param bool $encode Use base64 encoding + * + * @param string Valid HTML identifier + */ + public static function html_identifier($str, $encode = false) + { + if ($encode) { + return rtrim(strtr(base64_encode($str), '+/', '-_'), '='); + } + + return asciiwords($str, true, '_'); + } + + /** + * Replace all css definitions with #container [def] + * and remove css-inlined scripting, make position style safe + * + * @param string $source CSS source code + * @param string $container_id Container ID to use as prefix + * @param bool $allow_remote Allow remote content + * @param string $prefix Prefix to be added to id/class identifier + * + * @return string Modified CSS source + */ + public static function mod_css_styles($source, $container_id, $allow_remote = false, $prefix = '') + { + $last_pos = 0; + $replacements = new rcube_string_replacer; + + // ignore the whole block if evil styles are detected + $source = self::xss_entity_decode($source); + $stripped = preg_replace('/[^a-z\(:;]/i', '', $source); + $evilexpr = 'expression|behavior|javascript:|import[^a]' . (!$allow_remote ? '|url\((?!data:image)' : ''); + + if (preg_match("/$evilexpr/i", $stripped)) { + return '/* evil! */'; + } + + $strict_url_regexp = '!url\s*\(\s*["\']?(https?:)//[a-z0-9/._+-]+["\']?\s*\)!Uims'; + + // remove html comments + $source = preg_replace('/(^\s*<\!--)|(-->\s*$)/m', '', $source); + + // cut out all contents between { and } + while (($pos = strpos($source, '{', $last_pos)) && ($pos2 = strpos($source, '}', $pos))) { + $nested = strpos($source, '{', $pos+1); + if ($nested && $nested < $pos2) { // when dealing with nested blocks (e.g. 
@media), take the inner one + $pos = $nested; + } + $length = $pos2 - $pos - 1; + $styles = substr($source, $pos+1, $length); + $output = ''; + + // check every css rule in the style block... + foreach (self::parse_css_block($styles) as $rule) { + // Remove 'page' attributes (#7604) + if ($rule[0] == 'page') { + continue; + } + + // Convert position:fixed to position:absolute (#5264) + if ($rule[0] == 'position' && strcasecmp($rule[1], 'fixed') === 0) { + $rule[1] = 'absolute'; + } + else if ($allow_remote) { + $stripped = preg_replace('/[^a-z\(:;]/i', '', $rule[1]); + + // allow data:image and strict url() values only + if ( + stripos($stripped, 'url(') !== false + && stripos($stripped, 'url(data:image') === false + && !preg_match($strict_url_regexp, $rule[1]) + ) { + $rule[1] = '/* evil! */'; + } + } + + $output .= sprintf(" %s: %s;", $rule[0] , $rule[1]); + } + + $key = $replacements->add($output . ' '); + $repl = $replacements->get_replacement($key); + $source = substr_replace($source, $repl, $pos+1, $length); + $last_pos = $pos2 - ($length - strlen($repl)); + } + + // add #container to each tag selector and prefix to id/class identifiers + if ($container_id || $prefix) { + // Exclude rcube_string_replacer pattern matches, this is needed + // for cases like @media { body { position: fixed; } } (#5811) + $excl = '(?!' . substr($replacements->pattern, 1, -1) . ')'; + $regexp = '/(^\s*|,\s*|\}\s*|\{\s*)(' . $excl . ':?[a-z0-9\._#\*\[][a-z0-9\._:\(\)#=~ \[\]"\|\>\+\$\^-]*)/im'; + $callback = function($matches) use ($container_id, $prefix) { + $replace = $matches[2]; + + if (stripos($replace, ':root') === 0) { + $replace = substr($replace, 5); + } + + if ($prefix) { + $replace = str_replace(['.', '#'], [".$prefix", "#$prefix"], $replace); + } + + if ($container_id) { + $replace = "#$container_id " . 
$replace; + } + + // Remove redundant spaces (for simpler testing) + $replace = preg_replace('/\s+/', ' ', $replace); + + return str_replace($matches[2], $replace, $matches[0]); + }; + + $source = preg_replace_callback($regexp, $callback, $source); + } + + // replace body definition because we also stripped off the tag + if ($container_id) { + $regexp = '/#' . preg_quote($container_id, '/') . '\s+body/i'; + $source = preg_replace($regexp, "#$container_id", $source); + } + + // put block contents back in + $source = $replacements->resolve($source); + + return $source; + } + + /** + * Explode css style. Property names will be lower-cased and trimmed. + * Values will be trimmed. Invalid entries will be skipped. + * + * @param string $style CSS style + * + * @return array List of CSS rule pairs, e.g. [['color', 'red'], ['top', '0']] + */ + public static function parse_css_block($style) + { + $pos = 0; + + // first remove comments + while (($pos = strpos($style, '/*', $pos)) !== false) { + $end = strpos($style, '*/', $pos+2); + + if ($end === false) { + $style = substr($style, 0, $pos); + } + else { + $style = substr_replace($style, '', $pos, $end - $pos + 2); + } + } + + // Replace new lines with spaces + $style = preg_replace('/[\r\n]+/', ' ', $style); + + $style = trim($style); + $length = strlen($style); + $result = []; + $pos = 0; + + while ($pos < $length && ($colon_pos = strpos($style, ':', $pos))) { + // Property name + $name = strtolower(trim(substr($style, $pos, $colon_pos - $pos))); + + // get the property value + $q = $s = false; + for ($i = $colon_pos + 1; $i < $length; $i++) { + if (($style[$i] == "\"" || $style[$i] == "'") && ($i == 0 || $style[$i-1] != "\\")) { + if ($q == $style[$i]) { + $q = false; + } + else if ($q === false) { + $q = $style[$i]; + } + } + else if ($style[$i] == "(" && !$q && ($i == 0 || $style[$i-1] != "\\")) { + $q = "("; + } + else if ($style[$i] == ")" && $q == "(" && $style[$i-1] != "\\") { + $q = false; + } + + if ($q === false 
&& (($s = $style[$i] == ';') || $i == $length - 1)) { + break; + } + } + + $value_length = $i - $colon_pos - ($s ? 1 : 0); + $value = trim(substr($style, $colon_pos + 1, $value_length)); + + if (strlen($name) && !preg_match('/[^a-z-]/', $name) && strlen($value) && $value !== ';') { + $result[] = [$name, $value]; + } + + $pos = $i + 1; + } + + return $result; + } + + /** + * Generate CSS classes from mimetype and filename extension + * + * @param string $mimetype Mimetype + * @param string $filename Filename + * + * @return string CSS classes separated by space + */ + public static function file2class($mimetype, $filename) + { + $mimetype = strtolower($mimetype); + $filename = strtolower($filename); + + list($primary, $secondary) = rcube_utils::explode('/', $mimetype); + + $classes = [$primary ?: 'unknown']; + + if (!empty($secondary)) { + $classes[] = $secondary; + } + + if (preg_match('/\.([a-z0-9]+)$/', $filename, $m)) { + if (!in_array($m[1], $classes)) { + $classes[] = $m[1]; + } + } + + return implode(' ', $classes); + } + + /** + * Decode escaped entities used by known XSS exploits. 
* See http://downloads.securityfocus.com/vulnerabilities/exploits/26800.eml for examples
 *
 * @param string $content CSS content to decode
 *
 * @return string Decoded string
 */
public static function xss_entity_decode($content)
{
    // turns a CSS hex escape (the captured hex digits) into a character
    $callback = function($matches) { return chr(hexdec($matches[1])); };

    // entities may be double-encoded, so decode twice
    $out = html_entity_decode(html_entity_decode($content));
    // drop an HTML comment wrapper around the stylesheet; if the markers
    // stayed in place strip_tags() below would remove the whole content
    $out = trim(preg_replace('/(^<!--|-->$)/', '', trim($out)));
    // resolve backslash-hex escapes, then unescape remaining backslash sequences
    $out = preg_replace_callback('/\\\([0-9a-f]{2,6})\s*/i', $callback, $out);
    $out = preg_replace('/\\\([^0-9a-f])/i', '\\1', $out);
    // remove CSS comments
    $out = preg_replace('#/\*.*\*/#Ums', '', $out);
    $out = strip_tags($out);

    return $out;
}

/**
 * Check if we can process not exceeding memory_limit
 *
 * @param int $need Required amount of memory
 *
 * @return bool True if memory won't be exceeded, False otherwise
 */
public static function mem_check($need)
{
    $mem_limit = parse_bytes(ini_get('memory_limit'));
    $memory    = function_exists('memory_get_usage') ? memory_get_usage() : 16*1024*1024; // safe value: 16MB

    return $mem_limit > 0 && $memory + $need > $mem_limit ? 
false : true; + } + + /** + * Check if working in SSL mode + * + * @param int $port HTTPS port number + * @param bool $use_https Enables 'use_https' option checking + * + * @return bool True in SSL mode, False otherwise + */ + public static function https_check($port = null, $use_https = true) + { + if (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) != 'off') { + return true; + } + + if (!empty($_SERVER['HTTP_X_FORWARDED_PROTO']) + && strtolower($_SERVER['HTTP_X_FORWARDED_PROTO']) == 'https' + && in_array($_SERVER['REMOTE_ADDR'], (array) rcube::get_instance()->config->get('proxy_whitelist', [])) + ) { + return true; + } + + if ($port && isset($_SERVER['SERVER_PORT']) && $_SERVER['SERVER_PORT'] == $port) { + return true; + } + + if ($use_https && rcube::get_instance()->config->get('use_https')) { + return true; + } + + return false; + } + + /** + * Replaces hostname variables. + * + * @param string $name Hostname + * @param string $host Optional IMAP hostname + * + * @return string Hostname + */ + public static function parse_host($name, $host = '') + { + if (!is_string($name)) { + return $name; + } + + // %n - host + $n = self::server_name(); + // %t - host name without first part, e.g. %n=mail.domain.tld, %t=domain.tld + // If %n=domain.tld then %t=domain.tld as well (remains valid) + $t = preg_replace('/^[^.]+\.(?![^.]+$)/', '', $n); + // %d - domain name without first part (up to domain.tld) + $d = preg_replace('/^[^.]+\.(?![^.]+$)/', '', self::server_name('HTTP_HOST')); + // %h - IMAP host + $h = !empty($_SESSION['storage_host']) ? $_SESSION['storage_host'] : $host; + // %z - IMAP domain without first part, e.g. %h=imap.domain.tld, %z=domain.tld + // If %h=domain.tld then %z=domain.tld as well (remains valid) + $z = preg_replace('/^[^.]+\.(?![^.]+$)/', '', $h); + // %s - domain name after the '@' from e-mail address provided at login screen. 
+ // Returns FALSE if an invalid email is provided + $s = ''; + if (strpos($name, '%s') !== false) { + $user_email = self::idn_to_ascii(self::get_input_value('_user', self::INPUT_POST)); + $matches = preg_match('/(.*)@([a-z0-9\.\-\[\]\:]+)/i', $user_email, $s); + if ($matches < 1 || filter_var($s[1]."@".$s[2], FILTER_VALIDATE_EMAIL) === false) { + return false; + } + $s = $s[2]; + } + + return str_replace(['%n', '%t', '%d', '%h', '%z', '%s'], [$n, $t, $d, $h, $z, $s], $name); + } + + /** + * Returns the server name after checking it against trusted hostname patterns. + * + * Returns 'localhost' and logs a warning when the hostname is not trusted. + * + * @param string $type The $_SERVER key, e.g. 'HTTP_HOST', Default: 'SERVER_NAME'. + * @param bool $strip_port Strip port from the host name + * + * @return string Server name + */ + public static function server_name($type = null, $strip_port = true) + { + if (!$type) { + $type = 'SERVER_NAME'; + } + + $name = isset($_SERVER[$type]) ? $_SERVER[$type] : null; + $rcube = rcube::get_instance(); + $patterns = (array) $rcube->config->get('trusted_host_patterns'); + + if (!empty($name)) { + if ($strip_port) { + $name = preg_replace('/:\d+$/', '', $name); + } + + if (empty($patterns)) { + return $name; + } + + foreach ($patterns as $pattern) { + // the pattern might be a regular expression or just a host/domain name + if (preg_match('/[^a-zA-Z0-9.:-]/', $pattern)) { + if (preg_match("/$pattern/", $name)) { + return $name; + } + } + else if (strtolower($name) === strtolower($pattern)) { + return $name; + } + } + + $rcube->raise_error([ + 'file' => __FILE__, 'line' => __LINE__, + 'message' => "Specified host is not trusted. Using 'localhost'." + ] + , true, false + ); + } + + return 'localhost'; + } + + /** + * Returns remote IP address and forwarded addresses if found + * + * @return string Remote IP address(es) + */ + public static function remote_ip() + { + $address = isset($_SERVER['REMOTE_ADDR']) ? 
$_SERVER['REMOTE_ADDR'] : ''; + + // append the NGINX X-Real-IP header, if set + if (!empty($_SERVER['HTTP_X_REAL_IP']) && $_SERVER['HTTP_X_REAL_IP'] != $address) { + $remote_ip[] = 'X-Real-IP: ' . $_SERVER['HTTP_X_REAL_IP']; + } + + // append the X-Forwarded-For header, if set + if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) { + $remote_ip[] = 'X-Forwarded-For: ' . $_SERVER['HTTP_X_FORWARDED_FOR']; + } + + if (!empty($remote_ip)) { + $address .= ' (' . implode(',', $remote_ip) . ')'; + } + + return $address; + } + + /** + * Returns the real remote IP address + * + * @return string Remote IP address + */ + public static function remote_addr() + { + // Check if any of the headers are set first to improve performance + if (!empty($_SERVER['HTTP_X_FORWARDED_FOR']) || !empty($_SERVER['HTTP_X_REAL_IP'])) { + $proxy_whitelist = (array) rcube::get_instance()->config->get('proxy_whitelist', []); + if (in_array($_SERVER['REMOTE_ADDR'], $proxy_whitelist)) { + if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) { + foreach (array_reverse(explode(',', $_SERVER['HTTP_X_FORWARDED_FOR'])) as $forwarded_ip) { + $forwarded_ip = trim($forwarded_ip); + if (!in_array($forwarded_ip, $proxy_whitelist)) { + return $forwarded_ip; + } + } + } + + if (!empty($_SERVER['HTTP_X_REAL_IP'])) { + return $_SERVER['HTTP_X_REAL_IP']; + } + } + } + + if (!empty($_SERVER['REMOTE_ADDR'])) { + return $_SERVER['REMOTE_ADDR']; + } + + return ''; + } + + /** + * Read a specific HTTP request header. + * + * @param string $name Header name + * + * @return string|null Header value or null if not available + */ + public static function request_header($name) + { + if (function_exists('apache_request_headers')) { + $headers = apache_request_headers(); + $key = strtoupper($name); + } + else { + $headers = $_SERVER; + $key = 'HTTP_' . strtoupper(strtr($name, '-', '_')); + } + + if (!empty($headers)) { + $headers = array_change_key_case($headers, CASE_UPPER); + + return isset($headers[$key]) ? 
$headers[$key] : null; + } + } + + /** + * Explode quoted string + * + * @param string $delimiter Delimiter expression string for preg_match() + * @param string $string Input string + * + * @return array String items + */ + public static function explode_quoted_string($delimiter, $string) + { + $result = []; + $strlen = strlen($string); + + for ($q=$p=$i=0; $i < $strlen; $i++) { + if ($string[$i] == "\"" && (!isset($string[$i-1]) || $string[$i-1] != "\\")) { + $q = $q ? false : true; + } + else if (!$q && preg_match("/$delimiter/", $string[$i])) { + $result[] = substr($string, $p, $i - $p); + $p = $i + 1; + } + } + + $result[] = (string) substr($string, $p); + + return $result; + } + + /** + * Improved equivalent to strtotime() + * + * @param string $date Date string + * @param DateTimeZone $timezone Timezone to use for DateTime object + * + * @return int Unix timestamp + */ + public static function strtotime($date, $timezone = null) + { + $date = self::clean_datestr($date); + $tzname = $timezone ? ' ' . $timezone->getName() : ''; + + // unix timestamp + if (is_numeric($date)) { + return (int) $date; + } + + // It can be very slow when provided string is not a date and very long + if (strlen($date) > 128) { + $date = substr($date, 0, 128); + } + + // if date parsing fails, we have a date in non-rfc format. + // remove token from the end and try again + while (($ts = @strtotime($date . 
$tzname)) === false || $ts < 0) { + if (($pos = strrpos($date, ' ')) === false) { + break; + } + + $date = rtrim(substr($date, 0, $pos)); + } + + return (int) $ts; + } + + /** + * Date parsing function that turns the given value into a DateTime object + * + * @param string $date Date string + * @param DateTimeZone $timezone Timezone to use for DateTime object + * + * @return DateTime|false DateTime object or False on failure + */ + public static function anytodatetime($date, $timezone = null) + { + if ($date instanceof DateTime) { + return $date; + } + + $dt = false; + $date = self::clean_datestr($date); + + // try to parse string with DateTime first + if (!empty($date)) { + try { + $_date = preg_match('/^[0-9]+$/', $date) ? "@$date" : $date; + $dt = $timezone ? new DateTime($_date, $timezone) : new DateTime($_date); + } + catch (Exception $e) { + // ignore + } + } + + // try our advanced strtotime() method + if (!$dt && ($timestamp = self::strtotime($date, $timezone))) { + try { + $dt = new DateTime("@".$timestamp); + if ($timezone) { + $dt->setTimezone($timezone); + } + } + catch (Exception $e) { + // ignore + } + } + + return $dt; + } + + /** + * Clean up date string for strtotime() input + * + * @param string $date Date string + * + * @return string Date string + */ + public static function clean_datestr($date) + { + $date = trim($date); + + // check for MS Outlook vCard date format YYYYMMDD + if (preg_match('/^([12][90]\d\d)([01]\d)([0123]\d)$/', $date, $m)) { + return sprintf('%04d-%02d-%02d 00:00:00', intval($m[1]), intval($m[2]), intval($m[3])); + } + + // Clean malformed data + $date = preg_replace( + [ + '/\(.*\)/', // remove RFC comments + '/GMT\s*([+-][0-9]+)/', // support non-standard "GMTXXXX" literal + '/[^a-z0-9\x20\x09:\/\.+-]/i', // remove any invalid characters + '/\s*(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s*/i', // remove weekday names + ], + [ + '', + '\\1', + '', + '', + ], + $date + ); + + $date = trim($date); + + // try to fix dd/mm vs. 
mm/dd discrepancy, we can't do more here
    if (preg_match('/^(\d{1,2})[.\/-](\d{1,2})[.\/-](\d{4})(\s.*)?$/', $date, $m)) {
        $mdy   = $m[2] > 12 && $m[1] <= 12;
        $day   = $mdy ? $m[2] : $m[1];
        $month = $mdy ? $m[1] : $m[2];
        $date  = sprintf('%04d-%02d-%02d%s', $m[3], $month, $day, isset($m[4]) ? $m[4]: ' 00:00:00');
    }
    // I've found that YYYY.MM.DD is recognized wrong, so here's a fix
    else if (preg_match('/^(\d{4})\.(\d{1,2})\.(\d{1,2})(\s.*)?$/', $date, $m)) {
        $date  = sprintf('%04d-%02d-%02d%s', $m[1], $m[2], $m[3], isset($m[4]) ? $m[4]: ' 00:00:00');
    }

    return $date;
}

/**
 * Turns the given date-only string in defined format into YYYY-MM-DD format.
 *
 * Supported formats: 'Y/m/d', 'Y.m.d', 'd-m-Y', 'd/m/Y', 'd.m.Y', 'j.n.Y'
 *
 * Both $format and $date are split on '.', '-', '/' and '\'. Only the first
 * format item is inspected to decide the order of the date items.
 *
 * @param string $date   Date string
 * @param string $format Input date format
 *
 * @return string Date string in YYYY-MM-DD format, or the original string
 *                if format is not supported
 */
public static function format_datestr($date, $format)
{
    // The dash goes last in the class so it is a literal; written as
    // ".-\/" it formed a range covering only '.' and '/', which made
    // dash-separated formats such as 'd-m-Y' impossible to split.
    $separators   = '/[.\/\\\\-]/';
    $format_items = preg_split($separators, $format);
    $date_items   = preg_split($separators, $date);
    $iso_format   = '%04d-%02d-%02d';

    if (count($format_items) == 3 && count($date_items) == 3) {
        $first = $format_items[0];

        if ($first == 'Y') {
            // year-month-day
            $date = sprintf($iso_format, $date_items[0], $date_items[1], $date_items[2]);
        }
        else if (in_array($first, ['d', 'j'], true)) {
            // day-month-year
            $date = sprintf($iso_format, $date_items[2], $date_items[1], $date_items[0]);
        }
        else if (in_array($first, ['m', 'n'], true)) {
            // month-day-year
            $date = sprintf($iso_format, $date_items[2], $date_items[0], $date_items[1]);
        }
    }

    return $date;
}

/**
 * Wrapper for idn_to_ascii with support for e-mail address.
 *
 * Warning: Domain names may be lowercase'd.
 * Warning: An empty string may be returned on invalid domain. 
+ * + * @param string $str Decoded e-mail address + * + * @return string Encoded e-mail address + */ + public static function idn_to_ascii($str) + { + return self::idn_convert($str, true); + } + + /** + * Wrapper for idn_to_utf8 with support for e-mail address + * + * @param string $str Decoded e-mail address + * + * @return string Encoded e-mail address + */ + public static function idn_to_utf8($str) + { + return self::idn_convert($str, false); + } + + /** + * Convert a string to ascii or utf8 (using IDNA standard) + * + * @param string $input Decoded e-mail address + * @param boolean $is_utf Convert by idn_to_ascii if true and idn_to_utf8 if false + * + * @return string Encoded e-mail address + */ + public static function idn_convert($input, $is_utf = false) + { + if ($at = strpos($input, '@')) { + $user = substr($input, 0, $at); + $domain = substr($input, $at + 1); + } + else { + $user = ''; + $domain = $input; + } + + // Note that in PHP 7.2/7.3 calling idn_to_* functions with default arguments + // throws a warning, so we have to set the variant explicitly (#6075) + $variant = defined('INTL_IDNA_VARIANT_UTS46') ? INTL_IDNA_VARIANT_UTS46 : null; + $options = 0; + + // Because php-intl extension lowercases domains and return false + // on invalid input (#6224), we skip conversion when not needed + + if ($is_utf) { + if (preg_match('/[^\x20-\x7E]/', $domain)) { + $options = defined('IDNA_NONTRANSITIONAL_TO_ASCII') ? IDNA_NONTRANSITIONAL_TO_ASCII : 0; + $domain = idn_to_ascii($domain, $options, $variant); + } + } + else if (preg_match('/(^|\.)xn--/i', $domain)) { + $options = defined('IDNA_NONTRANSITIONAL_TO_UNICODE') ? IDNA_NONTRANSITIONAL_TO_UNICODE : 0; + $domain = idn_to_utf8($domain, $options, $variant); + } + + if ($domain === false) { + return ''; + } + + return $at ? $user . '@' . 
$domain : $domain; + } + + /** + * Split the given string into word tokens + * + * @param string $str Input to tokenize + * @param int $minlen Minimum length of a single token + * + * @return array List of tokens + */ + public static function tokenize_string($str, $minlen = 2) + { + $expr = ['/[\s;,"\'\/+-]+/ui', '/(\d)[-.\s]+(\d)/u']; + $repl = [' ', '\\1\\2']; + + if ($minlen > 1) { + $minlen--; + $expr[] = "/(^|\s+)\w{1,$minlen}(\s+|$)/u"; + $repl[] = ' '; + } + + return array_filter(explode(" ", preg_replace($expr, $repl, $str))); + } + + /** + * Normalize the given string for fulltext search. + * Currently only optimized for ISO-8859-1 and ISO-8859-2 characters; to be extended + * + * @param string $str Input string (UTF-8) + * @param bool $as_array True to return list of words as array + * @param int $minlen Minimum length of tokens + * + * @return string|array Normalized string or a list of normalized tokens + */ + public static function normalize_string($str, $as_array = false, $minlen = 2) + { + // replace 4-byte unicode characters with '?' character, + // these are not supported in default utf-8 charset on mysql, + // the chance we'd need them in searching is very low + $str = preg_replace('/(' + . '\xF0[\x90-\xBF][\x80-\xBF]{2}' + . '|[\xF1-\xF3][\x80-\xBF]{3}' + . '|\xF4[\x80-\x8F][\x80-\xBF]{2}' + . 
')/', '?', $str); + + // split by words + $arr = self::tokenize_string($str, $minlen); + + // detect character set + if (rcube_charset::convert(rcube_charset::convert($str, 'UTF-8', 'ISO-8859-1'), 'ISO-8859-1', 'UTF-8') == $str) { + // ISO-8859-1 (or ASCII) + preg_match_all('/./u', 'äâàåáãæçéêëèïîìíñöôòøõóüûùúýÿ', $keys); + preg_match_all('/./', 'aaaaaaaceeeeiiiinoooooouuuuyy', $values); + + $mapping = array_combine($keys[0], $values[0]); + $mapping = array_merge($mapping, ['ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u']); + } + else if (rcube_charset::convert(rcube_charset::convert($str, 'UTF-8', 'ISO-8859-2'), 'ISO-8859-2', 'UTF-8') == $str) { + // ISO-8859-2 + preg_match_all('/./u', 'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżý', $keys); + preg_match_all('/./', 'aaaaccceeeeiilllnnooorrsssttuuuuzzzy', $values); + + $mapping = array_combine($keys[0], $values[0]); + $mapping = array_merge($mapping, ['ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u']); + } + + foreach ($arr as $i => $part) { + $part = mb_strtolower($part); + + if (!empty($mapping)) { + $part = strtr($part, $mapping); + } + + $arr[$i] = $part; + } + + return $as_array ? 
$arr : implode(' ', $arr); + } + + /** + * Compare two strings for matching words (order not relevant) + * + * @param string $haystack Haystack + * @param string $needle Needle + * + * @return bool True if match, False otherwise + */ + public static function words_match($haystack, $needle) + { + $a_needle = self::tokenize_string($needle, 1); + $_haystack = implode(' ', self::tokenize_string($haystack, 1)); + $valid = strlen($_haystack) > 0; + $hits = 0; + + foreach ($a_needle as $w) { + if ($valid) { + if (stripos($_haystack, $w) !== false) { + $hits++; + } + } + else if (stripos($haystack, $w) !== false) { + $hits++; + } + } + + return $hits >= count($a_needle); + } + + /** + * Parse commandline arguments into a hash array + * + * @param array $aliases Argument alias names + * + * @return array Argument values hash + */ + public static function get_opt($aliases = []) + { + $args = []; + $bool = []; + + // find boolean (no value) options + foreach ($aliases as $key => $alias) { + if ($pos = strpos($alias, ':')) { + $aliases[$key] = substr($alias, 0, $pos); + $bool[] = $key; + $bool[] = $aliases[$key]; + } + } + + for ($i=1; $i < count($_SERVER['argv']); $i++) { + $arg = $_SERVER['argv'][$i]; + $value = true; + $key = null; + + if ($arg[0] == '-') { + $key = preg_replace('/^-+/', '', $arg); + $sp = strpos($arg, '='); + + if ($sp > 0) { + $key = substr($key, 0, $sp - 2); + $value = substr($arg, $sp+1); + } + else if (in_array($key, $bool)) { + $value = true; + } + else if ( + isset($_SERVER['argv'][$i + 1]) + && strlen($_SERVER['argv'][$i + 1]) + && $_SERVER['argv'][$i + 1][0] != '-' + ) { + $value = $_SERVER['argv'][++$i]; + } + + $args[$key] = is_string($value) ? 
preg_replace(['/^["\']/', '/["\']$/'], '', $value) : $value; + } + else { + $args[] = $arg; + } + + if (!empty($aliases[$key])) { + $alias = $aliases[$key]; + $args[$alias] = $args[$key]; + } + } + + return $args; + } + + /** + * Safe password prompt for command line + * from http://blogs.sitepoint.com/2009/05/01/interactive-cli-password-prompt-in-php/ + * + * @param string $prompt Prompt text + * + * @return string Password + */ + public static function prompt_silent($prompt = "Password:") + { + if (preg_match('/^win/i', PHP_OS)) { + $vbscript = sys_get_temp_dir() . 'prompt_password.vbs'; + $vbcontent = 'wscript.echo(InputBox("' . addslashes($prompt) . '", "", "password here"))'; + file_put_contents($vbscript, $vbcontent); + + $command = "cscript //nologo " . escapeshellarg($vbscript); + $password = rtrim(shell_exec($command)); + unlink($vbscript); + + return $password; + } + + $command = "/usr/bin/env bash -c 'echo OK'"; + + if (rtrim(shell_exec($command)) !== 'OK') { + echo $prompt; + $pass = trim(fgets(STDIN)); + echo chr(8)."\r" . $prompt . str_repeat("*", strlen($pass))."\n"; + + return $pass; + } + + $command = "/usr/bin/env bash -c 'read -s -p \"" . addslashes($prompt) . 
"\" mypassword && echo \$mypassword'"; + $password = rtrim(shell_exec($command)); + echo "\n"; + + return $password; + } + + /** + * Find out if the string content means true or false + * + * @param string $str Input value + * + * @return bool Boolean value + */ + public static function get_boolean($str) + { + $str = strtolower($str); + + return !in_array($str, ['false', '0', 'no', 'off', 'nein', ''], true); + } + + /** + * OS-dependent absolute path detection + * + * @param string $path File path + * + * @return bool True if the path is absolute, False otherwise + */ + public static function is_absolute_path($path) + { + if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') { + return (bool) preg_match('!^[a-z]:[\\\\/]!i', $path); + } + + return isset($path[0]) && $path[0] == '/'; + } + + /** + * Resolve relative URL + * + * @param string $url Relative URL + * + * @return string Absolute URL + */ + public static function resolve_url($url) + { + // prepend protocol://hostname:port + if (!preg_match('|^https?://|', $url)) { + $schema = 'http'; + $default_port = 80; + + if (self::https_check()) { + $schema = 'https'; + $default_port = 443; + } + + $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : null; + $port = isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : null; + + $prefix = $schema . '://' . preg_replace('/:\d+$/', '', $host); + if ($port != $default_port && $port != 80) { + $prefix .= ':' . $port; + } + + $url = $prefix . ($url[0] == '/' ? '' : '/') . 
$url; + } + + return $url; + } + + /** + * Generate a random string + * + * @param int $length String length + * @param bool $raw Return RAW data instead of ascii + * + * @return string The generated random string + */ + public static function random_bytes($length, $raw = false) + { + $hextab = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + $tabsize = strlen($hextab); + + // Use PHP7 true random generator + if ($raw && function_exists('random_bytes')) { + return random_bytes($length); + } + + if (!$raw && function_exists('random_int')) { + $result = ''; + while ($length-- > 0) { + $result .= $hextab[random_int(0, $tabsize - 1)]; + } + + return $result; + } + + $random = openssl_random_pseudo_bytes($length); + + if ($random === false && $length > 0) { + throw new Exception("Failed to get random bytes"); + } + + if (!$raw) { + for ($x = 0; $x < $length; $x++) { + $random[$x] = $hextab[ord($random[$x]) % $tabsize]; + } + } + + return $random; + } + + /** + * Convert binary data into readable form (containing a-zA-Z0-9 characters) + * + * @param string $input Binary input + * + * @return string Readable output (Base62) + * @deprecated since 1.3.1 + */ + public static function bin2ascii($input) + { + $hextab = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + $result = ''; + + for ($x = 0; $x < strlen($input); $x++) { + $result .= $hextab[ord($input[$x]) % 62]; + } + + return $result; + } + + /** + * Format current date according to specified format. + * This method supports microseconds (u). 
+ * + * @param string $format Date format (default: 'd-M-Y H:i:s O') + * + * @return string Formatted date + */ + public static function date_format($format = null) + { + if (empty($format)) { + $format = 'd-M-Y H:i:s O'; + } + + if (strpos($format, 'u') !== false) { + $dt = number_format(microtime(true), 6, '.', ''); + + try { + $date = date_create_from_format('U.u', $dt); + $date->setTimeZone(new DateTimeZone(date_default_timezone_get())); + + return $date->format($format); + } + catch (Exception $e) { + // ignore, fallback to date() + } + } + + return date($format); + } + + /** + * Parses socket options and returns options for specified hostname. + * + * @param array &$options Configured socket options + * @param string $host Hostname + */ + public static function parse_socket_options(&$options, $host = null) + { + if (empty($host) || empty($options)) { + return; + } + + // get rid of schema and port from the hostname + $host_url = parse_url($host); + if (isset($host_url['host'])) { + $host = $host_url['host']; + } + + // find per-host options + if ($host && array_key_exists($host, $options)) { + $options = $options[$host]; + } + } + + /** + * Get maximum upload size + * + * @return int Maximum size in bytes + */ + public static function max_upload_size() + { + // find max filesize value + $max_filesize = parse_bytes(ini_get('upload_max_filesize')); + $max_postsize = parse_bytes(ini_get('post_max_size')); + + if ($max_postsize && $max_postsize < $max_filesize) { + $max_filesize = $max_postsize; + } + + return $max_filesize; + } + + /** + * Detect and log last PREG operation error + * + * @param array $error Error data (line, file, code, message) + * @param bool $terminate Stop script execution + * + * @return bool True on error, False otherwise + */ + public static function preg_error($error = [], $terminate = false) + { + if (($preg_error = preg_last_error()) != PREG_NO_ERROR) { + $errstr = "PCRE Error: $preg_error."; + + if ($preg_error == 
PREG_BACKTRACK_LIMIT_ERROR) { + $errstr .= " Consider raising pcre.backtrack_limit!"; + } + if ($preg_error == PREG_RECURSION_LIMIT_ERROR) { + $errstr .= " Consider raising pcre.recursion_limit!"; + } + + $error = array_merge(['code' => 620, 'line' => __LINE__, 'file' => __FILE__], $error); + + if (!empty($error['message'])) { + $error['message'] .= ' ' . $errstr; + } + else { + $error['message'] = $errstr; + } + + rcube::raise_error($error, true, $terminate); + + return true; + } + + return false; + } + + /** + * Generate a temporary file path in the Roundcube temp directory + * + * @param string $file_name String identifier for the type of temp file + * @param bool $unique Generate unique file names based on $file_name + * @param bool $create Create the temp file or not + * + * @return string temporary file path + */ + public static function temp_filename($file_name, $unique = true, $create = true) + { + $temp_dir = rcube::get_instance()->config->get('temp_dir'); + + // Fall back to system temp dir if configured dir is not writable + if (!is_writable($temp_dir)) { + $temp_dir = sys_get_temp_dir(); + } + + // On Windows tempnam() uses only the first three characters of prefix so use uniqid() and manually add the prefix + // Full prefix is required for garbage collection to recognise the file + $temp_file = $unique ? str_replace('.', '', uniqid($file_name, true)) : $file_name; + $temp_path = unslashify($temp_dir) . '/' . RCUBE_TEMP_FILE_PREFIX . 
$temp_file; + + // Sanity check for unique file name + if ($unique && file_exists($temp_path)) { + return self::temp_filename($file_name, $unique, $create); + } + + // Create the file to prevent possible race condition like tempnam() does + if ($create) { + touch($temp_path); + } + + return $temp_path; + } + + /** + * Clean the subject from reply and forward prefix + * + * @param string $subject Subject to clean + * @param string $mode Mode of cleaning : reply, forward or both + * + * @return string Cleaned subject + */ + public static function remove_subject_prefix($subject, $mode = 'both') + { + $config = rcmail::get_instance()->config; + + // Clean subject prefix for reply, forward or both + if ($mode == 'both') { + $reply_prefixes = $config->get('subject_reply_prefixes', ['Re:']); + $forward_prefixes = $config->get('subject_forward_prefixes', ['Fwd:', 'Fw:']); + $prefixes = array_merge($reply_prefixes, $forward_prefixes); + } + else if ($mode == 'reply') { + $prefixes = $config->get('subject_reply_prefixes', ['Re:']); + // replace (was: ...) (#1489375) + $subject = preg_replace('/\s*\([wW]as:[^\)]+\)\s*$/', '', $subject); + } + else if ($mode == 'forward') { + $prefixes = $config->get('subject_forward_prefixes', ['Fwd:', 'Fw:']); + } + + // replace Re:, Re[x]:, Re-x (#1490497) + $pieces = array_map(function($prefix) { + $prefix = strtolower(str_replace(':', '', $prefix)); + return "$prefix:|$prefix\[\d\]:|$prefix-\d:"; + }, $prefixes); + $pattern = '/^('.implode('|', $pieces).')\s*/i'; + do { + $subject = preg_replace($pattern, '', $subject, -1, $count); + } + while ($count); + + return trim($subject); + } + + /** + * Generates the HAproxy style PROXY protocol header for injection + * into the TCP stream, if configured. + * + * http://www.haproxy.org/download/1.6/doc/proxy-protocol.txt + * + * PROXY protocol headers must be sent before any other data is sent on the TCP socket. 
+ * + * @param array $options Preferences array which may contain proxy_protocol (generally {driver}_conn_options) + * + * @return string Proxy protocol header data, if enabled, otherwise empty string + */ + public static function proxy_protocol_header($options = null) + { + if (empty($options) || !is_array($options) || !array_key_exists('proxy_protocol', $options)) { + return ''; + } + + if (is_array($options['proxy_protocol'])) { + $version = $options['proxy_protocol']['version']; + $options = $options['proxy_protocol']; + } + else { + $version = (int) $options['proxy_protocol']; + $options = []; + } + + $remote_addr = array_key_exists('remote_addr', $options) ? $options['remote_addr'] : self::remote_addr(); + $remote_port = array_key_exists('remote_port', $options) ? $options['remote_port'] : $_SERVER['REMOTE_PORT']; + $local_addr = array_key_exists('local_addr', $options) ? $options['local_addr'] : $_SERVER['SERVER_ADDR']; + $local_port = array_key_exists('local_port', $options) ? $options['local_port'] : $_SERVER['SERVER_PORT']; + $ip_version = strpos($remote_addr, ':') === false ? 4 : 6; + + // Text based PROXY protocol + if ($version == 1) { + // PROXY protocol does not support dual IPv6+IPv4 type addresses, e.g. ::127.0.0.1 + if ($ip_version === 6 && strpos($remote_addr, '.') !== false) { + $remote_addr = inet_ntop(inet_pton($remote_addr)); + } + if ($ip_version === 6 && strpos($local_addr, '.') !== false) { + $local_addr = inet_ntop(inet_pton($local_addr)); + } + + return "PROXY TCP{$ip_version} {$remote_addr} {$local_addr} {$remote_port} {$local_port}\r\n"; + } + + // Binary PROXY protocol + if ($version == 2) { + $addr = inet_pton($remote_addr) . inet_pton($local_addr) . pack('n', $remote_port) . pack('n', $local_port); + $head = implode([ + '0D0A0D0A000D0A515549540A', // protocol header + '21', // protocol version and command + $ip_version === 6 ? '2' : '1', // IP version type + '1' // TCP + ]); + + return pack('H*', $head) . pack('n', strlen($addr)) . 
$addr; + } + + return ''; + } +}