diff --git a/pykolab/imap_utf7.py b/pykolab/imap_utf7.py index 56366ee..fd5c11e 100644 --- a/pykolab/imap_utf7.py +++ b/pykolab/imap_utf7.py @@ -1,132 +1,134 @@ # Copyright (c) 2014, Menno Smits # All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Menno Smits nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL MENNO SMITS BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # This file contains two main methods used to encode and decode UTF-7 # string, described in the RFC 3501. There are some variations specific # to IMAP4rev1, so the built-in Python UTF-7 codec can't be used instead. 
#
# The main difference is the shift character (used to switch from ASCII to
# base64 encoding context), which is & in this modified UTF-7 convention,
# since + is considered as mainly used in mailbox names.
# Other variations and examples can be found in the RFC 3501, section 5.1.3.

import binascii


def encode(s):
    """Encode a folder name using IMAP modified UTF-7 encoding.

    Printable ASCII (0x20-0x7E) is copied through unchanged, except for
    "&" which is escaped as "&-". Every run of other characters is
    emitted as "&<modified base64 of the UTF-16BE bytes>-".

    Input is text (str); output is bytes.
    If non-str input is provided, the input is returned unchanged.
    """
    if not isinstance(s, str):
        return s

    res = bytearray()
    b64_buffer = []

    def consume_b64_buffer(buf):
        """Flush *buf* into ``res`` as a shifted base64 run, then empty it.

        Does nothing when *buf* is empty, so it is safe to call before
        every ASCII character.
        """
        if buf:
            res.extend(b"&" + base64_utf7_encode(buf) + b"-")
            del buf[:]

    for c in s:
        o = ord(c)
        if 0x20 <= o <= 0x7E:
            # Printable ASCII ends any pending base64 run.
            consume_b64_buffer(b64_buffer)
            if o == 0x26:
                # "&" is the shift character, so a literal one is escaped.
                res.extend(b"&-")
            else:
                res.append(o)
        else:
            # Collect the character; the run is flushed when the next
            # printable ASCII character (or the end of the string) is met.
            b64_buffer.append(c)

    # Flush the last run if the string ends with non-ASCII characters.
    consume_b64_buffer(b64_buffer)

    return bytes(res)


AMPERSAND_ORD = ord("&")
DASH_ORD = ord("-")


def _decode_shift_sequence(buf):
    """Decode one buffered shift sequence (leading "&" included) to text.

    A buffer holding only the shift character stands for a literal "&";
    anything after the shift character is treated as modified base64.
    """
    if len(buf) == 1:
        return "&"
    return base64_utf7_decode(buf[1:])


def decode(s):
    """Decode a folder name from IMAP modified UTF-7 encoding to text.

    Input is bytes; output is text (str).
    If non-bytes input is provided, the input is returned unchanged.

    An unterminated shift sequence at the end of the input is decoded as
    if its closing "-" were present; in particular a trailing lone "&"
    yields "&" (it used to be turned into "+").
    """
    if not isinstance(s, bytes):
        return s

    res = []
    # Bytes of the shift sequence currently being read, including the
    # leading "&". An empty buffer means we are in plain ASCII context.
    b64_buffer = bytearray()

    # Iterating a bytearray always yields ints, so the comparisons with
    # AMPERSAND_ORD / DASH_ORD below need no per-item type check.
    for c in bytearray(s):
        if c == AMPERSAND_ORD and not b64_buffer:
            # Shift character outside a sequence: start buffering.
            b64_buffer.append(c)
        elif c == DASH_ORD and b64_buffer:
            # Shift back to ASCII: decode the buffered run and reset it.
            res.append(_decode_shift_sequence(b64_buffer))
            b64_buffer = bytearray()
        elif b64_buffer:
            # Still between the shift character and the closing "-".
            b64_buffer.append(c)
        else:
            # Plain character outside any shift sequence.
            res.append(chr(c))

    # Decode whatever is left when the closing "-" is missing.
    if b64_buffer:
        res.append(_decode_shift_sequence(b64_buffer))

    return "".join(res)


def base64_utf7_encode(buffer):
    """Return the modified base64 form of the characters in *buffer*.

    The characters are joined, encoded as UTF-16BE and base64-encoded;
    the trailing newline and "=" padding are stripped and "/" is replaced
    by ",". The surrounding "&" and "-" are NOT added here.
    """
    s = "".join(buffer).encode("utf-16be")
    return binascii.b2a_base64(s).rstrip(b"\n=").replace(b"/", b",")


def base64_utf7_decode(s):
    """Decode a modified base64 run (without its "&" and "-") to text.

    The run is rewritten as a standard UTF-7 shift sequence ("+...-",
    with "," mapped back to "/") and handed to the built-in utf-7 codec.
    """
    s_utf7 = b"+" + s.replace(b",", b"/") + b"-"
    return s_utf7.decode("utf-7")