diff --git a/src/msg.cpp b/src/msg.cpp index bc98a46..84c4301 100644 --- a/src/msg.cpp +++ b/src/msg.cpp @@ -1,341 +1,359 @@ extern "C" { #include "define.h" #include "msg.h" #include #include #include #include #include #include #include } #include #include #include using namespace std; struct property { uint32_t tag; uint32_t flags; uint32_t length; /* or value */ uint32_t reserved; }; typedef list property_list; /** Convert str to an 8 bit charset if it is utf8, null strings are preserved. * * @param str reference to the mapi string of interest * @param charset pointer to the 8 bit charset to use */ +static void convert_8bit(pst_string &str, const char *charset); static void convert_8bit(pst_string &str, const char *charset) { if (!str.str) return; /* null */ if (!str.is_utf8) return; /* not utf8 */ + DEBUG_ENT("convert_8bit"); pst_vbuf *newer = pst_vballoc(2); - size_t rc = pst_vb_utf8to8bit(newer, str.str, strlen(str.str), charset); + size_t strsize = strlen(str.str); + size_t rc = pst_vb_utf8to8bit(newer, str.str, strsize, charset); if (rc == (size_t)-1) { /* unable to convert, change the charset to utf8 */ free(newer->b); DEBUG_INFO(("Failed to convert utf-8 to %s\n", charset)); + DEBUG_HEXDUMPC(str.str, strsize, 0x10); } else { /* null terminate the output string */ pst_vbgrow(newer, 1); newer->b[newer->dlen] = '\0'; free(str.str); str.str = newer->b; } free(newer); + DEBUG_RET(); } +static void empty_property(GsfOutfile *out, uint32_t tag); static void empty_property(GsfOutfile *out, uint32_t tag) { vector n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); } +static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size) { if (!contents) return; vector n(50); snprintf(&n[0], n.size(),
"__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); gsf_output_write(dst, size, (const guint8*)contents); gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); int bias = ((tag & 0x0000ffff) == 0x001e) ? 1 : 0; property p; p.tag = tag; p.flags = 0x6; /* make all the properties writable */ p.length = bias + size; p.reserved = 0; prop.push_back(p); } +static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp); /** Copy everything readable from fp, starting at its current position, into a new child stream of out named with the "__substg1.0_%08X" pattern, then append an entry for tag to prop whose length field is the number of bytes copied. * * @param out parent storage that receives the child stream * @param prop property list that receives the new entry * @param tag property tag, also used to build the stream name * @param fp open stream to read from; it is not closed here */ static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp) { vector n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); size_t size = 0; const size_t bsize = 10000; char buf[bsize]; while (1) { size_t s = fread(buf, 1, bsize, fp); if (!s) break; gsf_output_write(dst, s, (const guint8*)buf); size += s; /* running total of bytes copied; without this the entry below always recorded length 0 */ } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); property p; p.tag = tag; p.flags = 0x6; /* make all the properties writable */ p.length = size; p.reserved = 0; prop.push_back(p); } +static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) { if (contents.str) { convert_8bit(contents, charset); string_property(out, prop, tag, contents.str, strlen(contents.str)); } } +static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents); static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) { if (contents.str) { convert_8bit(contents, charset); string_property(out, prop, tag, contents.str, strlen(contents.str)+1); } } +static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const string &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const
string &contents) { string_property(out, prop, tag, contents.c_str(), contents.size()); } +static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents) { if (contents.size) string_property(out, prop, tag, contents.data, contents.size); } +static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen); static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen) { GsfOutput* dst = gsf_outfile_new_child(out, "__properties_version1.0", false); gsf_output_write(dst, hlen, header); for (property_list::iterator i=prop.begin(); i!=prop.end(); i++) { property &p = *i; gsf_output_write(dst, sizeof(property), (const guint8*)&p); } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); } +static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value); static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) { property p; p.tag = tag; p.flags = flags; p.length = value; p.reserved = 0; prop_list.push_back(p); } +static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value); static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) { if (value) int_property(prop_list, tag, flags, value); } void write_msg_email(char *fname, pst_item* item, pst_file* pst) { // this is not an email item if (!item->email) return; + DEBUG_ENT("write_msg_email"); + pst_item_email &email = *(item->email); char charset[30]; const char* body_charset = pst_default_charset(item, sizeof(charset), charset); + DEBUG_INFO(("%s body charset seems to be %s\n", fname, body_charset)); + body_charset = "iso-8859-1//TRANSLIT//IGNORE"; gsf_init(); - DEBUG_ENT("write_msg_email"); GsfOutfile *outfile; GsfOutput *output; GError *err = NULL; output = 
gsf_output_stdio_new(fname, &err); if (output == NULL) { gsf_shutdown(); DEBUG_INFO(("unable to open output .msg file %s\n", fname)); DEBUG_RET(); return; } struct top_property_header { uint32_t reserved1; uint32_t reserved2; uint32_t next_recipient; // same as recipient count uint32_t next_attachment; // same as attachment count uint32_t recipient_count; uint32_t attachment_count; uint32_t reserved3; uint32_t reserved4; }; top_property_header top_head; memset(&top_head, 0, sizeof(top_head)); outfile = gsf_outfile_msole_new(output); g_object_unref(G_OBJECT(output)); output = GSF_OUTPUT(outfile); property_list prop_list; int_property(prop_list, 0x00170003, 0x6, email.importance); nzi_property(prop_list, 0x0023000B, 0x6, email.delivery_report); nzi_property(prop_list, 0x00260003, 0x6, email.priority); nzi_property(prop_list, 0x0029000B, 0x6, email.read_receipt); nzi_property(prop_list, 0x002E0003, 0x6, email.original_sensitivity); nzi_property(prop_list, 0x00360003, 0x6, email.sensitivity); nzi_property(prop_list, 0x0C17000B, 0x6, email.reply_requested); nzi_property(prop_list, 0x0E01000B, 0x6, email.delete_after_submit); int_property(prop_list, 0x0E070003, 0x6, item->flags); GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x001A001E, item->ascii_type); string_property(out, prop_list, 0x0037001E, body_charset, item->subject); strin0_property(out, prop_list, 0x003B0102, body_charset, email.outlook_sender); string_property(out, prop_list, 0x003D001E, string("")); string_property(out, prop_list, 0x0040001E, body_charset, email.outlook_received_name1); string_property(out, prop_list, 0x0042001E, body_charset, email.outlook_sender_name); string_property(out, prop_list, 0x0044001E, body_charset, email.outlook_recipient_name); string_property(out, prop_list, 0x0050001E, body_charset, email.reply_to); strin0_property(out, prop_list, 0x00510102, body_charset, email.outlook_recipient); strin0_property(out, prop_list, 0x00520102, body_charset, 
email.outlook_recipient2); string_property(out, prop_list, 0x0064001E, body_charset, email.sender_access); string_property(out, prop_list, 0x0065001E, body_charset, email.sender_address); string_property(out, prop_list, 0x0070001E, body_charset, email.processed_subject); string_property(out, prop_list, 0x00710102, email.conversation_index); string_property(out, prop_list, 0x0072001E, body_charset, email.original_bcc); string_property(out, prop_list, 0x0073001E, body_charset, email.original_cc); string_property(out, prop_list, 0x0074001E, body_charset, email.original_to); string_property(out, prop_list, 0x0075001E, body_charset, email.recip_access); string_property(out, prop_list, 0x0076001E, body_charset, email.recip_address); string_property(out, prop_list, 0x0077001E, body_charset, email.recip2_access); string_property(out, prop_list, 0x0078001E, body_charset, email.recip2_address); string_property(out, prop_list, 0x007D001E, body_charset, email.header); string_property(out, prop_list, 0x0C1A001E, body_charset, email.outlook_sender_name2); strin0_property(out, prop_list, 0x0C1D0102, body_charset, email.outlook_sender2); string_property(out, prop_list, 0x0C1E001E, body_charset, email.sender2_access); string_property(out, prop_list, 0x0C1F001E, body_charset, email.sender2_address); string_property(out, prop_list, 0x0E02001E, body_charset, email.bcc_address); string_property(out, prop_list, 0x0E03001E, body_charset, email.cc_address); string_property(out, prop_list, 0x0E04001E, body_charset, email.sentto_address); string_property(out, prop_list, 0x0E1D001E, body_charset, email.outlook_normalized_subject); string_property(out, prop_list, 0x1000001E, body_charset, item->body); string_property(out, prop_list, 0x1013001E, body_charset, email.htmlbody); string_property(out, prop_list, 0x1035001E, body_charset, email.messageid); string_property(out, prop_list, 0x1042001E, body_charset, email.in_reply_to); string_property(out, prop_list, 0x1046001E, body_charset, 
email.return_path_address); // any property over 0x8000 needs entries in the __nameid to make them // either string named or numerical named properties. { vector n(50); snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { int v = (email.message_recip_me) ? 1 : // to (email.message_cc_me) ? 2 : // cc 3; // bcc property_list prop_list; int_property(prop_list, 0x0C150003, 0x6, v); // PidTagRecipientType int_property(prop_list, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x3001001E, body_charset, item->file_as); if (item->contact) { string_property(out, prop_list, 0x3002001E, body_charset, item->contact->address1_transport); string_property(out, prop_list, 0x3003001E, body_charset, item->contact->address1); } strin0_property(out, prop_list, 0x300B0102, body_charset, email.outlook_search_key); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_recipient++; top_head.recipient_count++; } } pst_item_attach *a = item->attach; while (a) { if (a->method == PST_ATTACH_EMBEDDED) { // not implemented yet } else if (a->data.data || a->i_id) { vector n(50); snprintf(&n[0], n.size(), "__attach_version1.0_#%08X", top_head.attachment_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { FILE *fp = fopen("temp_file_attachment", "w+b"); if (fp) { pst_attach_to_file(pst, a, fp); // data is now in the file fseek(fp, 0, SEEK_SET); property_list prop_list; int_property(prop_list, 0x0E210003, 0x2, top_head.attachment_count); // MAPI_ATTACH_NUM int_property(prop_list, 0x0FF40003, 0x2, 2); // PR_ACCESS read int_property(prop_list, 0x0FF70003, 0x2, 0); // PR_ACCESS_LEVEL read only int_property(prop_list, 0x0FFE0003, 0x2, 7); // PR_OBJECT_TYPE attachment int_property(prop_list, 
0x37050003, 0x7, 1); // PR_ATTACH_METHOD by value int_property(prop_list, 0x370B0003, 0x7, a->position); // PR_RENDERING_POSITION int_property(prop_list, 0x37100003, 0x6, a->sequence); // PR_ATTACH_MIME_SEQUENCE GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x0FF90102, item->record_key); string_property(out, prop_list, 0x37010102, fp); string_property(out, prop_list, 0x3704001E, body_charset, a->filename1); string_property(out, prop_list, 0x3707001E, body_charset, a->filename2); string_property(out, prop_list, 0x370E001E, body_charset, a->mimetype); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_attachment++; top_head.attachment_count++; fclose(fp); } } } a = a->next; } { GsfOutput *output = gsf_outfile_new_child(out, "__nameid_version1.0", true); { GsfOutfile *out = GSF_OUTFILE (output); empty_property(out, 0x00020102); empty_property(out, 0x00030102); empty_property(out, 0x00040102); gsf_output_close(output); g_object_unref(G_OBJECT(output)); } } write_properties(out, prop_list, (const guint8*)&top_head, sizeof(top_head)); gsf_output_close(output); g_object_unref(G_OBJECT(output)); gsf_shutdown(); DEBUG_RET(); } diff --git a/src/vbuf.c b/src/vbuf.c index f3270ce..1ab7b42 100644 --- a/src/vbuf.c +++ b/src/vbuf.c @@ -1,257 +1,260 @@ #include "define.h" static int unicode_up = 0; static iconv_t i16to8; static const char *target_charset = NULL; static int target_open_from = 0; static int target_open_to = 0; static iconv_t i8totarget = (iconv_t)-1; static iconv_t target2i8 = (iconv_t)-1; #define ASSERT(x,...) 
{ if( !(x) ) DIE(( __VA_ARGS__)); } /** DESTRUCTIVELY grow or shrink buffer */ static void pst_vbresize(pst_vbuf *vb, size_t len); /** Reset vb to empty (dlen = 0, b = buf) and make sure its capacity is at least len bytes; the previous contents are not kept. Calls DIE() if the buffer cannot be enlarged. */ static void pst_vbresize(pst_vbuf *vb, size_t len) { vb->dlen = 0; if (vb->blen >= len) { vb->b = vb->buf; return; } void *nb = realloc(vb->buf, len); /* keep vb->buf intact until realloc is known to have succeeded */ if (!nb) DIE(("realloc() failure")); vb->buf = nb; vb->b = vb->buf; vb->blen = len; } static size_t pst_vbavail(pst_vbuf * vb); static size_t pst_vbavail(pst_vbuf * vb) { return vb->blen - vb->dlen - (size_t)(vb->b - vb->buf); } static void open_targets(const char* charset); static void open_targets(const char* charset) { if (!target_charset || strcasecmp(target_charset, charset)) { if (target_open_from) iconv_close(i8totarget); if (target_open_to) iconv_close(target2i8); if (target_charset) free((char *)target_charset); target_charset = strdup(charset); target_open_from = 1; target_open_to = 1; i8totarget = iconv_open(target_charset, "utf-8"); if (i8totarget == (iconv_t)-1) { target_open_from = 0; DEBUG_WARN(("Couldn't open iconv descriptor for utf-8 to %s.\n", target_charset)); } target2i8 = iconv_open("utf-8", target_charset); if (target2i8 == (iconv_t)-1) { target_open_to = 0; DEBUG_WARN(("Couldn't open iconv descriptor for %s to utf-8.\n", target_charset)); } } } static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion); static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion) { size_t inbytesleft = iblen; size_t icresult = (size_t)-1; size_t outbytesleft = 0; char *outbuf = NULL; int myerrno; + DEBUG_ENT("sbcs_conversion"); pst_vbresize(dest, 2*iblen); do { outbytesleft = dest->blen - dest->dlen; outbuf = dest->b + dest->dlen; icresult = iconv(conversion, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft); myerrno = errno; dest->dlen = outbuf - dest->b; if (inbytesleft) pst_vbgrow(dest, 2*inbytesleft); } while ((size_t)-1 == icresult && E2BIG == myerrno); if (icresult == (size_t)-1) { DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
pst_unicode_init(); + DEBUG_RET(); return (size_t)-1; } - return (icresult) ? (size_t)-1 : 0; + DEBUG_RET(); + return 0; } static void pst_unicode_close(); static void pst_unicode_close() { iconv_close(i16to8); if (target_open_from) iconv_close(i8totarget); if (target_open_to) iconv_close(target2i8); if (target_charset) free((char *)target_charset); target_charset = NULL; target_open_from = 0; target_open_to = 0; unicode_up = 0; } static int utf16_is_terminated(const char *str, int length); static int utf16_is_terminated(const char *str, int length) { int len = -1; int i; for (i = 0; i < length; i += 2) { if (str[i] == 0 && str[i + 1] == 0) { len = i; } } if (len == -1) { DEBUG_WARN(("utf16 string is not zero terminated\n")); } return (len == -1) ? 0 : 1; } pst_vbuf *pst_vballoc(size_t len) { pst_vbuf *result = pst_malloc(sizeof(pst_vbuf)); if (result) { result->dlen = 0; result->blen = 0; result->buf = NULL; pst_vbresize(result, len); } else DIE(("malloc() failure")); return result; } /** out: vbavail(vb) >= len, data are preserved */ void pst_vbgrow(pst_vbuf *vb, size_t len) { if (0 == len) return; if (0 == vb->blen) { pst_vbresize(vb, len); return; } if (vb->dlen + len > vb->blen) { if (vb->dlen + len < vb->blen * 1.5) len = vb->blen * 1.5; char *nb = pst_malloc(vb->blen + len); if (!nb) DIE(("malloc() failure")); vb->blen = vb->blen + len; memcpy(nb, vb->b, vb->dlen); free(vb->buf); vb->buf = nb; vb->b = vb->buf; } else { if (vb->b != vb->buf) memcpy(vb->buf, vb->b, vb->dlen); } vb->b = vb->buf; ASSERT(pst_vbavail(vb) >= len, "vbgrow(): I have failed in my mission."); } /** set vbuf b size=len, resize if necessary, relen = how much to over-allocate */ void pst_vbset(pst_vbuf * vb, void *b, size_t len) { pst_vbresize(vb, len); memcpy(vb->b, b, len); vb->dlen = len; } /** append len bytes of b to vb, resize if necessary */ void pst_vbappend(pst_vbuf *vb, void *b, size_t len) { if (0 == vb->dlen) { pst_vbset(vb, b, len); return; } pst_vbgrow(vb, len); memcpy(vb->b 
+ vb->dlen, b, len); vb->dlen += len; } void pst_unicode_init() { if (unicode_up) pst_unicode_close(); i16to8 = iconv_open("utf-8", "utf-16le"); if (i16to8 == (iconv_t)-1) { DEBUG_WARN(("Couldn't open iconv descriptor for utf-16le to utf-8.\n")); } unicode_up = 1; } size_t pst_vb_utf16to8(pst_vbuf *dest, const char *inbuf, int iblen) { size_t inbytesleft = iblen; size_t icresult = (size_t)-1; size_t outbytesleft = 0; char *outbuf = NULL; int myerrno; if (!unicode_up) return (size_t)-1; // failure to open iconv pst_vbresize(dest, iblen); //Bad Things can happen if a non-zero-terminated utf16 string comes through here if (!utf16_is_terminated(inbuf, iblen)) return (size_t)-1; do { outbytesleft = dest->blen - dest->dlen; outbuf = dest->b + dest->dlen; icresult = iconv(i16to8, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft); myerrno = errno; dest->dlen = outbuf - dest->b; if (inbytesleft) pst_vbgrow(dest, inbytesleft); } while ((size_t)-1 == icresult && E2BIG == myerrno); if (icresult == (size_t)-1) { DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno))); pst_unicode_init(); return (size_t)-1; } return (icresult) ? (size_t)-1 : 0; } size_t pst_vb_utf8to8bit(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset) { open_targets(charset); if (!target_open_from) return (size_t)-1; // failure to open the target return sbcs_conversion(dest, inbuf, iblen, i8totarget); } size_t pst_vb_8bit2utf8(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset) { open_targets(charset); if (!target_open_to) return (size_t)-1; // failure to open the target return sbcs_conversion(dest, inbuf, iblen, target2i8); }