diff --git a/regression/regression-tests.bash b/regression/regression-tests.bash index 4008493..4739a49 100644 --- a/regression/regression-tests.bash +++ b/regression/regression-tests.bash @@ -1,148 +1,151 @@ #!/bin/bash function consistency() { # check source and xml documentation for consistency ( cd .. # back to top level of project f1=/tmp/f1$$ f2=/tmp/f2$$ grep 'case 0x' src/libpst.c | awk '{print $2}' | tr A-Z a-z | sed -e 's/://g' | sort >$f1 grep '^0x' xml/libpst.in | awk '{print $1}' | (for i in {1..19}; do read a; done; cat) | sort >$f2 diff $f1 $f2 less $f1 rm -f $f1 $f2 ) } function dodii() { n="$1" fn="$2" ba=$(basename "$fn" .pst) size=$(stat -c %s $fn) rm -rf output$n if [ -z "$val" ] || [ $size -lt 10000000 ]; then echo $fn mkdir output$n $val ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output$n -O $ba.mydii -d $fn.log $fn >$fn.dii.err 2>&1 fi } function doldif() { n="$1" fn="$2" ba=$(basename "$fn" .pst) size=$(stat -c %s $fn) rm -rf output$n if [ -z "$val" ] || [ $size -lt 10000000 ]; then echo $fn mkdir output$n $val ../src/pst2ldif -d $ba.ldif.log -b 'o=ams-cc.com, c=US' -c 'inetOrgPerson' $fn >$ba.ldif.err 2>&1 fi } function dopst() { n="$1" fn="$2" ba=$(basename "$fn" .pst) size=$(stat -c %s $fn) jobs="" [ -n "$val" ] && jobs="-j 0" rm -rf output$n if [ -z "$val" ] || [ $size -lt 100000000 ]; then echo $fn mkdir output$n if [ "$regression" == "yes" ]; then $val ../src/readpst $jobs -te -r -cv -o output$n $fn >$ba.err 2>&1 else ## only email and include deleted items, have a deleted items folder with multiple item types #$val ../src/readpst $jobs -te -r -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 ## normal recursive dump char='us-ascii' + acc="-a '.xls,.doc'" + acc='' #char='BIG-5' - echo $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn - $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 + echo $val ../src/readpst $acc -C $char -j 0 -r -cv -o output$n -d $ba.log $fn + $val ../src/readpst $acc -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 ## separate mode with filename extensions and .msg files #echo $val ../src/readpst $jobs -r -m -D -cv -o output$n -d $ba.log $fn # $val ../src/readpst $jobs -r -m -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 ## separate mode where we decode all attachments to binary files #echo $val ../src/readpst $jobs -r -S -D -cv -o output$n -d $ba.log $fn # $val ../src/readpst $jobs -r -S -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 ## testing idblock #../src/getidblock -p $fn 0 >$ba.fulldump fi fi } pushd .. make || exit popd rm -rf output* *.err *.log v="valgrind --leak-check=full" val="" func="dopst" [ "$1" == "pst" ] && func="dopst" [ "$1" == "pstv" ] && func="dopst" && val=$v [ "$1" == "ldif" ] && func="doldif" [ "$1" == "dii" ] && func="dodii" regression="" [ "$2" == "reg" ] && regression="yes" [ "$regression" == "yes" ] && val="" #$func 1 ams.pst #$func 2 sample_64.pst #$func 3 test.pst #$func 4 big_mail.pst #$func 5 mbmg.archive.pst #$func 6 Single2003-read.pst #$func 7 Single2003-unread.pst #$func 8 ol2k3high.pst #$func 9 ol97high.pst #$func 10 returned_message.pst #$func 11 flow.pst #$func 12 test-html.pst #$func 13 test-text.pst #$func 14 joe.romanowski.pst #$func 15 hourig1.pst #$func 16 test-mac.pst #$func 18 spam.pst #$func 19 rendgen.pst # single email appointment #$func 20 rendgen2.pst # email appointment with no termination date #$func 21 rendgen3.pst # mime signed email #$func 22 rendgen4.pst # appointment test cases #$func 23 rendgen5.pst # appointment test cases #$func 24 paul.sheer.pst # embedded rfc822 attachment #$func 25 jerry.pst # non ascii subject lines #$func 26 phill.bertolus.pst # possible segfault in forked process, cannot reproduce #$func 27 kaiser.pst # appointments with other character sets #$func 28 pstsample.pst # character set issue #$func 29 pstsample2.pst # embedded image in rtf data #$func 30 pstsample3.pst # exports of rtf and html +$func 31 Journal_Archives_08_29_2010.pst [ -n "$val" ] && grep 'lost:' *err | grep -v 'lost: 0 ' if [ "$regression" == "yes" ]; then ( (for i in output*; do find $i -type f; done) | while read a; do grep -v iamunique "$a" rm -f "$a" done ) >regression.txt fi diff --git a/src/msg.cpp b/src/msg.cpp index aaa4adc..c41d4ff 100644 --- a/src/msg.cpp +++ b/src/msg.cpp @@ -1,419 +1,430 @@ extern "C" { #include "define.h" #include "msg.h" #include #include #include #include #include #include #include } #include #include #include using namespace std; struct property { uint32_t tag; uint32_t flags; uint32_t length; // or value uint32_t reserved; }; typedef list property_list; /** Convert str to an 8 bit charset if it is utf8, null strings are preserved. * * @param str reference to the mapi string of interest * @param charset pointer to the 8 bit charset to use */ static void convert_8bit(pst_string &str, const char *charset); static void convert_8bit(pst_string &str, const char *charset) { if (!str.str) return; // null if (!str.is_utf8) return; // not utf8 DEBUG_ENT("convert_8bit"); pst_vbuf *newer = pst_vballoc(2); size_t strsize = strlen(str.str); size_t rc = pst_vb_utf8to8bit(newer, str.str, strsize, charset); if (rc == (size_t)-1) { // unable to convert, change the charset to utf8 free(newer->b); DEBUG_INFO(("Failed to convert utf-8 to %s\n", charset)); DEBUG_HEXDUMPC(str.str, strsize, 0x10); } else { // null terminate the output string pst_vbgrow(newer, 1); newer->b[newer->dlen] = '\0'; free(str.str); str.str = newer->b; } free(newer); DEBUG_RET(); } static void empty_property(GsfOutfile *out, uint32_t tag); static void empty_property(GsfOutfile *out, uint32_t tag) { vector n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size) { if (!contents) return; size_t term = ((tag & 0x0000ffff) == 0x001e) ? 1 : ((tag & 0x0000ffff) == 0x001f) ? 2 : 0; // null terminator vector n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); gsf_output_write(dst, size, (const guint8*)contents); if (term) { memset(&n[0], 0, term); gsf_output_write(dst, term, (const guint8*)&n[0]); size += term; } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); property p; p.tag = tag; p.flags = 0x6; // make all the properties writable p.length = size; p.reserved = 0; prop.push_back(p); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp) { vector n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); size_t size = 0; const size_t bsize = 10000; char buf[bsize]; while (1) { size_t s = fread(buf, 1, bsize, fp); if (!s) break; gsf_output_write(dst, s, (const guint8*)buf); } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); property p; p.tag = tag; p.flags = 0x6; // make all the properties writable p.length = size; p.reserved = 0; prop.push_back(p); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) { if (contents.str) { convert_8bit(contents, charset); string_property(out, prop, tag, contents.str, strlen(contents.str)); } } static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents); static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) { if (contents.str) { convert_8bit(contents, charset); string_property(out, prop, tag, contents.str, strlen(contents.str)+1); } } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const string &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const string &contents) { string_property(out, prop, tag, contents.c_str(), contents.size()); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents) { if (contents.size) string_property(out, prop, tag, contents.data, contents.size); } static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen); static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen) { GsfOutput* dst = gsf_outfile_new_child(out, "__properties_version1.0", false); gsf_output_write(dst, hlen, header); for (property_list::iterator i=prop.begin(); i!=prop.end(); i++) { property &p = *i; gsf_output_write(dst, sizeof(property), (const guint8*)&p); } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); } static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value); static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) { property p; p.tag = tag; p.flags = flags; p.length = value; p.reserved = 0; prop_list.push_back(p); } static void i64_property(property_list &prop_list, uint32_t tag, uint32_t flags, FILETIME *value); static void i64_property(property_list &prop_list, uint32_t tag, uint32_t flags, FILETIME *value) { if (value) { property p; p.tag = tag; p.flags = flags; p.length = value->dwLowDateTime; p.reserved = value->dwHighDateTime; prop_list.push_back(p); } } static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value); static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) { if (value) int_property(prop_list, tag, flags, value); } void write_msg_email(char *fname, pst_item* item, pst_file* pst) { // this is not an email item if (!item->email) return; DEBUG_ENT("write_msg_email"); pst_item_email &email = *(item->email); char charset[30]; const char* body_charset = pst_default_charset(item, sizeof(charset), charset); DEBUG_INFO(("%s body charset seems to be %s\n", fname, body_charset)); body_charset = "iso-8859-1//TRANSLIT//IGNORE"; gsf_init(); GsfOutfile *outfile; GsfOutput *output; GError *err = NULL; output = gsf_output_stdio_new(fname, &err); if (output == NULL) { gsf_shutdown(); DEBUG_INFO(("unable to open output .msg file %s\n", fname)); DEBUG_RET(); return; } struct top_property_header { uint32_t reserved1; uint32_t reserved2; uint32_t next_recipient; // same as recipient count uint32_t next_attachment; // same as attachment count uint32_t recipient_count; uint32_t attachment_count; uint32_t reserved3; uint32_t reserved4; }; top_property_header top_head; memset(&top_head, 0, sizeof(top_head)); outfile = gsf_outfile_msole_new(output); g_object_unref(G_OBJECT(output)); output = GSF_OUTPUT(outfile); property_list prop_list; int_property(prop_list, 0x00170003, 0x6, email.importance); nzi_property(prop_list, 0x0023000B, 0x6, email.delivery_report); nzi_property(prop_list, 0x00260003, 0x6, email.priority); nzi_property(prop_list, 0x0029000B, 0x6, email.read_receipt); nzi_property(prop_list, 0x002E0003, 0x6, email.original_sensitivity); nzi_property(prop_list, 0x00360003, 0x6, email.sensitivity); nzi_property(prop_list, 0x0C17000B, 0x6, email.reply_requested); nzi_property(prop_list, 0x0E01000B, 0x6, email.delete_after_submit); int_property(prop_list, 0x0E070003, 0x6, item->flags); i64_property(prop_list, 0x00390040, 0x6, email.sent_date); GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x001A001E, item->ascii_type); string_property(out, prop_list, 0x0037001E, body_charset, item->subject); strin0_property(out, prop_list, 0x003B0102, body_charset, email.outlook_sender); string_property(out, prop_list, 0x003D001E, string("")); string_property(out, prop_list, 0x0040001E, body_charset, email.outlook_received_name1); string_property(out, prop_list, 0x0042001E, body_charset, email.outlook_sender_name); string_property(out, prop_list, 0x0044001E, body_charset, email.outlook_recipient_name); string_property(out, prop_list, 0x0050001E, body_charset, email.reply_to); strin0_property(out, prop_list, 0x00510102, body_charset, email.outlook_recipient); strin0_property(out, prop_list, 0x00520102, body_charset, email.outlook_recipient2); string_property(out, prop_list, 0x0064001E, body_charset, email.sender_access); string_property(out, prop_list, 0x0065001E, body_charset, email.sender_address); string_property(out, prop_list, 0x0070001E, body_charset, email.processed_subject); string_property(out, prop_list, 0x00710102, email.conversation_index); string_property(out, prop_list, 0x0072001E, body_charset, email.original_bcc); string_property(out, prop_list, 0x0073001E, body_charset, email.original_cc); string_property(out, prop_list, 0x0074001E, body_charset, email.original_to); string_property(out, prop_list, 0x0075001E, body_charset, email.recip_access); string_property(out, prop_list, 0x0076001E, body_charset, email.recip_address); string_property(out, prop_list, 0x0077001E, body_charset, email.recip2_access); string_property(out, prop_list, 0x0078001E, body_charset, email.recip2_address); string_property(out, prop_list, 0x007D001E, body_charset, email.header); string_property(out, prop_list, 0x0C1A001E, body_charset, email.outlook_sender_name2); strin0_property(out, prop_list, 0x0C1D0102, body_charset, email.outlook_sender2); string_property(out, prop_list, 0x0C1E001E, body_charset, email.sender2_access); string_property(out, prop_list, 0x0C1F001E, body_charset, email.sender2_address); string_property(out, prop_list, 0x0E02001E, body_charset, email.bcc_address); string_property(out, prop_list, 0x0E03001E, body_charset, email.cc_address); string_property(out, prop_list, 0x0E04001E, body_charset, email.sentto_address); string_property(out, prop_list, 0x0E1D001E, body_charset, email.outlook_normalized_subject); string_property(out, prop_list, 0x1000001E, body_charset, item->body); string_property(out, prop_list, 0x1013001E, body_charset, email.htmlbody); string_property(out, prop_list, 0x1035001E, body_charset, email.messageid); string_property(out, prop_list, 0x1042001E, body_charset, email.in_reply_to); string_property(out, prop_list, 0x1046001E, body_charset, email.return_path_address); // any property over 0x8000 needs entries in the __nameid to make them // either string named or numerical named properties. { vector n(50); { snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { int v = 1; // to property_list prop_list; int_property(prop_list, 0x0C150003, 0x6, v); // PidTagRecipientType int_property(prop_list, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x3001001E, body_charset, item->file_as); if (item->contact) { string_property(out, prop_list, 0x3002001E, body_charset, item->contact->address1_transport); string_property(out, prop_list, 0x3003001E, body_charset, item->contact->address1); string_property(out, prop_list, 0x5ff6001E, body_charset, item->contact->address1); } strin0_property(out, prop_list, 0x300B0102, body_charset, email.outlook_search_key); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_recipient++; top_head.recipient_count++; } } if (email.cc_address.str) { snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { int v = 2; // cc property_list prop_list; int_property(prop_list, 0x0C150003, 0x6, v); // PidTagRecipientType int_property(prop_list, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x3001001E, body_charset, email.cc_address); string_property(out, prop_list, 0x3003001E, body_charset, email.cc_address); string_property(out, prop_list, 0x5ff6001E, body_charset, email.cc_address); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_recipient++; top_head.recipient_count++; } } if (email.bcc_address.str) { snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { int v = 3; // bcc property_list prop_list; int_property(prop_list, 0x0C150003, 0x6, v); // PidTagRecipientType int_property(prop_list, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x3001001E, body_charset, email.bcc_address); string_property(out, prop_list, 0x3003001E, body_charset, email.bcc_address); string_property(out, prop_list, 0x5ff6001E, body_charset, email.bcc_address); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_recipient++; top_head.recipient_count++; } } } pst_item_attach *a = item->attach; while (a) { if (a->method == PST_ATTACH_EMBEDDED) { // not implemented yet } else if (a->data.data || a->i_id) { vector n(50); snprintf(&n[0], n.size(), "__attach_version1.0_#%08X", top_head.attachment_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { FILE *fp = fopen("temp_file_attachment", "w+b"); if (fp) { pst_attach_to_file(pst, a, fp); // data is now in the file fseek(fp, 0, SEEK_SET); property_list prop_list; int_property(prop_list, 0x0E210003, 0x2, top_head.attachment_count); // MAPI_ATTACH_NUM int_property(prop_list, 0x0FF40003, 0x2, 2); // PR_ACCESS read int_property(prop_list, 0x0FF70003, 0x2, 0); // PR_ACCESS_LEVEL read only int_property(prop_list, 0x0FFE0003, 0x2, 7); // PR_OBJECT_TYPE attachment int_property(prop_list, 0x37050003, 0x7, 1); // PR_ATTACH_METHOD by value int_property(prop_list, 0x370B0003, 0x7, a->position); // PR_RENDERING_POSITION int_property(prop_list, 0x37100003, 0x6, a->sequence); // PR_ATTACH_MIME_SEQUENCE GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x0FF90102, item->record_key); string_property(out, prop_list, 0x37010102, fp); - string_property(out, prop_list, 0x3704001E, body_charset, a->filename1); - string_property(out, prop_list, 0x3707001E, body_charset, a->filename2); + if (a->filename2.str) { + // have long file name + string_property(out, prop_list, 0x3707001E, body_charset, a->filename2); + } + else if (a->filename1.str) { + // have short file name + string_property(out, prop_list, 0x3704001E, body_charset, a->filename1); + } + else { + // make up a name + const char *n = "inline"; + string_property(out, prop_list, 0x3704001E, n, strlen(n)); + } string_property(out, prop_list, 0x370E001E, body_charset, a->mimetype); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_attachment++; top_head.attachment_count++; fclose(fp); } } } a = a->next; } write_properties(out, prop_list, (const guint8*)&top_head, sizeof(top_head)); { GsfOutput *output = gsf_outfile_new_child(out, "__nameid_version1.0", true); { GsfOutfile *out = GSF_OUTFILE (output); empty_property(out, 0x00020102); empty_property(out, 0x00030102); empty_property(out, 0x00040102); gsf_output_close(output); g_object_unref(G_OBJECT(output)); } } gsf_output_close(output); g_object_unref(G_OBJECT(output)); gsf_shutdown(); DEBUG_RET(); }