diff --git a/libpst.spec.in b/libpst.spec.in index 487bbf5..65d59d4 100644 --- a/libpst.spec.in +++ b/libpst.spec.in @@ -1,415 +1,420 @@ Summary: Utilities to convert Outlook .pst files to other formats Name: @PACKAGE@ Version: @VERSION@ -Release: 2%{?dist} +Release: 3%{?dist} License: GPLv2+ Group: Applications/Productivity Source: http://www.five-ten-sg.com/%{name}/packages/%{name}-%{version}.tar.gz BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) URL: http://www.five-ten-sg.com/%{name}/ Requires: ImageMagick Requires: %{name}-libs = %{version}-%{release} BuildRequires: ImageMagick freetype-devel gd-devel libjpeg-devel zlib-devel python-devel boost-devel %{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")} %{!?python_sitearch: %global python_sitearch %(%{__python} -c "from distutils.sysconfig import get_python_lib; print get_python_lib(1)")} %description The Libpst utilities include readpst which can convert email messages to both mbox and MH mailbox formats, pst2ldif which can convert the contacts to .ldif format for import into ldap databases, and pst2dii which can convert email messages to the DII load file format used by Summation. %package libs Summary: Shared library used by the pst utilities Group: Development/Libraries %description libs The libpst-libs package contains the shared library used by the pst utilities. %package python Summary: Python bindings for libpst Group: Development/Libraries Requires: python Requires: %{name}-libs = %{version}-%{release} +%filter_provides_in %{python_sitearch}/_*.so + %description python The libpst-python package allows you to use the libpst shared object from python code. %package devel Summary: Library links and header files for libpst application development Group: Development/Libraries Requires: pkgconfig Requires: %{name}-libs = %{version}-%{release} %description devel The libpst-devel package contains the library links and header files you'll need to develop applications using the libpst shared library. You do not need to install it if you just want to use the libpst utilities. %package devel-doc Summary: Documentation for libpst.so for libpst application development Group: Documentation Requires: %{name}-doc = %{version}-%{release} %description devel-doc The libpst-devel-doc package contains the doxygen generated documentation for the libpst.so shared library. %package doc Summary: Documentation for the pst utilities in html format Group: Documentation %description doc The libpst-doc package contains the html documentation for the pst utilities. You do not need to install it if you just want to use the libpst utilities. %prep %setup -q %build %configure --enable-libpst-shared make %{?_smp_mflags} %install rm -rf $RPM_BUILD_ROOT make DESTDIR=$RPM_BUILD_ROOT install rm $RPM_BUILD_ROOT%{_libdir}/libpst.la rm $RPM_BUILD_ROOT%{_libdir}/libpst.a %clean rm -rf $RPM_BUILD_ROOT %post libs -p /sbin/ldconfig %postun libs -p /sbin/ldconfig %files %defattr(-,root,root,-) %{_bindir}/* %{_mandir}/man1/* %{_mandir}/man5/* %files libs %defattr(-,root,root,-) %{_libdir}/libpst.so.* %doc COPYING %files python %defattr(-,root,root,-) %{python_sitearch}/_*.so %exclude %{python_sitearch}/*.a %exclude %{python_sitearch}/*.la %files devel %defattr(-,root,root,-) %{_libdir}/libpst.so %{_includedir}/%{name}-@LIBPST_SO_MAJOR@/ %{_libdir}/pkgconfig/libpst.pc %files devel-doc %defattr(-,root,root,-) %{_datadir}/doc/%{name}-%{version}/devel/ %files doc %defattr(-,root,root,-) %dir %{_datadir}/doc/%{name}-%{version}/ %{_datadir}/doc/%{name}-%{version}/*.html %{_datadir}/doc/%{name}-%{version}/AUTHORS %{_datadir}/doc/%{name}-%{version}/COPYING %{_datadir}/doc/%{name}-%{version}/ChangeLog %{_datadir}/doc/%{name}-%{version}/NEWS %{_datadir}/doc/%{name}-%{version}/README %changelog +* Mon Dec 24 2012 Carl Byington - 0.6.55-3 +- filter private provides from rpm + * Tue Aug 09 2012 Carl Byington - 0.6.55-2 - rebuild for python * Thu Jul 19 2012 Fedora Release Engineering - 0.6.54-6 - Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild * Tue May 08 2012 Carl Byington - 0.6.55-1 - preserve bcc headers - document -C switch to set default character set - space after colon is not required in header fields * Tue Feb 28 2012 Fedora Release Engineering - 0.6.54-5 - Rebuilt for c++ ABI breakage * Fri Jan 13 2012 Fedora Release Engineering - 0.6.54-4 - Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild * Sat Dec 24 2011 Carl Byington - 0.6.54-3 - bump versions and prep for fedora build * Wed Nov 30 2011 Petr Pisar - 0.6.53-3 - Rebuild against boost-1.48 * Wed Nov 14 2011 Carl Byington - 0.6.54-2 - failed to bump version number * Fri Nov 04 2011 Carl Byington - 0.6.54-1 - embedded rfc822 messages might contain rtf encoded bodies * Fri Sep 02 2011 Petr Pisar - 0.6.53-2 - Rebuild against boost-1.47 * Sun Jul 10 2011 Carl Byington - 0.6.53-1 - add Status: header in output - allow fork for parallel processing of individual email folders in separate mode - proper handling of --with-boost-python option * Sun May 22 2011 Carl Byington - 0.6.52-1 - fix dangling freed pointer in embedded rfc822 message processing - allow broken outlook internet header field - it sometimes contains fragments of the message body rather than headers * Sun Apr 17 2011 Carl Byington - 0.6.51-1 - fix for buffer overrun; attachment size from the secondary list of mapi elements overwrote proper size from the primary list of mapi elements. fedora bugzilla 696263 * Tue Feb 08 2011 Fedora Release Engineering - 0.6.49-4 - Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild * Mon Feb 07 2011 Thomas Spura - 0.6.49-3 - rebuild for new boost * Fri Dec 24 2010 Carl Byington - 0.6.50-1 - rfc2047 and rfc2231 encoding for non-ascii headers and attachment filenames. * Wed Sep 29 2010 jkeating - 0.6.49-2 - Rebuilt for gcc bug 634757 * Mon Sep 13 2010 Carl Byington - 0.6.49-1 - fix to ignore embedded objects that are not email messages fedora bugzilla 633498 * Thu Sep 02 2010 Carl Byington - 0.6.48-1 - fix for broken internet headers from Outlook - fix ax_python.m4 to look for python2.7 - use mboxrd from quoting for output formats with multiple messages per file - use no from quoting for output formats with single message per file * Sat Jul 31 2010 Carl Byington - 0.6.47-6 - rebuild for python dependencies * Mon Jul 26 2010 David Malcolm - 0.6.47-4 - hack up configure so that it looks for python 2.7 * Wed Jul 21 2010 David Malcolm - 0.6.47-3 - Rebuilt for https://fedoraproject.org/wiki/Features/Python_2.7/MassRebuild * Wed Jul 07 2010 Carl Byington - 0.6.47-2 - Subpackage Licensing, add COPYING to -libs. - patches from Kenneth Berland for solaris * Fri May 07 2010 Carl Byington - 0.6.47-1 - patches from Kenneth Berland for solaris * Thu Jan 21 2010 Carl Byington - 0.6.46-1 - prefer libpthread over librt for finding sem_init function. * Thu Jan 21 2010 Carl Byington - 0.6.45-2 - rebuild for new boost package * Wed Nov 18 2009 Carl Byington - 0.6.45-1 - patch from Hugo DesRosiers to export categories and notes into vcards. - extend that patch to export categories into vcalendar appointments also. * Sun Sep 20 2009 Carl Byington - 0.6.44-1 - patch from Lee Ayres to add file name extensions in separate mode. - allow mixed items types in a folder in separate mode. * Thu Sep 12 2009 Carl Byington - 0.6.43-1 - decode more of the pst format, some minor bug fixes - add support for code pages 1200 and 1201. - add readpst -t option to select output item types, which can now be used to process folders containing mixed item types. - fix segfault with embedded appointments - add readpst -u option for Thunderbird mode .size and .type files - better detection of embedded rfc822 message attachments * Thu Sep 03 2009 Carl Byington - 0.6.42-1 - patch from Fridrich Strba to build with DJGPP DOS cross-compiler. * Sat Jul 25 2009 Fedora Release Engineering - 0.6.41-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild * Tue Jun 23 2009 Carl Byington - 0.6.41-1 - fix ax_python detection - should not use locate command - checking for fedora versions is not needed * Tue Jun 23 2009 Carl Byington - 0.6.40-1 - fedora 11 has python2.6 - remove pdf version of the man pages * Sun Jun 21 2009 Carl Byington - 0.6.39-1 - fedora > 10 moved to boost-python-devel * Sun Jun 21 2009 Carl Byington - 0.6.38-1 - add python interface to the shared library. - bump soname to version 4 for many changes to the interface. - better decoding of recurrence data in appointments. - remove readpstlog since debug log files are now plain text. - add readpst -j option for parallel jobs for each folder. - make nested mime multipart/alternative to hold the text/html parts. * Fri Apr 17 2009 Carl Byington - 0.6.37-1 - add pst_attach_to_mem() back into the shared library interface. - fix memory leak caught by valgrind. * Tue Apr 14 2009 Carl Byington - 0.6.36-1 - build separate -doc and -devel-doc subpackages. - other spec file cleanup * Wed Apr 08 2009 Carl Byington - 0.6.35-1 - properly add trailing mime boundary in all modes. - build separate libpst, libpst-libs, libpst-devel rpms. * Thu Mar 19 2009 Carl Byington - 0.6.34-1 - avoid putting mixed item types into the same output folder. * Tue Mar 17 2009 Carl Byington - 0.6.33-1 - compensate for iconv conversion to utf-7 that produces strings that are not null terminated. - don't produce empty attachment files in separate mode. * Sat Mar 14 2009 Carl Byington - 0.6.32-1 - fix ppc64 compile error * Sat Mar 14 2009 Carl Byington - 0.6.31-1 - bump version for fedora cvs tagging mistake * Sat Mar 14 2009 Carl Byington - 0.6.30-1 - track character set individually for each mapi element. - remove charset option from pst2ldif since we get that from each object now. - avoid emitting bogus empty email messages into contacts and calendar files. * Tue Feb 24 2009 Carl Byington - 0.6.29-1 - fix for 64bit on Fedora 11 * Tue Feb 24 2009 Carl Byington - 0.6.28-1 - improve decoding of multipart/report and message/rfc822 mime types. - improve character set handling. - fix embedded rfc822 messages with attachments. * Sat Feb 07 2009 Carl Byington - 0.6.27-1 - fix for const correctness on Fedora 11 * Sat Feb 07 2009 Carl Byington - 0.6.26-1 - patch from Fridrich Strba for building on mingw and general - cleanup of autoconf files. - add processing for pst files of type 0x0f. - strip and regenerate all MIME headers to avoid duplicates. - do a better job of making unique MIME boundaries. - only use base64 coding when strictly necessary. * Fri Jan 16 2009 Carl Byington - 0.6.25-1 - improve handling of content-type charset values in mime parts * Thu Dec 11 2008 Carl Byington - 0.6.24-1 - patch from Chris Eagle to build on cygwin * Thu Dec 04 2008 Carl Byington - 0.6.23-1 - bump version to avoid cvs tagging mistake in fedora * Fri Nov 28 2008 Carl Byington - 0.6.22-1 - patch from David Cuadrado to process emails with type PST_TYPE_OTHER - base64_encode_multiple() may insert newline, needs larger malloc - subject lines shorter than 2 bytes could segfault * Tue Oct 21 2008 Carl Byington - 0.6.21-1 - fix title bug with old schema in pst2ldif. - also escape commas in distinguished names per rfc4514. * Thu Oct 09 2008 Carl Byington - 0.6.20-1 - add configure option --enable-dii=no to remove dependency on libgd. - many fixes in pst2ldif by Robert Harris. - add -D option to include deleted items, from Justin Greer - fix from Justin Greer to add missing email headers - fix from Justin Greer for my_stristr() - fix for orphan children when building descriptor tree - avoid writing uninitialized data to debug log file - remove unreachable code - create dummy top-of-folder descriptor if needed for corrupt pst files * Sun Sep 14 2008 Carl Byington - 0.6.19-1 - Fix base64 encoding that could create long lines. - Initial work on a .so shared library from Bharath Acharya. * Thu Aug 28 2008 Carl Byington - 0.6.18-1 - Fixes for iconv on Mac from Justin Greer. * Tue Aug 05 2008 Carl Byington - 0.6.17-1 - More fixes for 32/64 bit portability on big endian ppc. * Tue Aug 05 2008 Carl Byington - 0.6.16-1 - Use inttypes.h for portable printing of 64 bit items. * Wed Jul 30 2008 Carl Byington - 0.6.15-1 - Patch from Robert Simpson for file handle leak in error case. - Fix for missing length on lz decompression, bug found by Chris White. * Sun Jun 15 2008 Carl Byington - 0.6.14-1 - Fix my mistake in debian packaging. * Fri Jun 13 2008 Carl Byington - 0.6.13-1 - Patch from Robert Simpson for encryption type 2. * Tue Jun 10 2008 Carl Byington - 0.6.12-1 - Patch from Joachim Metz for debian packaging and - fix for incorrect length on lz decompression * Tue Jun 03 2008 Carl Byington - 0.6.11-1 - Use ftello/fseeko to properly handle large files. - Document and properly use datasize field in b5 blocks. - Fix some MSVC compile issues and collect MSVC dependencies into one place. * Thu May 29 2008 Carl Byington - 0.6.10-1 - Patch from Robert Simpson for doubly-linked list code and arrays of unicode strings. * Fri May 16 2008 Carl Byington - 0.6.9 - Patch from Joachim Metz for 64 bit compile. - Fix pst format documentation for 8 byte backpointers. * Wed Mar 05 2008 Carl Byington - 0.6.8 - Initial version of pst2dii to convert to Summation dii load file format - changes for Fedora packaging guidelines (#434727) * Tue Jul 10 2007 Carl Byington - 0.5.5 - merge changes from Joe Nahmias version * Sun Feb 19 2006 Carl Byington - 0.5.3 - initial spec file using autoconf and http://www.fedora.us/docs/rpm-packaging-guidelines.html diff --git a/src/pst2dii.cpp.in b/src/pst2dii.cpp.in index 2e452a5..a3c8bc9 100644 --- a/src/pst2dii.cpp.in +++ b/src/pst2dii.cpp.in @@ -1,724 +1,724 @@ /* Copyright (c) 2008 Carl Byington - 510 Software Group, released under the GPL version 2 or any later version at your choice available at http://www.fsf.org/licenses/gpl.txt Based on readpst.c by David Smith */ #include #include #include using namespace std; extern "C" { #include "define.h" #include "lzfu.h" } struct file_ll { string name; int32_t stored_count; int32_t email_count; int32_t skip_count; int32_t type; file_ll() { stored_count = 0; email_count = 0; skip_count = 0; type = 0; }; }; // global settings const char* convert = "@CONVERT@"; // fully qualified path of the convert program from image magick const char* prog_name = NULL; // our arg0 name const char* bates_prefix = ""; // string to prefix bates numbers int bates_index = 0; // current bates sequence const char* output_directory = "."; const char* output_file = "load.dii"; char* font_file = NULL; int bates_color = 0xff0000; // color of bates header stamp int email_sequence = 0; // current pdf sequence number char pdf_name[PATH_MAX]; // current pdf file name FILE* dii_file = NULL; // the output dii load file pst_file pstfile; // the input pst file // pdf writer globals bool pdf_open = false; // is pdf writer started char* pst_folder; // current folder name int page_sequence; // current page number string conversion; // conversion command vector png_names; // png writer globals bool png_open = false; // is current page open int line_height; // in pixels int char_width; // in pixels int col_number, col_max; // in characters int line_number, line_max; // lines per page int x_position, y_position; // in pixels int black, red; // text colors gdImagePtr image; // current gd image const int DPI = 300; const double sz = 10.0; const int margin = DPI/2; const int LINE_SIZE = 2000; const int PAGE_WIDTH = DPI*17/2; const int PAGE_HEIGHT = DPI*11; // max size of the c_time char*. It will store the date of the email #define C_TIME_SIZE 500 static void open_png(); static void close_png(); static void version(); static void version() { printf("pst2dii v%s\n", VERSION); #if BYTE_ORDER == BIG_ENDIAN printf("Big Endian implementation being used.\n"); #elif BYTE_ORDER == LITTLE_ENDIAN printf("Little Endian implementation being used.\n"); #else # error "Byte order not supported by this library" #endif } static void usage(); static void usage() { version(); printf("Usage: %s -f ttf-font-file [OPTIONS] {PST FILENAME}\n", prog_name); printf("\t-f ttf-font-file \t- Set the font file\n"); printf("OPTIONS:\n"); printf("\t-B bates-prefix \t- Set the bates prefix string\n"); printf("\t-O dii-output-file\t- Set the dii load file output filename\n"); printf("\t-V \t- Version. Display program version\n"); printf("\t-b bates-number \t- Set the starting bates sequence number\n"); printf("\t-c bates-color \t- Specify the color of the bates stamps as 6 digit hex\n"); printf("\t-d filename \t- Debug to file.\n"); printf("\t-h \t- Help. This screen\n"); printf("\t-o dirname \t- Output directory to write files to.\n"); } static char *removeCR (char *c); static char *removeCR (char *c) { // converts /r/n to /n char *a, *b; DEBUG_ENT("removeCR"); a = b = c; while (*a != '\0') { *b = *a; if (*a != '\r') b++; a++; } *b = '\0'; DEBUG_RET(); return c; } // The sole purpose of this function is to bypass the pseudo-header prologue // that Microsoft Outlook inserts at the beginning of the internet email // headers for emails stored in their "Personal Folders" files. static char *skip_header_prologue(char *headers); static char *skip_header_prologue(char *headers) { const char *bad = "Microsoft Mail Internet Headers"; if (strncmp(headers, bad, strlen(bad)) == 0) { // Found the offensive header prologue char *pc = strchr(headers, '\n'); return pc + 1; } return headers; } static void check_filename(string &fname); static void check_filename(string &fname) { char *t = strdup(fname.c_str()); DEBUG_ENT("check_filename"); if (!t) { DEBUG_RET(); return; } char *tt = t; bool fixed = false; while ((t = strpbrk(t, " /\\:"))) { // while there are characters in the second string that we don't want *t = '_'; //replace them with an underscore fixed = true; } if (fixed) fname = string(tt); free(tt); DEBUG_RET(); } static string write_separate_attachment(string fname, pst_item_attach* current_attach, int attach_num, pst_file* pst); static string write_separate_attachment(string fname, pst_item_attach* current_attach, int attach_num, pst_file* pst) { FILE *fp = NULL; int x = 0; char *temp = NULL; // If there is a long filename (filename2) use that, otherwise // use the 8.3 filename (filename1) char *attach_filename = (current_attach->filename2.str) ? current_attach->filename2.str : current_attach->filename1.str; DEBUG_ENT("write_separate_attachment"); check_filename(fname); const char* f_name = fname.c_str(); DEBUG_INFO(("dirname=%s, pathname=%s, filename=%s\n", output_directory, f_name, attach_filename)); int len = strlen(output_directory) + 1 + strlen(f_name) + 15; if (!attach_filename) { - // generate our own (dummy) filename for the attachement + // generate our own (dummy) filename for the attachment temp = (char*)pst_malloc(len); sprintf(temp, "%s/%s_attach%i", output_directory, f_name, attach_num); } else { // have an attachment name, make sure it's unique temp = (char*)pst_malloc(len+strlen(attach_filename)); do { if (fp) fclose(fp); if (x == 0) sprintf(temp, "%s/%s_%s", output_directory, f_name, attach_filename); else sprintf(temp, "%s/%s_%s-%i", output_directory, f_name, attach_filename, x); } while ((fp = fopen(temp, "r")) && ++x < 99999999); if (x > 99999999) { DIE(("error finding attachment name. exhausted possibilities to %s\n", temp)); } } DEBUG_INFO(("Saving attachment to %s\n", temp)); if (!(fp = fopen(temp, "wb"))) { DEBUG_WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp)); } else { (void)pst_attach_to_file(pst, current_attach, fp); fclose(fp); } string rc(temp); if (temp) free(temp); DEBUG_RET(); return rc; } static void print_pdf_short(const char *line, int len, int color); static void print_pdf_short(const char *line, int len, int color) { if (line_number >= line_max) { close_png(); open_png(); } int brect[8]; gdFTStringExtra strex; strex.flags = gdFTEX_RESOLUTION; strex.linespacing = 1.20; strex.charmap = 0; strex.hdpi = DPI; strex.vdpi = DPI; char xline[len+1]; memcpy(xline, line, len); xline[len] = '\0'; char *p; char *l = xline; while ((p = strchr(l, '&'))) { *p = '\0'; char *err = gdImageStringFTEx(image, &brect[0], color, font_file, sz, 0.0, x_position, y_position, l, &strex); if (err) printf("%s", err); x_position += (brect[2]-brect[6]); l = p+1; err = gdImageStringFTEx(image, &brect[0], color, font_file, sz, 0.0, x_position, y_position, (char*)"&", &strex); if (err) printf("%s", err); x_position += (brect[2]-brect[6]); } char *err = gdImageStringFTEx(image, &brect[0], color, font_file, sz, 0.0, x_position, y_position, l, &strex); if (err) printf("%s", err); x_position += (brect[2]-brect[6]); col_number += len; } static void new_line(); static void new_line() { y_position += line_height; line_number += 1; x_position = margin; col_number = 0; } static void print_pdf_single(const char *line, int color); static void print_pdf_single(const char *line, int color) { while (*line == '\t') { char blanks[5]; memset(blanks, ' ', 5); print_pdf_short(blanks, 4, color); line++; if (col_number >= col_max) new_line(); } int n = strlen(line); while (n) { int m = col_max - col_number; // number of chars that will fit on this line m = (n > m) ? m : n; print_pdf_short(line, m, color); line += m; n -= m; if (n) new_line(); } } static void print_pdf_only(char *line, int color); static void print_pdf_only(char *line, int color) { char *p; while ((p = strchr(line, '\n'))) { *p = '\0'; print_pdf_single(line, color); *p = '\n'; line = p+1; new_line(); } print_pdf_single(line, color); } static void print_pdf(char *line); static void print_pdf(char *line) { pst_fwrite(line, 1, strlen(line), dii_file); print_pdf_only(line, black); } static void open_png() { if (!png_open) { png_open = true; int brect[8]; image = gdImageCreate(PAGE_WIDTH, PAGE_HEIGHT); gdImageColorAllocate(image, 255, 255, 255); // background color first one allocated black = gdImageColorAllocate(image, 0, 0, 0); int r = (bates_color & 0xff0000) >> 16; int g = (bates_color & 0x00ff00) >> 8; int b = (bates_color & 0x0000ff); red = gdImageColorAllocate(image, r, g, b); gdFTStringExtra strex; strex.flags = gdFTEX_RESOLUTION; strex.linespacing = 1.20; strex.charmap = 0; strex.hdpi = DPI; strex.vdpi = DPI; char line[LINE_SIZE]; char *err = gdImageStringFTEx(NULL, &brect[0], black, font_file, sz, 0.0, margin, margin, (char*)"LMgqQ", &strex); if (err) printf("%s", err); line_height = (brect[3]-brect[7]) * 12/10; char_width = (brect[2]-brect[6]) / 5; col_number = 0; col_max = (PAGE_WIDTH - margin*2) / char_width; line_number = 0; line_max = (PAGE_HEIGHT - margin*2) / line_height; x_position = margin; y_position = margin + line_height; snprintf(line, sizeof(line), "%s%06d\n", bates_prefix, bates_index++); print_pdf_only(line, red); print_pdf_only(pst_folder, red); } } static void close_png() { if (png_open) { png_open = false; char fn[PATH_MAX]; snprintf(fn, sizeof(fn), "page%d.png", ++page_sequence); FILE *pngout = fopen(fn, "wb"); if (pngout) { gdImagePng(image, pngout); fclose(pngout); } gdImageDestroy(image); // free memory png_names.push_back(fn); conversion += string(" ") + fn; } } static void open_pdf(char *line); static void open_pdf(char *line) { pst_folder = line; page_sequence = 0; conversion = string(convert); png_names.clear(); open_png(); snprintf(pdf_name, sizeof(pdf_name), "dii%06d", ++email_sequence); fprintf(dii_file, "\n@T %s\n", pdf_name); snprintf(pdf_name, sizeof(pdf_name), "%s/dii%06d.pdf", output_directory, email_sequence); } static void close_pdf(); static void close_pdf() { close_png(); conversion += string(" ") + pdf_name; (void)system(conversion.c_str()); for (vector::iterator i=png_names.begin(); i!=png_names.end(); i++) { remove((*i).c_str()); } fprintf(dii_file, "@D %s\n", pdf_name); } static void write_simple(const char *tag, const char *value); static void write_simple(const char *tag, const char *value) { if (value) fprintf(dii_file, "@%s %s\n", tag, value); } static void write_simple(const char *tag, string value); static void write_simple(const char *tag, string value) { fprintf(dii_file, "@%s %s\n", tag, value.c_str()); } static void write_simple(const char *tag, const char *value, const char *value2); static void write_simple(const char *tag, const char *value, const char *value2) { if (value) { if (value2) fprintf(dii_file, "@%s \"%s\" <%s>\n", tag, value, value2); else fprintf(dii_file, "@%s \"%s\"\n", tag, value); } } static string extract_header(char *headers, const char *field); static string extract_header(char *headers, const char *field) { string rc; int len = strlen(field) + 4; char f[len]; snprintf(f, len, "\n%s: ", field); char *p = strstr(headers, f); if (p) { p += strlen(f); char *n = strchr(p, '\n'); if (n) { *n = '\0'; rc = string(p); *n = '\n'; } else { rc = string(p); } } return rc; } static void write_normal_email(file_ll &f, pst_item* item, pst_file* pst); static void write_normal_email(file_ll &f, pst_item* item, pst_file* pst) { DEBUG_ENT("write_normal_email"); char *soh = NULL; // real start of headers. if (item->email->header.str) { // some of the headers we get from the file are not properly defined. // they can contain some email stuff too. We will cut off the header // when we see a \n\n or \r\n\r\n removeCR(item->email->header.str); char *temp = strstr(item->email->header.str, "\n\n"); if (temp) { DEBUG_INFO(("Found body text in header\n")); temp[1] = '\0'; // stop after first \n } soh = skip_header_prologue(item->email->header.str); } char folder_line[LINE_SIZE]; char line[LINE_SIZE]; // reset pdf writer to new file int bates = bates_index; // save starting index snprintf(folder_line, sizeof(folder_line), "pst folder = %s\n", f.name.c_str()); open_pdf(folder_line); // start printing this email fprintf(dii_file, "@FOLDERNAME %s\n", f.name.c_str()); string myfrom = extract_header(soh, "From"); string myto = extract_header(soh, "To"); string mycc = extract_header(soh, "Cc"); string mybcc = extract_header(soh, "Bcc"); if (myfrom.empty()) write_simple("FROM", item->email->outlook_sender_name.str, item->email->sender_address.str); else write_simple("FROM", myfrom); if (myto.empty()) write_simple("TO", item->email->sentto_address.str, item->email->recip_address.str); else write_simple("TO", myto); if (mycc.empty()) write_simple("CC", item->email->cc_address.str); else write_simple("CC", mycc); if (mybcc.empty()) write_simple("BCC", item->email->bcc_address.str); else write_simple("BCC", mybcc); if (item->email->sent_date) { time_t t = pst_fileTimeToUnixTime(item->email->sent_date); char c_time[C_TIME_SIZE]; strftime(c_time, C_TIME_SIZE, "%F", gmtime(&t)); write_simple("DATESENT", c_time); strftime(c_time, C_TIME_SIZE, "%T+0000", gmtime(&t)); write_simple("TIMESENT", c_time); } if (item->email->arrival_date) { time_t t = pst_fileTimeToUnixTime(item->email->arrival_date); char c_time[C_TIME_SIZE]; strftime(c_time, C_TIME_SIZE, "%F", gmtime(&t)); write_simple("DATERCVD", c_time); strftime(c_time, C_TIME_SIZE, "%T+0000", gmtime(&t)); write_simple("TIMERCVD", c_time); } if (item->subject.str) { write_simple("SUBJECT", item->subject.str); } write_simple("MSGID", item->email->messageid.str); write_simple("READ", (item->flags & 1) ? "Y" : "N"); DEBUG_INFO(("About to print Header\n")); fprintf(dii_file, "@HEADER\n"); if (item && item->subject.str) { DEBUG_INFO(("item->subject = %s\n", item->subject.str)); } if (soh) { // Now, write out the header... print_pdf(soh); int len = strlen(soh); if (!len || (soh[len-1] != '\n')) { snprintf(line, sizeof(line), "\n"); print_pdf(line); } } else { //make up our own headers const char *temp = item->email->outlook_sender.str; if (!temp) temp = ""; snprintf(line, sizeof(line), "From: \"%s\" <%s>\n", item->email->outlook_sender_name.str, temp); print_pdf(line); if (item->subject.str) { snprintf(line, sizeof(line), "Subject: %s\n", item->subject.str); } else { snprintf(line, sizeof(line), "Subject: \n"); } print_pdf(line); snprintf(line, sizeof(line), "To: %s\n", item->email->sentto_address.str); print_pdf(line); if (item->email->cc_address.str) { snprintf(line, sizeof(line), "Cc: %s\n", item->email->cc_address.str); print_pdf(line); } if (item->email->sent_date) { time_t em_time = pst_fileTimeToUnixTime(item->email->sent_date); char c_time[C_TIME_SIZE]; strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); snprintf(line, sizeof(line), "Date: %s\n", c_time); print_pdf(line); } } snprintf(line, sizeof(line), "\n"); print_pdf_only(line, black); fprintf(dii_file, "@HEADER-END\n"); DEBUG_INFO(("About to print Body\n")); fprintf(dii_file, "@EMAIL-BODY\n"); if (item->body.str) { removeCR(item->body.str); print_pdf(item->body.str); } else if (item->email->htmlbody.str) { removeCR(item->email->htmlbody.str); print_pdf(item->email->htmlbody.str); } else if (item->email->encrypted_body.data || item->email->encrypted_htmlbody.data) { char ln[LINE_SIZE]; snprintf(ln, sizeof(ln), "%s", "The body of this email is encrypted. This isn't supported yet, but the body is now an attachment\n"); print_pdf(ln); } fprintf(dii_file, "@EMAIL-END\n"); int attach_num = 0; for (pst_item_attach* attach = item->attach; attach; attach = attach->next) { pst_convert_utf8_null(item, &attach->filename1); pst_convert_utf8_null(item, &attach->filename2); pst_convert_utf8_null(item, &attach->mimetype); DEBUG_INFO(("Attempting Attachment encoding\n")); if (attach->data.data || attach->i_id) { string an = write_separate_attachment(f.name, attach, ++attach_num, pst); fprintf(dii_file, "@EATTACH %s\n", an.c_str()); } } close_pdf(); fprintf(dii_file, "@BATESBEG %d\n", bates); fprintf(dii_file, "@BATESEND %d\n", bates_index-1); DEBUG_RET(); } static void create_enter_dir(file_ll &f, file_ll *parent, pst_item *item); static void create_enter_dir(file_ll &f, file_ll *parent, pst_item *item) { pst_convert_utf8(item, &item->file_as); f.type = item->type; f.stored_count = (item->folder) ? item->folder->item_count : 0; f.name = ((parent) ? parent->name + "/" : "") + string(item->file_as.str); } static void close_enter_dir(file_ll &f); static void close_enter_dir(file_ll &f) { } static void process(pst_item *outeritem, file_ll *parent, pst_desc_tree *d_ptr); static void process(pst_item *outeritem, file_ll *parent, pst_desc_tree *d_ptr) { file_ll ff; pst_item *item = NULL; DEBUG_ENT("process"); create_enter_dir(ff, parent, outeritem); for (; d_ptr; d_ptr = d_ptr->next) { if (d_ptr->desc) { item = pst_parse_item(&pstfile, d_ptr, NULL); DEBUG_INFO(("item pointer is %p\n", item)); if (item) { if (item->folder && item->file_as.str && d_ptr->child ) { //if this is a non-empty folder, we want to recurse into it fprintf(stderr, "entering folder %s\n", item->file_as.str); process(item, &ff, d_ptr->child); } else if (item->email && (item->type == PST_TYPE_NOTE || item->type == PST_TYPE_SCHEDULE || item->type == PST_TYPE_REPORT)) { ff.email_count++; write_normal_email(ff, item, &pstfile); } else { ff.skip_count++; // other mapi objects } pst_freeItem(item); } else { ff.skip_count++; DEBUG_INFO(("A NULL item was seen\n")); } } } close_enter_dir(ff); DEBUG_RET(); } int main(int argc, char* const* argv) { pst_desc_tree *d_ptr; char *fname = NULL; char c; char *d_log = NULL; prog_name = argv[0]; pst_item *item = NULL; while ((c = getopt(argc, argv, "B:b:c:d:f:o:O:Vh"))!= -1) { switch (c) { case 'B': bates_prefix = optarg; break; case 'b': bates_index = atoi(optarg); break; case 'c': bates_color = (int)strtol(optarg, (char**)NULL, 16); break; case 'f': font_file = optarg; break; case 'o': output_directory = optarg; break; case 'O': output_file = optarg; break; case 'd': d_log = optarg; break; case 'h': usage(); exit(0); break; case 'V': version(); exit(0); break; default: usage(); exit(1); break; } } if (!font_file) { usage(); exit(1); } if (argc > optind) { fname = argv[optind]; } else { usage(); exit(2); } #ifdef DEBUG_ALL // force a log file if (!d_log) d_log = "pst2dii.log"; #endif DEBUG_INIT(d_log, NULL); DEBUG_ENT("main"); RET_DERROR(pst_open(&pstfile, fname, NULL), 1, ("Error opening File\n")); RET_DERROR(pst_load_index(&pstfile), 2, ("Index Error\n")); pst_load_extended_attributes(&pstfile); d_ptr = pstfile.d_head; // first record is main record item = (pst_item*)pst_parse_item(&pstfile, d_ptr, NULL); if (!item || !item->message_store) { DEBUG_RET(); DIE(("Could not get root record\n")); } d_ptr = pst_getTopOfFolders(&pstfile, item); if (!d_ptr) { DEBUG_RET(); DIE(("Top of folders record not found. Cannot continue\n")); } dii_file = fopen(output_file, "wb"); if (dii_file) { process(item, NULL, d_ptr->child); // do the children of TOPF pst_freeItem(item); pst_close(&pstfile); fclose(dii_file); } DEBUG_RET(); return 0; }