diff --git a/src/readpst.c b/src/readpst.c index d9998d9..5e6e8db 100644 --- a/src/readpst.c +++ b/src/readpst.c @@ -1,2143 +1,2143 @@ /*** * readpst.c * Part of the LibPST project * Written by David Smith * dave.s@earthcorp.com */ #include "define.h" #include "lzfu.h" #define OUTPUT_TEMPLATE "%s" #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" #define KMAIL_INDEX ".%s.index" #define SEP_MAIL_FILE_TEMPLATE "%i%s" // max size of the c_time char*. It will store the date of the email #define C_TIME_SIZE 500 struct file_ll { char *name; char *dname; FILE * output; int32_t stored_count; int32_t item_count; int32_t skip_count; int32_t type; }; int grim_reaper(); pid_t try_fork(char* folder); void process(pst_item *outeritem, pst_desc_tree *d_ptr); void write_email_body(FILE *f, char *body); void removeCR(char *c); void usage(); void version(); char* mk_kmail_dir(char* fname); int close_kmail_dir(); char* mk_recurse_dir(char* dir, int32_t folder_type); int close_recurse_dir(); char* mk_separate_dir(char *dir); int close_separate_dir(); int mk_separate_file(struct file_ll *f, char *extension); char* my_stristr(char *haystack, char *needle); void check_filename(char *fname); void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst); void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers); void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst); int valid_headers(char *header); void header_has_field(char *header, char *field, int *flag); void header_get_subfield(char *field, const char *subfield, char *body_subfield, size_t size_subfield); char* header_get_field(char *header, char *field); char* header_end_field(char *field); void header_strip_field(char *header, char *field); int test_base64(char *body); void find_html_charset(char *html, char *charset, size_t charsetlen); void find_rfc822_headers(char** 
extra_mime_headers); void write_body_part(FILE* f_output, pst_string *body, char *mime, char *charset, char *boundary, pst_file* pst); void write_schedule_part_data(FILE* f_output, pst_item* item, const char* sender, const char* method); void write_schedule_part(FILE* f_output, pst_item* item, const char* sender, const char* boundary); void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf, char** extra_mime_headers); void write_vcard(FILE* f_output, pst_item *item, pst_item_contact* contact, char comment[]); int write_extra_categories(FILE* f_output, pst_item* item); void write_journal(FILE* f_output, pst_item* item); void write_appointment(FILE* f_output, pst_item *item); void create_enter_dir(struct file_ll* f, pst_item *item); void close_enter_dir(struct file_ll *f); const char* prog_name; char* output_dir = "."; char* kmail_chdir = NULL; // Normal mode just creates mbox format files in the current directory. Each file is named // the same as the folder's name that it represents #define MODE_NORMAL 0 // KMail mode creates a directory structure suitable for being used directly // by the KMail application #define MODE_KMAIL 1 // recurse mode creates a directory structure like the PST file. Each directory // contains only one file which stores the emails in mboxrd format. #define MODE_RECURSE 2 // separate mode creates the same directory structure as recurse. The emails are stored in // separate files, numbering from 1 upward. Attachments belonging to the emails are // saved as email_no-filename (e.g. 
1-samplefile.doc or 1-Attachment2.zip) #define MODE_SEPARATE 3 // Output Normal just prints the standard information about what is going on #define OUTPUT_NORMAL 0 // Output Quiet is provided so that only errors are printed #define OUTPUT_QUIET 1 // default mime-type for attachments that have a null mime-type #define MIME_TYPE_DEFAULT "application/octet-stream" #define RFC822 "message/rfc822" // output mode for contacts #define CMODE_VCARD 0 #define CMODE_LIST 1 // output mode for deleted items #define DMODE_EXCLUDE 0 #define DMODE_INCLUDE 1 // Output type mode flags #define OTMODE_EMAIL 1 #define OTMODE_APPOINTMENT 2 #define OTMODE_JOURNAL 4 #define OTMODE_CONTACT 8 // output settings for RTF bodies // filename for the attachment #define RTF_ATTACH_NAME "rtf-body.rtf" // mime type for the attachment #define RTF_ATTACH_TYPE "application/rtf" // global settings int mode = MODE_NORMAL; int mode_MH = 0; // a submode of MODE_SEPARATE int mode_EX = 0; // a submode of MODE_SEPARATE int mode_thunder = 0; // a submode of MODE_RECURSE int output_mode = OUTPUT_NORMAL; int contact_mode = CMODE_VCARD; int deleted_mode = DMODE_EXCLUDE; int output_type_mode = 0xff; // Default to all. 
int contact_mode_specified = 0; int overwrite = 0; int save_rtf_body = 1; int file_name_len = 10; // enough room for MODE_SPEARATE file name pst_file pstfile; regex_t meta_charset_pattern; int number_processors = 1; // number of cpus we have int max_children = 0; // based on number of cpus and command line args int max_child_specified = 0;// have command line arg -j int active_children; // number of children of this process, cannot be larger than max_children pid_t* child_processes; // setup by main(), and at the start of new child process #ifdef HAVE_SEMAPHORE_H int shared_memory_id; sem_t* global_children = NULL; sem_t* output_mutex = NULL; #endif int grim_reaper(int waitall) { int available = 0; #ifdef HAVE_FORK #ifdef HAVE_SEMAPHORE_H if (global_children) { //sem_getvalue(global_children, &available); //printf("grim reaper %s for pid %d (parent %d) with %d children, %d available\n", (waitall) ? "all" : "", getpid(), getppid(), active_children, available); //fflush(stdout); int i,j; for (i=0; inext) { DEBUG_INFO(("New item record\n")); if (!d_ptr->desc) { ff.skip_count++; DEBUG_WARN(("ERROR item's desc record is NULL\n")); continue; } DEBUG_INFO(("Desc Email ID %#"PRIx64" [d_ptr->d_id = %#"PRIx64"]\n", d_ptr->desc->i_id, d_ptr->d_id)); item = pst_parse_item(&pstfile, d_ptr, NULL); DEBUG_INFO(("About to process item\n")); if (!item) { ff.skip_count++; DEBUG_INFO(("A NULL item was seen\n")); continue; } if (item->subject.str) { DEBUG_INFO(("item->subject = %s\n", item->subject.str)); } if (item->folder && item->file_as.str) { DEBUG_INFO(("Processing Folder \"%s\"\n", item->file_as.str)); if (output_mode != OUTPUT_QUIET) { pst_debug_lock(); printf("Processing Folder \"%s\"\n", item->file_as.str); fflush(stdout); pst_debug_unlock(); } ff.item_count++; if (d_ptr->child && (deleted_mode == DMODE_INCLUDE || strcasecmp(item->file_as.str, "Deleted Items"))) { //if this is a non-empty folder other than deleted items, we want to recurse into it pid_t parent = getpid(); 
pid_t child = try_fork(item->file_as.str); if (child == 0) { // we are the child process, or the original parent if no children were available pid_t me = getpid(); process(item, d_ptr->child); #ifdef HAVE_FORK #ifdef HAVE_SEMAPHORE_H if (me != parent) { // we really were a child, forked for the sole purpose of processing this folder // free my child count slot before really exiting, since // all I am doing here is waiting for my children to exit sem_post(global_children); grim_reaper(1); // wait for all my child processes to exit exit(0); // really exit } #endif #endif } } } else if (item->contact && (item->type == PST_TYPE_CONTACT)) { DEBUG_INFO(("Processing Contact\n")); if (!(output_type_mode & OTMODE_CONTACT)) { ff.skip_count++; DEBUG_INFO(("skipping contact: not in output type list\n")); } else { if (!ff.type) ff.type = item->type; if ((ff.type != PST_TYPE_CONTACT) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type)); } else { ff.item_count++; if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? 
".vcf" : ""); if (contact_mode == CMODE_VCARD) { pst_convert_utf8_null(item, &item->comment); write_vcard(ff.output, item, item->contact, item->comment.str); } else { pst_convert_utf8(item, &item->contact->fullname); pst_convert_utf8(item, &item->contact->address1); fprintf(ff.output, "%s <%s>\n", item->contact->fullname.str, item->contact->address1.str); } } } } else if (item->email && ((item->type == PST_TYPE_NOTE) || (item->type == PST_TYPE_SCHEDULE) || (item->type == PST_TYPE_REPORT))) { DEBUG_INFO(("Processing Email\n")); if (!(output_type_mode & OTMODE_EMAIL)) { ff.skip_count++; DEBUG_INFO(("skipping email: not in output type list\n")); } else { if (!ff.type) ff.type = item->type; if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type)); } else { - ff.item_count++; char *extra_mime_headers = NULL; + ff.item_count++; + if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".eml" : ""); if (mode == MODE_SEPARATE) { // process this single email message, possibly forking pid_t parent = getpid(); pid_t child = try_fork(item->file_as.str); if (child == 0) { // we are the child process, or the original parent if no children were available pid_t me = getpid(); - mk_separate_file(&ff, (mode_EX) ? 
".eml" : ""); write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers); #ifdef HAVE_FORK #ifdef HAVE_SEMAPHORE_H if (me != parent) { // we really were a child, forked for the sole purpose of processing this message // free my child count slot before really exiting, since // all I am doing here is waiting for my children to exit sem_post(global_children); grim_reaper(1); // wait for all my child processes to exit - there should not be any exit(0); // really exit } #endif #endif } } else { // process this single email message, cannot fork since not separate mode write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers); } } } } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) { DEBUG_INFO(("Processing Journal Entry\n")); if (!(output_type_mode & OTMODE_JOURNAL)) { ff.skip_count++; DEBUG_INFO(("skipping journal entry: not in output type list\n")); } else { if (!ff.type) ff.type = item->type; if ((ff.type != PST_TYPE_JOURNAL) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type)); } else { ff.item_count++; if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : ""); write_journal(ff.output, item); fprintf(ff.output, "\n"); } } } else if (item->appointment && (item->type == PST_TYPE_APPOINTMENT)) { DEBUG_INFO(("Processing Appointment Entry\n")); if (!(output_type_mode & OTMODE_APPOINTMENT)) { ff.skip_count++; DEBUG_INFO(("skipping appointment: not in output type list\n")); } else { if (!ff.type) ff.type = item->type; if ((ff.type != PST_TYPE_APPOINTMENT) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type)); } else { ff.item_count++; if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? 
".ics" : ""); write_schedule_part_data(ff.output, item, NULL, NULL); fprintf(ff.output, "\n"); } } } else if (item->message_store) { // there should only be one message_store, and we have already done it ff.skip_count++; DEBUG_INFO(("item with message store content, type %i %s folder type %i, skipping it\n", item->type, item->ascii_type, ff.type)); } else { ff.skip_count++; DEBUG_INFO(("Unknown item type %i (%s) name (%s)\n", item->type, item->ascii_type, item->file_as.str)); } pst_freeItem(item); } close_enter_dir(&ff); DEBUG_RET(); } int main(int argc, char* const* argv) { pst_item *item = NULL; pst_desc_tree *d_ptr; char * fname = NULL; char *d_log = NULL; int c,x; char *temp = NULL; //temporary char pointer prog_name = argv[0]; time_t now = time(NULL); srand((unsigned)now); if (regcomp(&meta_charset_pattern, "]*content=\"[^>]*charset=([^>\";]*)[\";]", REG_ICASE | REG_EXTENDED)) { printf("cannot compile regex pattern to find content charset in html bodies\n"); exit(3); } // command-line option handling while ((c = getopt(argc, argv, "bc:Dd:ehj:kMo:qrSt:uVw"))!= -1) { switch (c) { case 'b': save_rtf_body = 0; break; case 'c': if (optarg && optarg[0]=='v') { contact_mode=CMODE_VCARD; contact_mode_specified = 1; } else if (optarg && optarg[0]=='l') { contact_mode=CMODE_LIST; contact_mode_specified = 1; } else { usage(); exit(0); } break; case 'D': deleted_mode = DMODE_INCLUDE; break; case 'd': d_log = optarg; break; case 'h': usage(); exit(0); break; case 'j': max_children = atoi(optarg); max_child_specified = 1; break; case 'k': mode = MODE_KMAIL; break; case 'M': mode = MODE_SEPARATE; mode_MH = 1; mode_EX = 0; break; case 'e': mode = MODE_SEPARATE; mode_MH = 1; mode_EX = 1; file_name_len = 14; break; case 'o': output_dir = optarg; break; case 'q': output_mode = OUTPUT_QUIET; break; case 'r': mode = MODE_RECURSE; mode_thunder = 0; break; case 'S': mode = MODE_SEPARATE; mode_MH = 0; mode_EX = 0; break; case 't': // email, appointment, contact, other if (!optarg) { 
usage(); exit(0); } temp = optarg; output_type_mode = 0; while (*temp > 0) { switch (temp[0]) { case 'e': output_type_mode |= OTMODE_EMAIL; break; case 'a': output_type_mode |= OTMODE_APPOINTMENT; break; case 'j': output_type_mode |= OTMODE_JOURNAL; break; case 'c': output_type_mode |= OTMODE_CONTACT; break; default: usage(); exit(0); break; } temp++; } break; case 'u': mode = MODE_RECURSE; mode_thunder = 1; break; case 'V': version(); exit(0); break; case 'w': overwrite = 1; break; default: usage(); exit(1); break; } } if (argc > optind) { fname = argv[optind]; } else { usage(); exit(2); } #ifdef _SC_NPROCESSORS_ONLN number_processors = sysconf(_SC_NPROCESSORS_ONLN); #endif max_children = (max_child_specified) ? max_children : number_processors * 4; active_children = 0; child_processes = (pid_t *)pst_malloc(sizeof(pid_t) * max_children); memset(child_processes, 0, sizeof(pid_t) * max_children); #ifdef HAVE_SEMAPHORE_H if (max_children) { shared_memory_id = shmget(IPC_PRIVATE, sizeof(sem_t)*2, 0777); if (shared_memory_id >= 0) { global_children = (sem_t *)shmat(shared_memory_id, NULL, 0); if (global_children == (sem_t *)-1) global_children = NULL; if (global_children) { output_mutex = &(global_children[1]); sem_init(global_children, 1, max_children); sem_init(output_mutex, 1, 1); } shmctl(shared_memory_id, IPC_RMID, NULL); } } #endif #ifdef DEBUG_ALL // force a log file if (!d_log) d_log = "readpst.log"; #endif // defined DEBUG_ALL #ifdef HAVE_SEMAPHORE_H DEBUG_INIT(d_log, output_mutex); #else DEBUG_INIT(d_log, NULL); #endif DEBUG_ENT("main"); if (output_mode != OUTPUT_QUIET) printf("Opening PST file and indexes...\n"); RET_DERROR(pst_open(&pstfile, fname), 1, ("Error opening File\n")); RET_DERROR(pst_load_index(&pstfile), 2, ("Index Error\n")); pst_load_extended_attributes(&pstfile); if (chdir(output_dir)) { x = errno; pst_close(&pstfile); DEBUG_RET(); DIE(("Cannot change to output dir %s: %s\n", output_dir, strerror(x))); } d_ptr = pstfile.d_head; // first record 
is main record item = pst_parse_item(&pstfile, d_ptr, NULL); if (!item || !item->message_store) { DEBUG_RET(); DIE(("Could not get root record\n")); } // default the file_as to the same as the main filename if it doesn't exist if (!item->file_as.str) { if (!(temp = strrchr(fname, '/'))) if (!(temp = strrchr(fname, '\\'))) temp = fname; else temp++; // get past the "\\" else temp++; // get past the "/" item->file_as.str = (char*)pst_malloc(strlen(temp)+1); strcpy(item->file_as.str, temp); item->file_as.is_utf8 = 1; DEBUG_INFO(("file_as was blank, so am using %s\n", item->file_as.str)); } DEBUG_INFO(("Root Folder Name: %s\n", item->file_as.str)); d_ptr = pst_getTopOfFolders(&pstfile, item); if (!d_ptr) { DEBUG_RET(); DIE(("Top of folders record not found. Cannot continue\n")); } process(item, d_ptr->child); // do the children of TOPF grim_reaper(1); // wait for all child processes pst_freeItem(item); pst_close(&pstfile); DEBUG_RET(); #ifdef HAVE_SEMAPHORE_H if (global_children) { sem_destroy(global_children); sem_destroy(output_mutex); shmdt(global_children); } #endif regfree(&meta_charset_pattern); return 0; } void write_email_body(FILE *f, char *body) { char *n = body; DEBUG_ENT("write_email_body"); if (mode != MODE_SEPARATE) { while (n) { char *p = body; while (*p == '>') p++; if (strncmp(p, "From ", 5) == 0) fprintf(f, ">"); if ((n = strchr(body, '\n'))) { n++; pst_fwrite(body, n-body, 1, f); //write just a line body = n; } } } pst_fwrite(body, strlen(body), 1, f); DEBUG_RET(); } void removeCR (char *c) { // converts \r\n to \n char *a, *b; DEBUG_ENT("removeCR"); a = b = c; while (*a != '\0') { *b = *a; if (*a != '\r') b++; a++; } *b = '\0'; DEBUG_RET(); } void usage() { DEBUG_ENT("usage"); version(); printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); printf("OPTIONS:\n"); printf("\t-V\t- Version. 
Display program version\n"); printf("\t-D\t- Include deleted items in output\n"); printf("\t-M\t- Write emails in the MH (rfc822) format\n"); printf("\t-S\t- Separate. Write emails in the separate format\n"); printf("\t-b\t- Don't save RTF-Body attachments\n"); printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); printf("\t-d \t- Debug to file.\n"); printf("\t-e\t- As with -M, but include extensions on output files\n"); printf("\t-h\t- Help. This screen\n"); printf("\t-j \t- Number of parallel jobs to run\n"); printf("\t-k\t- KMail. Output in kmail format\n"); printf("\t-o \t- Output directory to write files to. CWD is changed *after* opening pst file\n"); printf("\t-q\t- Quiet. Only print error messages\n"); printf("\t-r\t- Recursive. Output in a recursive format\n"); printf("\t-t[eajc]\t- Set the output type list. e = email, a = attachment, j = journal, c = contact\n"); printf("\t-u\t- Thunderbird mode. Write two extra .size and .type files\n"); printf("\t-w\t- Overwrite any output mbox files\n"); printf("\n"); printf("Only one of -k -M -r -S should be specified\n"); DEBUG_RET(); } void version() { DEBUG_ENT("version"); printf("ReadPST / LibPST v%s\n", VERSION); #if BYTE_ORDER == BIG_ENDIAN printf("Big Endian implementation being used.\n"); #elif BYTE_ORDER == LITTLE_ENDIAN printf("Little Endian implementation being used.\n"); #else # error "Byte order not supported by this library" #endif DEBUG_RET(); } char *mk_kmail_dir(char *fname) { //change to that directory //make a directory based on OUTPUT_KMAIL_DIR_TEMPLATE //allocate space for OUTPUT_TEMPLATE and form a char* with fname //return that value char *dir, *out_name, *index; int x; DEBUG_ENT("mk_kmail_dir"); if (kmail_chdir && chdir(kmail_chdir)) { x = errno; DIE(("mk_kmail_dir: Cannot change to directory %s: %s\n", kmail_chdir, strerror(x))); } dir = pst_malloc(strlen(fname)+strlen(OUTPUT_KMAIL_DIR_TEMPLATE)+1); sprintf(dir, OUTPUT_KMAIL_DIR_TEMPLATE, fname); 
check_filename(dir); if (D_MKDIR(dir)) { if (errno != EEXIST) { // not an error because it exists x = errno; DIE(("mk_kmail_dir: Cannot create directory %s: %s\n", dir, strerror(x))); } } kmail_chdir = pst_realloc(kmail_chdir, strlen(dir)+1); strcpy(kmail_chdir, dir); free (dir); //we should remove any existing indexes created by KMail, cause they might be different now index = pst_malloc(strlen(fname)+strlen(KMAIL_INDEX)+1); sprintf(index, KMAIL_INDEX, fname); unlink(index); free(index); out_name = pst_malloc(strlen(fname)+strlen(OUTPUT_TEMPLATE)+1); sprintf(out_name, OUTPUT_TEMPLATE, fname); DEBUG_RET(); return out_name; } int close_kmail_dir() { // change .. int x; DEBUG_ENT("close_kmail_dir"); if (kmail_chdir) { //only free kmail_chdir if not NULL. do not change directory free(kmail_chdir); kmail_chdir = NULL; } else { if (chdir("..")) { x = errno; DIE(("close_kmail_dir: Cannot move up dir (..): %s\n", strerror(x))); } } DEBUG_RET(); return 0; } // this will create a directory by that name, // then make an mbox file inside that directory. 
char *mk_recurse_dir(char *dir, int32_t folder_type) { int x; char *out_name; DEBUG_ENT("mk_recurse_dir"); check_filename(dir); if (D_MKDIR (dir)) { if (errno != EEXIST) { // not an error because it exists x = errno; DIE(("mk_recurse_dir: Cannot create directory %s: %s\n", dir, strerror(x))); } } if (chdir (dir)) { x = errno; DIE(("mk_recurse_dir: Cannot change to directory %s: %s\n", dir, strerror(x))); } switch (folder_type) { case PST_TYPE_APPOINTMENT: out_name = strdup("calendar"); break; case PST_TYPE_CONTACT: out_name = strdup("contacts"); break; case PST_TYPE_JOURNAL: out_name = strdup("journal"); break; case PST_TYPE_STICKYNOTE: case PST_TYPE_TASK: case PST_TYPE_NOTE: case PST_TYPE_OTHER: case PST_TYPE_REPORT: default: out_name = strdup("mbox"); break; } DEBUG_RET(); return out_name; } int close_recurse_dir() { int x; DEBUG_ENT("close_recurse_dir"); if (chdir("..")) { x = errno; DIE(("close_recurse_dir: Cannot go up dir (..): %s\n", strerror(x))); } DEBUG_RET(); return 0; } char *mk_separate_dir(char *dir) { size_t dirsize = strlen(dir) + 10; char dir_name[dirsize]; int x = 0, y = 0; DEBUG_ENT("mk_separate_dir"); do { if (y == 0) snprintf(dir_name, dirsize, "%s", dir); else snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y, ""); // enough for 9 digits allocated above check_filename(dir_name); DEBUG_INFO(("about to try creating %s\n", dir_name)); if (D_MKDIR(dir_name)) { if (errno != EEXIST) { // if there is an error, and it doesn't already exist x = errno; DIE(("mk_separate_dir: Cannot create directory %s: %s\n", dir, strerror(x))); } } else { break; } y++; } while (overwrite == 0); if (chdir(dir_name)) { x = errno; DIE(("mk_separate_dir: Cannot change to directory %s: %s\n", dir, strerror(x))); } if (overwrite) { // we should probably delete all files from this directory #if !defined(WIN32) && !defined(__CYGWIN__) DIR * sdir = NULL; struct dirent *dirent = NULL; struct stat filestat; if (!(sdir = opendir("./"))) { 
DEBUG_WARN(("mk_separate_dir: Cannot open dir \"%s\" for deletion of old contents\n", "./")); } else { while ((dirent = readdir(sdir))) { if (lstat(dirent->d_name, &filestat) != -1) if (S_ISREG(filestat.st_mode)) { if (unlink(dirent->d_name)) { y = errno; DIE(("mk_separate_dir: unlink returned error on file %s: %s\n", dirent->d_name, strerror(y))); } } } } #endif } // we don't return a filename here cause it isn't necessary. DEBUG_RET(); return NULL; } int close_separate_dir() { int x; DEBUG_ENT("close_separate_dir"); if (chdir("..")) { x = errno; DIE(("close_separate_dir: Cannot go up dir (..): %s\n", strerror(x))); } DEBUG_RET(); return 0; } int mk_separate_file(struct file_ll *f, char *extension) { DEBUG_ENT("mk_separate_file"); DEBUG_INFO(("opening next file to save email\n")); if (f->item_count > 999999999) { // bigger than nine 9's DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n")); } sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count, extension); if (f->output) fclose(f->output); f->output = NULL; check_filename(f->name); if (!(f->output = fopen(f->name, "w"))) { DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name)); } DEBUG_RET(); return 0; } char *my_stristr(char *haystack, char *needle) { // my_stristr varies from strstr in that its searches are case-insensitive char *x=haystack, *y=needle, *z = NULL; if (!haystack || !needle) { return NULL; } while (*y != '\0' && *x != '\0') { if (tolower(*y) == tolower(*x)) { // move y on one y++; if (!z) { z = x; // store first position in haystack where a match is made } } else { y = needle; // reset y to the beginning of the needle z = NULL; // reset the haystack storage point } x++; // advance the search in the haystack } // If the haystack ended before our search finished, it's not a match. 
if (*y != '\0') return NULL; return z; } void check_filename(char *fname) { char *t = fname; DEBUG_ENT("check_filename"); if (!t) { DEBUG_RET(); return; } while ((t = strpbrk(t, "/\\:"))) { // while there are characters in the second string that we don't want *t = '_'; //replace them with an underscore } DEBUG_RET(); } void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst) { FILE *fp = NULL; int x = 0; char *temp = NULL; // If there is a long filename (filename2) use that, otherwise // use the 8.3 filename (filename1) char *attach_filename = (attach->filename2.str) ? attach->filename2.str : attach->filename1.str; DEBUG_ENT("write_separate_attachment"); DEBUG_INFO(("Attachment %s Size is %#"PRIx64", data = %#"PRIxPTR", id %#"PRIx64"\n", attach_filename, (uint64_t)attach->data.size, attach->data.data, attach->i_id)); if (!attach->data.data) { // make sure we can fetch data from the id pst_index_ll *ptr = pst_getID(pst, attach->i_id); if (!ptr) { DEBUG_WARN(("Couldn't find i_id %#"PRIx64". Cannot save attachment to file\n", attach->i_id)); DEBUG_RET(); return; } } check_filename(f_name); if (!attach_filename) { // generate our own (dummy) filename for the attachement temp = pst_malloc(strlen(f_name)+15); sprintf(temp, "%s-attach%i", f_name, attach_num); } else { // have an attachment name, make sure it's unique temp = pst_malloc(strlen(f_name)+strlen(attach_filename)+15); do { if (fp) fclose(fp); if (x == 0) sprintf(temp, "%s-%s", f_name, attach_filename); else sprintf(temp, "%s-%s-%i", f_name, attach_filename, x); } while ((fp = fopen(temp, "r")) && ++x < 99999999); if (x > 99999999) { DIE(("error finding attachment name. 
exhausted possibilities to %s\n", temp)); } } DEBUG_INFO(("Saving attachment to %s\n", temp)); if (!(fp = fopen(temp, "w"))) { DEBUG_WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp)); } else { (void)pst_attach_to_file(pst, attach, fp); fclose(fp); } if (temp) free(temp); DEBUG_RET(); } void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers) { pst_index_ll *ptr; DEBUG_ENT("write_embedded_message"); ptr = pst_getID(pf, attach->i_id); pst_desc_tree d_ptr; d_ptr.d_id = 0; d_ptr.parent_d_id = 0; d_ptr.assoc_tree = NULL; d_ptr.desc = ptr; d_ptr.no_child = 0; d_ptr.prev = NULL; d_ptr.next = NULL; d_ptr.parent = NULL; d_ptr.child = NULL; d_ptr.child_tail = NULL; pst_item *item = pst_parse_item(pf, &d_ptr, attach->id2_head); // It appears that if the embedded message contains an appointment/ // calendar item, pst_parse_item returns NULL due to the presence of // an unexpected reference type of 0x1048, which seems to represent // an array of GUIDs representing a CLSID. It's likely that this is // a reference to an internal Outlook COM class. // Log the skipped item and continue on. 
if (!item) { DEBUG_WARN(("write_embedded_message: pst_parse_item was unable to parse the embedded message in attachment ID %llu", attach->i_id)); } else { if (!item->email) { DEBUG_WARN(("write_embedded_message: pst_parse_item returned type %d, not an email message", item->type)); } else { fprintf(f_output, "\n--%s\n", boundary); fprintf(f_output, "Content-Type: %s\n\n", attach->mimetype.str); write_normal_email(f_output, "", item, MODE_NORMAL, 0, pf, 0, extra_mime_headers); } pst_freeItem(item); } DEBUG_RET(); } void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst) { DEBUG_ENT("write_inline_attachment"); DEBUG_INFO(("Attachment Size is %#"PRIx64", data = %#"PRIxPTR", id %#"PRIx64"\n", (uint64_t)attach->data.size, attach->data.data, attach->i_id)); if (!attach->data.data) { // make sure we can fetch data from the id pst_index_ll *ptr = pst_getID(pst, attach->i_id); if (!ptr) { DEBUG_WARN(("Couldn't find ID pointer. Cannot save attachment to file\n")); DEBUG_RET(); return; } } fprintf(f_output, "\n--%s\n", boundary); if (!attach->mimetype.str) { fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); } else { fprintf(f_output, "Content-Type: %s\n", attach->mimetype.str); } fprintf(f_output, "Content-Transfer-Encoding: base64\n"); if (attach->filename2.str) { // use the long filename, converted to proper encoding if needed. 
// it is already utf8 pst_rfc2231(&attach->filename2); fprintf(f_output, "Content-Disposition: attachment; \n filename*=%s\n\n", attach->filename2.str); } else if (attach->filename1.str) { // short filename never needs encoding fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach->filename1.str); } else { // no filename is inline fprintf(f_output, "Content-Disposition: inline\n\n"); } (void)pst_attach_to_file_base64(pst, attach, f_output); fprintf(f_output, "\n\n"); DEBUG_RET(); } int valid_headers(char *header) { // headers are sometimes really bogus - they seem to be fragments of the // message body, so we only use them if they seem to be real rfc822 headers. // this list is composed of ones that we have seen in real pst files. // there are surely others. the problem is - given an arbitrary character // string, is it a valid (or even reasonable) set of rfc822 headers? if (header) { if ((strncasecmp(header, "X-Barracuda-URL: ", 17) == 0) || (strncasecmp(header, "X-ASG-Debug-ID: ", 16) == 0) || (strncasecmp(header, "Return-Path: ", 13) == 0) || (strncasecmp(header, "Received: ", 10) == 0) || (strncasecmp(header, "Subject: ", 9) == 0) || (strncasecmp(header, "Date: ", 6) == 0) || (strncasecmp(header, "From: ", 6) == 0) || (strncasecmp(header, "X-x: ", 5) == 0) || (strncasecmp(header, "Microsoft Mail Internet Headers", 31) == 0)) { return 1; } else { if (strlen(header) > 2) { DEBUG_INFO(("Ignore bogus headers = %s\n", header)); } return 0; } } else return 0; } void header_has_field(char *header, char *field, int *flag) { DEBUG_ENT("header_has_field"); if (my_stristr(header, field) || (strncasecmp(header, field+1, strlen(field)-1) == 0)) { DEBUG_INFO(("header block has %s header\n", field+1)); *flag = 1; } DEBUG_RET(); } void header_get_subfield(char *field, const char *subfield, char *body_subfield, size_t size_subfield) { if (!field) return; DEBUG_ENT("header_get_subfield"); char search[60]; snprintf(search, sizeof(search), " %s=", 
subfield); field++; char *n = header_end_field(field); char *s = my_stristr(field, search); if (n && s && (s < n)) { char *e, *f, save; s += strlen(search); // skip over subfield= if (*s == '"') { s++; e = strchr(s, '"'); } else { e = strchr(s, ';'); f = strchr(s, '\n'); if (e && f && (f < e)) e = f; } if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better save = *e; *e = '\0'; snprintf(body_subfield, size_subfield, "%s", s); // copy the subfield to our buffer *e = save; DEBUG_INFO(("body %s %s from headers\n", subfield, body_subfield)); } DEBUG_RET(); } char* header_get_field(char *header, char *field) { char *t = my_stristr(header, field); if (!t && (strncasecmp(header, field+1, strlen(field)-1) == 0)) t = header; return t; } // return pointer to \n at the end of this header field, // or NULL if this field goes to the end of the string. char *header_end_field(char *field) { char *e = strchr(field+1, '\n'); while (e && ((e[1] == ' ') || (e[1] == '\t'))) { e = strchr(e+1, '\n'); } return e; } void header_strip_field(char *header, char *field) { char *t = header_get_field(header, field); if (t) { char *e = header_end_field(t); if (e) { if (t == header) e++; // if *t is not \n, we don't want to keep the \n at *e either. 
while (*e != '\0') { *t = *e; t++; e++; } *t = '\0'; } else { // this was the last header field, truncate the headers *t = '\0'; } } } int test_base64(char *body) { int b64 = 0; uint8_t *b = (uint8_t *)body; DEBUG_ENT("test_base64"); while (*b) { if ((*b < 32) && (*b != 9) && (*b != 10)) { DEBUG_INFO(("found base64 byte %d\n", (int)*b)); DEBUG_HEXDUMPC(body, strlen(body), 0x10); b64 = 1; break; } b++; } DEBUG_RET(); return b64; } void find_html_charset(char *html, char *charset, size_t charsetlen) { const int index = 1; const int nmatch = index+1; regmatch_t match[nmatch]; DEBUG_ENT("find_html_charset"); int rc = regexec(&meta_charset_pattern, html, nmatch, match, 0); if (rc == 0) { int s = match[index].rm_so; int e = match[index].rm_eo; if (s != -1) { char save = html[e]; html[e] = '\0'; snprintf(charset, charsetlen, "%s", html+s); // copy the html charset html[e] = save; DEBUG_INFO(("charset %s from html text\n", charset)); } else { DEBUG_INFO(("matching %d %d %d %d\n", match[0].rm_so, match[0].rm_eo, match[1].rm_so, match[1].rm_eo)); DEBUG_HEXDUMPC(html, strlen(html), 0x10); } } else { DEBUG_INFO(("regexec returns %d\n", rc)); } DEBUG_RET(); } void find_rfc822_headers(char** extra_mime_headers) { DEBUG_ENT("find_rfc822_headers"); char *headers = *extra_mime_headers; if (headers) { char *temp, *t; while ((temp = strstr(headers, "\n\n"))) { temp[1] = '\0'; t = header_get_field(headers, "\nContent-Type: "); if (t) { t++; DEBUG_INFO(("found content type header\n")); char *n = strchr(t, '\n'); char *s = strstr(t, ": "); char *e = strchr(t, ';'); if (!e || (e > n)) e = n; if (s && (s < e)) { s += 2; if (!strncasecmp(s, RFC822, e-s)) { headers = temp+2; // found rfc822 header DEBUG_INFO(("found 822 headers\n%s\n", headers)); break; } } } //DEBUG_INFO(("skipping to next block after\n%s\n", headers)); headers = temp+2; // skip to next chunk of headers } *extra_mime_headers = headers; } DEBUG_RET(); } void write_body_part(FILE* f_output, pst_string *body, char *mime, char 
*charset, char *boundary, pst_file* pst) { DEBUG_ENT("write_body_part"); if (body->is_utf8 && (strcasecmp("utf-8", charset))) { // try to convert to the specified charset since the target // is not utf-8, and the data came from a unicode (utf16) field // and is now in utf-8. size_t rc; DEBUG_INFO(("Convert %s utf-8 to %s\n", mime, charset)); pst_vbuf *newer = pst_vballoc(2); rc = pst_vb_utf8to8bit(newer, body->str, strlen(body->str), charset); if (rc == (size_t)-1) { // unable to convert, change the charset to utf8 free(newer->b); DEBUG_INFO(("Failed to convert %s utf-8 to %s\n", mime, charset)); charset = "utf-8"; } else { // null terminate the output string pst_vbgrow(newer, 1); newer->b[newer->dlen] = '\0'; free(body->str); body->str = newer->b; } free(newer); } removeCR(body->str); int base64 = test_base64(body->str); fprintf(f_output, "\n--%s\n", boundary); fprintf(f_output, "Content-Type: %s; charset=\"%s\"\n", mime, charset); if (base64) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); fprintf(f_output, "\n"); if (base64) { char *enc = pst_base64_encode(body->str, strlen(body->str)); if (enc) { write_email_body(f_output, enc); fprintf(f_output, "\n"); free(enc); } } else { write_email_body(f_output, body->str); } DEBUG_RET(); } void write_schedule_part_data(FILE* f_output, pst_item* item, const char* sender, const char* method) { fprintf(f_output, "BEGIN:VCALENDAR\n"); fprintf(f_output, "VERSION:2.0\n"); fprintf(f_output, "PRODID:LibPST v%s\n", VERSION); if (method) fprintf(f_output, "METHOD:%s\n", method); fprintf(f_output, "BEGIN:VEVENT\n"); if (sender) { if (item->email->outlook_sender_name.str) { fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender); } else { fprintf(f_output, "ORGANIZER;CN=\"\":MAILTO:%s\n", sender); } } write_appointment(f_output, item); fprintf(f_output, "END:VCALENDAR\n"); } void write_schedule_part(FILE* f_output, pst_item* item, const char* sender, const char* boundary) { const 
char* method = "REQUEST"; const char* charset = "utf-8"; char fname[30]; if (!item->appointment) return; // inline appointment request fprintf(f_output, "\n--%s\n", boundary); fprintf(f_output, "Content-Type: %s; method=\"%s\"; charset=\"%s\"\n\n", "text/calendar", method, charset); write_schedule_part_data(f_output, item, sender, method); fprintf(f_output, "\n"); // attachment appointment request snprintf(fname, sizeof(fname), "i%i.ics", rand()); fprintf(f_output, "\n--%s\n", boundary); fprintf(f_output, "Content-Type: %s; charset=\"%s\"; name=\"%s\"\n", "text/calendar", "utf-8", fname); fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", fname); write_schedule_part_data(f_output, item, sender, method); fprintf(f_output, "\n"); } void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf, char** extra_mime_headers) { char boundary[60]; char altboundary[66]; char *altboundaryp = NULL; char body_charset[30]; char buffer_charset[30]; char body_report[60]; char sender[60]; int sender_known = 0; char *temp = NULL; time_t em_time; char *c_time; char *headers = NULL; int has_from, has_subject, has_to, has_cc, has_date, has_msgid; has_from = has_subject = has_to = has_cc = has_date = has_msgid = 0; DEBUG_ENT("write_normal_email"); pst_convert_utf8_null(item, &item->email->header); headers = valid_headers(item->email->header.str) ? item->email->header.str : valid_headers(*extra_mime_headers) ? 
*extra_mime_headers : NULL; // setup default body character set and report type strncpy(body_charset, pst_default_charset(item, sizeof(buffer_charset), buffer_charset), sizeof(body_charset)); body_charset[sizeof(body_charset)-1] = '\0'; strncpy(body_report, "delivery-status", sizeof(body_report)); body_report[sizeof(body_report)-1] = '\0'; // setup default sender pst_convert_utf8(item, &item->email->sender_address); if (item->email->sender_address.str && strchr(item->email->sender_address.str, '@')) { temp = item->email->sender_address.str; sender_known = 1; } else { temp = "MAILER-DAEMON"; } strncpy(sender, temp, sizeof(sender)); sender[sizeof(sender)-1] = '\0'; // convert the sent date if it exists, or set it to a fixed date if (item->email->sent_date) { em_time = pst_fileTimeToUnixTime(item->email->sent_date); c_time = ctime(&em_time); if (c_time) c_time[strlen(c_time)-1] = '\0'; //remove end \n else c_time = "Fri Dec 28 12:06:21 2001"; } else c_time = "Fri Dec 28 12:06:21 2001"; // create our MIME boundaries here. snprintf(boundary, sizeof(boundary), "--boundary-LibPST-iamunique-%i_-_-", rand()); snprintf(altboundary, sizeof(altboundary), "alt-%s", boundary); // we will always look at the headers to discover some stuff if (headers ) { char *t; removeCR(headers); temp = strstr(headers, "\n\n"); if (temp) { // cut off our real rfc822 headers here temp[1] = '\0'; // pointer to all the embedded MIME headers. 
// we use these to find the actual rfc822 headers for embedded message/rfc822 mime parts // but only for the outermost message if (!*extra_mime_headers) *extra_mime_headers = temp+2; DEBUG_INFO(("Found extra mime headers\n%s\n", temp+2)); } // Check if the headers have all the necessary fields header_has_field(headers, "\nFrom: ", &has_from); header_has_field(headers, "\nTo: ", &has_to); header_has_field(headers, "\nSubject: ", &has_subject); header_has_field(headers, "\nDate: ", &has_date); header_has_field(headers, "\nCC: ", &has_cc); header_has_field(headers, "\nMessage-Id: ", &has_msgid); // look for charset and report-type in Content-Type header t = header_get_field(headers, "\nContent-Type: "); header_get_subfield(t, "charset", body_charset, sizeof(body_charset)); header_get_subfield(t, "report-type", body_report, sizeof(body_report)); // derive a proper sender email address if (!sender_known) { t = header_get_field(headers, "\nFrom: "); if (t) { // assume address is on the first line, rather than on a continuation line t++; char *n = strchr(t, '\n'); char *s = strchr(t, '<'); char *e = strchr(t, '>'); if (s && e && n && (s < e) && (e < n)) { char save = *e; *e = '\0'; snprintf(sender, sizeof(sender), "%s", s+1); *e = save; } } } // Strip out the mime headers and some others that we don't want to emit header_strip_field(headers, "\nMicrosoft Mail Internet Headers"); header_strip_field(headers, "\nMIME-Version: "); header_strip_field(headers, "\nContent-Type: "); header_strip_field(headers, "\nContent-Transfer-Encoding: "); header_strip_field(headers, "\nContent-class: "); header_strip_field(headers, "\nX-MimeOLE: "); header_strip_field(headers, "\nBcc:"); header_strip_field(headers, "\nX-From_: "); } DEBUG_INFO(("About to print Header\n")); if (item && item->subject.str) { pst_convert_utf8(item, &item->subject); DEBUG_INFO(("item->subject = %s\n", item->subject.str)); } if (mode != MODE_SEPARATE) { // most modes need this separator line. 
// procmail produces this separator without the quotes around the // sender email address, but apparently some Mac email client needs // those quotes, and they don't seem to cause problems for anyone else. fprintf(f_output, "From \"%s\" %s\n", sender, c_time); } // print the supplied email headers if (headers) { int len = strlen(headers); if (len > 0) { fprintf(f_output, "%s", headers); // make sure the headers end with a \n if (headers[len-1] != '\n') fprintf(f_output, "\n"); //char *h = headers; //while (*h) { // char *e = strchr(h, '\n'); // int d = 1; // normally e points to trailing \n // if (!e) { // e = h + strlen(h); // e points to trailing null // d = 0; // } // // we could do rfc2047 encoding here if needed // fprintf(f_output, "%.*s\n", (int)(e-h), h); // h = e + d; //} } } // create required header fields that are not already written if (!has_from) { if (item->email->outlook_sender_name.str){ pst_rfc2047(item, &item->email->outlook_sender_name, 1); fprintf(f_output, "From: %s <%s>\n", item->email->outlook_sender_name.str, sender); } else { fprintf(f_output, "From: <%s>\n", sender); } } if (!has_subject) { if (item->subject.str) { pst_rfc2047(item, &item->subject, 0); fprintf(f_output, "Subject: %s\n", item->subject.str); } else { fprintf(f_output, "Subject: \n"); } } if (!has_to && item->email->sentto_address.str) { pst_rfc2047(item, &item->email->sentto_address, 0); fprintf(f_output, "To: %s\n", item->email->sentto_address.str); } if (!has_cc && item->email->cc_address.str) { pst_rfc2047(item, &item->email->cc_address, 0); fprintf(f_output, "Cc: %s\n", item->email->cc_address.str); } if (!has_date && item->email->sent_date) { char c_time[C_TIME_SIZE]; struct tm stm; gmtime_r(&em_time, &stm); strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", &stm); fprintf(f_output, "Date: %s\n", c_time); } if (!has_msgid && item->email->messageid.str) { pst_convert_utf8(item, &item->email->messageid); fprintf(f_output, "Message-Id: %s\n", 
item->email->messageid.str); } // add forensic headers to capture some .pst stuff that is not really // needed or used by mail clients pst_convert_utf8_null(item, &item->email->sender_address); if (item->email->sender_address.str && !strchr(item->email->sender_address.str, '@') && strcmp(item->email->sender_address.str, ".") && (strlen(item->email->sender_address.str) > 0)) { fprintf(f_output, "X-libpst-forensic-sender: %s\n", item->email->sender_address.str); } if (item->email->bcc_address.str) { pst_convert_utf8(item, &item->email->bcc_address); fprintf(f_output, "X-libpst-forensic-bcc: %s\n", item->email->bcc_address.str); } // add our own mime headers fprintf(f_output, "MIME-Version: 1.0\n"); if (item->type == PST_TYPE_REPORT) { // multipart/report for DSN/MDN reports fprintf(f_output, "Content-Type: multipart/report; report-type=%s;\n\tboundary=\"%s\"\n", body_report, boundary); } else { fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); } fprintf(f_output, "\n"); // end of headers, start of body // now dump the body parts if ((item->type == PST_TYPE_REPORT) && (item->email->report_text.str)) { write_body_part(f_output, &item->email->report_text, "text/plain", body_charset, boundary, pst); fprintf(f_output, "\n"); } if (item->body.str && item->email->htmlbody.str) { // start the nested alternative part fprintf(f_output, "\n--%s\n", boundary); fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", altboundary); altboundaryp = altboundary; } else { altboundaryp = boundary; } if (item->body.str) { write_body_part(f_output, &item->body, "text/plain", body_charset, altboundaryp, pst); } if (item->email->htmlbody.str) { find_html_charset(item->email->htmlbody.str, body_charset, sizeof(body_charset)); write_body_part(f_output, &item->email->htmlbody, "text/html", body_charset, altboundaryp, pst); } if (item->body.str && item->email->htmlbody.str) { // end the nested alternative part fprintf(f_output, 
"\n--%s--\n", altboundary); } if (item->email->rtf_compressed.data && save_rtf) { pst_item_attach* attach = (pst_item_attach*)pst_malloc(sizeof(pst_item_attach)); DEBUG_INFO(("Adding RTF body as attachment\n")); memset(attach, 0, sizeof(pst_item_attach)); attach->next = item->attach; item->attach = attach; attach->data.data = pst_lzfu_decompress(item->email->rtf_compressed.data, item->email->rtf_compressed.size, &attach->data.size); attach->filename2.str = strdup(RTF_ATTACH_NAME); attach->filename2.is_utf8 = 1; attach->mimetype.str = strdup(RTF_ATTACH_TYPE); attach->mimetype.is_utf8 = 1; } if (item->email->encrypted_body.data) { pst_item_attach* attach = (pst_item_attach*)pst_malloc(sizeof(pst_item_attach)); DEBUG_INFO(("Adding encrypted text body as attachment\n")); attach = (pst_item_attach*) pst_malloc(sizeof(pst_item_attach)); memset(attach, 0, sizeof(pst_item_attach)); attach->next = item->attach; item->attach = attach; attach->data.data = item->email->encrypted_body.data; attach->data.size = item->email->encrypted_body.size; item->email->encrypted_body.data = NULL; } if (item->email->encrypted_htmlbody.data) { pst_item_attach* attach = (pst_item_attach*)pst_malloc(sizeof(pst_item_attach)); DEBUG_INFO(("Adding encrypted HTML body as attachment\n")); attach = (pst_item_attach*) pst_malloc(sizeof(pst_item_attach)); memset(attach, 0, sizeof(pst_item_attach)); attach->next = item->attach; item->attach = attach; attach->data.data = item->email->encrypted_htmlbody.data; attach->data.size = item->email->encrypted_htmlbody.size; item->email->encrypted_htmlbody.data = NULL; } if (item->type == PST_TYPE_SCHEDULE) { write_schedule_part(f_output, item, sender, boundary); } // other attachments { pst_item_attach* attach; int attach_num = 0; for (attach = item->attach; attach; attach = attach->next) { pst_convert_utf8_null(item, &attach->filename1); pst_convert_utf8_null(item, &attach->filename2); pst_convert_utf8_null(item, &attach->mimetype); DEBUG_INFO(("Attempting 
Attachment encoding\n")); if (attach->method == PST_ATTACH_EMBEDDED) { DEBUG_INFO(("have an embedded rfc822 message attachment\n")); if (attach->mimetype.str) { DEBUG_INFO(("which already has a mime-type of %s\n", attach->mimetype.str)); free(attach->mimetype.str); } attach->mimetype.str = strdup(RFC822); attach->mimetype.is_utf8 = 1; find_rfc822_headers(extra_mime_headers); write_embedded_message(f_output, attach, boundary, pst, extra_mime_headers); } else if (attach->data.data || attach->i_id) { if (mode == MODE_SEPARATE && !mode_MH) write_separate_attachment(f_name, attach, ++attach_num, pst); else write_inline_attachment(f_output, attach, boundary, pst); } } } fprintf(f_output, "\n--%s--\n\n", boundary); DEBUG_RET(); } void write_vcard(FILE* f_output, pst_item* item, pst_item_contact* contact, char comment[]) { char* result = NULL; size_t resultlen = 0; char time_buffer[30]; // We can only call rfc escape once per printf, since the second call // may free the buffer returned by the first call. // I had tried to place those into a single printf - Carl. 
DEBUG_ENT("write_vcard");

    // make everything utf8
    pst_convert_utf8_null(item, &contact->fullname);
    pst_convert_utf8_null(item, &contact->surname);
    pst_convert_utf8_null(item, &contact->first_name);
    pst_convert_utf8_null(item, &contact->middle_name);
    pst_convert_utf8_null(item, &contact->display_name_prefix);
    pst_convert_utf8_null(item, &contact->suffix);
    pst_convert_utf8_null(item, &contact->nickname);
    pst_convert_utf8_null(item, &contact->address1);
    pst_convert_utf8_null(item, &contact->address2);
    pst_convert_utf8_null(item, &contact->address3);
    pst_convert_utf8_null(item, &contact->home_po_box);
    pst_convert_utf8_null(item, &contact->home_street);
    pst_convert_utf8_null(item, &contact->home_city);
    pst_convert_utf8_null(item, &contact->home_state);
    pst_convert_utf8_null(item, &contact->home_postal_code);
    pst_convert_utf8_null(item, &contact->home_country);
    pst_convert_utf8_null(item, &contact->home_address);
    pst_convert_utf8_null(item, &contact->business_po_box);
    pst_convert_utf8_null(item, &contact->business_street);
    pst_convert_utf8_null(item, &contact->business_city);
    pst_convert_utf8_null(item, &contact->business_state);
    pst_convert_utf8_null(item, &contact->business_postal_code);
    pst_convert_utf8_null(item, &contact->business_country);
    pst_convert_utf8_null(item, &contact->business_address);
    pst_convert_utf8_null(item, &contact->other_po_box);
    pst_convert_utf8_null(item, &contact->other_street);
    pst_convert_utf8_null(item, &contact->other_city);
    pst_convert_utf8_null(item, &contact->other_state);
    pst_convert_utf8_null(item, &contact->other_postal_code);
    pst_convert_utf8_null(item, &contact->other_country);
    pst_convert_utf8_null(item, &contact->other_address);
    pst_convert_utf8_null(item, &contact->business_fax);
    pst_convert_utf8_null(item, &contact->business_phone);
    pst_convert_utf8_null(item, &contact->business_phone2);
    pst_convert_utf8_null(item, &contact->car_phone);
    pst_convert_utf8_null(item, &contact->home_fax);
    pst_convert_utf8_null(item, &contact->home_phone);
    pst_convert_utf8_null(item, &contact->home_phone2);
    pst_convert_utf8_null(item, &contact->isdn_phone);
    pst_convert_utf8_null(item, &contact->mobile_phone);
    pst_convert_utf8_null(item, &contact->other_phone);
    pst_convert_utf8_null(item, &contact->pager_phone);
    pst_convert_utf8_null(item, &contact->primary_fax);
    pst_convert_utf8_null(item, &contact->primary_phone);
    pst_convert_utf8_null(item, &contact->radio_phone);
    pst_convert_utf8_null(item, &contact->telex);
    pst_convert_utf8_null(item, &contact->job_title);
    pst_convert_utf8_null(item, &contact->profession);
    pst_convert_utf8_null(item, &contact->assistant_name);
    pst_convert_utf8_null(item, &contact->assistant_phone);
    pst_convert_utf8_null(item, &contact->company_name);
    pst_convert_utf8_null(item, &item->body);

    // the specification I am following is (hopefully) RFC2426 vCard Mime Directory Profile
    fprintf(f_output, "BEGIN:VCARD\n");
    // NOTE(review): fullname.str is passed on without a NULL check, unlike the
    // name components below - confirm pst_rfc2426_escape accepts NULL
    fprintf(f_output, "FN:%s\n", pst_rfc2426_escape(contact->fullname.str, &result, &resultlen));

    // N: surname;first name;middle name;prefix;suffix, one escape call per fprintf
    //fprintf(f_output, "N:%s;%s;%s;%s;%s\n",
    fprintf(f_output, "N:%s;", (!contact->surname.str)             ? "" : pst_rfc2426_escape(contact->surname.str, &result, &resultlen));
    fprintf(f_output, "%s;",   (!contact->first_name.str)          ? "" : pst_rfc2426_escape(contact->first_name.str, &result, &resultlen));
    fprintf(f_output, "%s;",   (!contact->middle_name.str)         ? "" : pst_rfc2426_escape(contact->middle_name.str, &result, &resultlen));
    fprintf(f_output, "%s;",   (!contact->display_name_prefix.str) ? "" : pst_rfc2426_escape(contact->display_name_prefix.str, &result, &resultlen));
    fprintf(f_output, "%s\n",  (!contact->suffix.str)              ? "" : pst_rfc2426_escape(contact->suffix.str, &result, &resultlen));

    if (contact->nickname.str)
        fprintf(f_output, "NICKNAME:%s\n", pst_rfc2426_escape(contact->nickname.str, &result, &resultlen));
    if (contact->address1.str)
        fprintf(f_output, "EMAIL:%s\n", pst_rfc2426_escape(contact->address1.str, &result, &resultlen));
    if (contact->address2.str)
        fprintf(f_output, "EMAIL:%s\n", pst_rfc2426_escape(contact->address2.str, &result, &resultlen));
    if (contact->address3.str)
        fprintf(f_output, "EMAIL:%s\n", pst_rfc2426_escape(contact->address3.str, &result, &resultlen));
    if (contact->birthday)
        fprintf(f_output, "BDAY:%s\n", pst_rfc2425_datetime_format(contact->birthday, sizeof(time_buffer), time_buffer));

    // each ADR line is: po box;extended address (always empty);street;city;state;postal code;country
    // and is followed by a LABEL line holding the full address text
    if (contact->home_address.str) {
        //fprintf(f_output, "ADR;TYPE=home:%s;%s;%s;%s;%s;%s;%s\n",
        fprintf(f_output, "ADR;TYPE=home:%s;",  (!contact->home_po_box.str)      ? "" : pst_rfc2426_escape(contact->home_po_box.str, &result, &resultlen));
        fprintf(f_output, "%s;",                ""); // extended Address
        fprintf(f_output, "%s;",                (!contact->home_street.str)      ? "" : pst_rfc2426_escape(contact->home_street.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->home_city.str)        ? "" : pst_rfc2426_escape(contact->home_city.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->home_state.str)       ? "" : pst_rfc2426_escape(contact->home_state.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->home_postal_code.str) ? "" : pst_rfc2426_escape(contact->home_postal_code.str, &result, &resultlen));
        fprintf(f_output, "%s\n",               (!contact->home_country.str)     ? "" : pst_rfc2426_escape(contact->home_country.str, &result, &resultlen));
        fprintf(f_output, "LABEL;TYPE=home:%s\n", pst_rfc2426_escape(contact->home_address.str, &result, &resultlen));
    }

    if (contact->business_address.str) {
        //fprintf(f_output, "ADR;TYPE=work:%s;%s;%s;%s;%s;%s;%s\n",
        fprintf(f_output, "ADR;TYPE=work:%s;",  (!contact->business_po_box.str)      ? "" : pst_rfc2426_escape(contact->business_po_box.str, &result, &resultlen));
        fprintf(f_output, "%s;",                ""); // extended Address
        fprintf(f_output, "%s;",                (!contact->business_street.str)      ? "" : pst_rfc2426_escape(contact->business_street.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->business_city.str)        ? "" : pst_rfc2426_escape(contact->business_city.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->business_state.str)       ? "" : pst_rfc2426_escape(contact->business_state.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->business_postal_code.str) ? "" : pst_rfc2426_escape(contact->business_postal_code.str, &result, &resultlen));
        fprintf(f_output, "%s\n",               (!contact->business_country.str)     ? "" : pst_rfc2426_escape(contact->business_country.str, &result, &resultlen));
        fprintf(f_output, "LABEL;TYPE=work:%s\n", pst_rfc2426_escape(contact->business_address.str, &result, &resultlen));
    }

    if (contact->other_address.str) {
        //fprintf(f_output, "ADR;TYPE=postal:%s;%s;%s;%s;%s;%s;%s\n",
        fprintf(f_output, "ADR;TYPE=postal:%s;",(!contact->other_po_box.str)       ? "" : pst_rfc2426_escape(contact->other_po_box.str, &result, &resultlen));
        fprintf(f_output, "%s;",                ""); // extended Address
        fprintf(f_output, "%s;",                (!contact->other_street.str)       ? "" : pst_rfc2426_escape(contact->other_street.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->other_city.str)         ? "" : pst_rfc2426_escape(contact->other_city.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->other_state.str)        ? "" : pst_rfc2426_escape(contact->other_state.str, &result, &resultlen));
        fprintf(f_output, "%s;",                (!contact->other_postal_code.str)  ? "" : pst_rfc2426_escape(contact->other_postal_code.str, &result, &resultlen));
        fprintf(f_output, "%s\n",               (!contact->other_country.str)      ? "" : pst_rfc2426_escape(contact->other_country.str, &result, &resultlen));
        fprintf(f_output, "LABEL;TYPE=postal:%s\n", pst_rfc2426_escape(contact->other_address.str, &result, &resultlen));
    }

    // telephone numbers, one TEL line per number that is present
    if (contact->business_fax.str)      fprintf(f_output, "TEL;TYPE=work,fax:%s\n",         pst_rfc2426_escape(contact->business_fax.str, &result, &resultlen));
    if (contact->business_phone.str)    fprintf(f_output, "TEL;TYPE=work,voice:%s\n",       pst_rfc2426_escape(contact->business_phone.str, &result, &resultlen));
    if (contact->business_phone2.str)   fprintf(f_output, "TEL;TYPE=work,voice:%s\n",       pst_rfc2426_escape(contact->business_phone2.str, &result, &resultlen));
    if (contact->car_phone.str)         fprintf(f_output, "TEL;TYPE=car,voice:%s\n",        pst_rfc2426_escape(contact->car_phone.str, &result, &resultlen));
    if (contact->home_fax.str)          fprintf(f_output, "TEL;TYPE=home,fax:%s\n",         pst_rfc2426_escape(contact->home_fax.str, &result, &resultlen));
    if (contact->home_phone.str)        fprintf(f_output, "TEL;TYPE=home,voice:%s\n",       pst_rfc2426_escape(contact->home_phone.str, &result, &resultlen));
    if (contact->home_phone2.str)       fprintf(f_output, "TEL;TYPE=home,voice:%s\n",       pst_rfc2426_escape(contact->home_phone2.str, &result, &resultlen));
    if (contact->isdn_phone.str)        fprintf(f_output, "TEL;TYPE=isdn:%s\n",             pst_rfc2426_escape(contact->isdn_phone.str, &result, &resultlen));
    if (contact->mobile_phone.str)      fprintf(f_output, "TEL;TYPE=cell,voice:%s\n",       pst_rfc2426_escape(contact->mobile_phone.str, &result, &resultlen));
    if (contact->other_phone.str)       fprintf(f_output, "TEL;TYPE=msg:%s\n",              pst_rfc2426_escape(contact->other_phone.str, &result, &resultlen));
    if (contact->pager_phone.str)       fprintf(f_output, "TEL;TYPE=pager:%s\n",            pst_rfc2426_escape(contact->pager_phone.str, &result, &resultlen));
    if (contact->primary_fax.str)       fprintf(f_output, "TEL;TYPE=fax,pref:%s\n",         pst_rfc2426_escape(contact->primary_fax.str, &result, &resultlen));
    if (contact->primary_phone.str)     fprintf(f_output, "TEL;TYPE=phone,pref:%s\n",       pst_rfc2426_escape(contact->primary_phone.str, &result, &resultlen));
    if (contact->radio_phone.str)       fprintf(f_output, "TEL;TYPE=pcs:%s\n",              pst_rfc2426_escape(contact->radio_phone.str, &result, &resultlen));
    if (contact->telex.str)             fprintf(f_output, "TEL;TYPE=bbs:%s\n",              pst_rfc2426_escape(contact->telex.str, &result, &resultlen));
    if (contact->job_title.str)         fprintf(f_output, "TITLE:%s\n",                     pst_rfc2426_escape(contact->job_title.str, &result, &resultlen));
    if (contact->profession.str)        fprintf(f_output, "ROLE:%s\n",                      pst_rfc2426_escape(contact->profession.str, &result, &resultlen));
    // NOTE(review): the nested AGENT card is opened with BEGIN:VCARD but no
    // matching END:VCARD is written for it - confirm consumers accept this
    if (contact->assistant_name.str || contact->assistant_phone.str) {
        fprintf(f_output, "AGENT:BEGIN:VCARD\n");
        if (contact->assistant_name.str)    fprintf(f_output, "FN:%s\n",                    pst_rfc2426_escape(contact->assistant_name.str, &result, &resultlen));
        if (contact->assistant_phone.str)   fprintf(f_output, "TEL:%s\n",                   pst_rfc2426_escape(contact->assistant_phone.str, &result, &resultlen));
    }
    if (contact->company_name.str)      fprintf(f_output, "ORG:%s\n",                       pst_rfc2426_escape(contact->company_name.str, &result, &resultlen));
    // both the caller supplied comment and the item body become NOTE lines
    if (comment)                        fprintf(f_output, "NOTE:%s\n",                      pst_rfc2426_escape(comment, &result, &resultlen));
    if (item->body.str)                 fprintf(f_output, "NOTE:%s\n",                      pst_rfc2426_escape(item->body.str, &result, &resultlen));

    write_extra_categories(f_output, item);

    // the VERSION line is written last, just before END:VCARD
    fprintf(f_output, "VERSION: 3.0\n");
    fprintf(f_output, "END:VCARD\n\n");
    // release the escape buffer shared by all the calls above
    if (result) free(result);
    DEBUG_RET();
}


/**
 * write extra vcard or vcalendar categories from the extra keywords fields
 *
 * @param f_output open file pointer
 * @param item     pst item containing the keywords
 * @return         true if we write a categories line
 */
int write_extra_categories(FILE* f_output, pst_item* item)
{
    char*  result = NULL;
    size_t resultlen = 0;
    pst_item_extra_field *ef = item->extra_fields;
    // format of the first keyword; later keywords switch to a ", " prefix
    const char *fmt = "CATEGORIES:%s";
    int category_started = 0;
    while (ef) {
        if (strcmp(ef->field_name, "Keywords") == 0) {
            fprintf(f_output, fmt, pst_rfc2426_escape(ef->value, &result,
&resultlen)); fmt = ", %s"; category_started = 1; } ef = ef->next; } if (category_started) fprintf(f_output, "\n"); if (result) free(result); return category_started; } void write_journal(FILE* f_output, pst_item* item) { char* result = NULL; size_t resultlen = 0; char time_buffer[30]; pst_item_journal* journal = item->journal; // make everything utf8 pst_convert_utf8_null(item, &item->subject); pst_convert_utf8_null(item, &item->body); fprintf(f_output, "BEGIN:VJOURNAL\n"); fprintf(f_output, "DTSTAMP:%s\n", pst_rfc2445_datetime_format_now(sizeof(time_buffer), time_buffer)); if (item->create_date) fprintf(f_output, "CREATED:%s\n", pst_rfc2445_datetime_format(item->create_date, sizeof(time_buffer), time_buffer)); if (item->modify_date) fprintf(f_output, "LAST-MOD:%s\n", pst_rfc2445_datetime_format(item->modify_date, sizeof(time_buffer), time_buffer)); if (item->subject.str) fprintf(f_output, "SUMMARY:%s\n", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); if (item->body.str) fprintf(f_output, "DESCRIPTION:%s\n", pst_rfc2426_escape(item->body.str, &result, &resultlen)); if (journal && journal->start) fprintf(f_output, "DTSTART;VALUE=DATE-TIME:%s\n", pst_rfc2445_datetime_format(journal->start, sizeof(time_buffer), time_buffer)); fprintf(f_output, "END:VJOURNAL\n"); if (result) free(result); } void write_appointment(FILE* f_output, pst_item* item) { char* result = NULL; size_t resultlen = 0; char time_buffer[30]; pst_item_appointment* appointment = item->appointment; // make everything utf8 pst_convert_utf8_null(item, &item->subject); pst_convert_utf8_null(item, &item->body); pst_convert_utf8_null(item, &appointment->location); fprintf(f_output, "DTSTAMP:%s\n", pst_rfc2445_datetime_format_now(sizeof(time_buffer), time_buffer)); if (item->create_date) fprintf(f_output, "CREATED:%s\n", pst_rfc2445_datetime_format(item->create_date, sizeof(time_buffer), time_buffer)); if (item->modify_date) fprintf(f_output, "LAST-MOD:%s\n", 
pst_rfc2445_datetime_format(item->modify_date, sizeof(time_buffer), time_buffer)); if (item->subject.str) fprintf(f_output, "SUMMARY:%s\n", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); if (item->body.str) fprintf(f_output, "DESCRIPTION:%s\n", pst_rfc2426_escape(item->body.str, &result, &resultlen)); if (appointment && appointment->start) fprintf(f_output, "DTSTART;VALUE=DATE-TIME:%s\n", pst_rfc2445_datetime_format(appointment->start, sizeof(time_buffer), time_buffer)); if (appointment && appointment->end) fprintf(f_output, "DTEND;VALUE=DATE-TIME:%s\n", pst_rfc2445_datetime_format(appointment->end, sizeof(time_buffer), time_buffer)); if (appointment && appointment->location.str) fprintf(f_output, "LOCATION:%s\n", pst_rfc2426_escape(appointment->location.str, &result, &resultlen)); if (appointment) { switch (appointment->showas) { case PST_FREEBUSY_TENTATIVE: fprintf(f_output, "STATUS:TENTATIVE\n"); break; case PST_FREEBUSY_FREE: // mark as transparent and as confirmed fprintf(f_output, "TRANSP:TRANSPARENT\n"); case PST_FREEBUSY_BUSY: case PST_FREEBUSY_OUT_OF_OFFICE: fprintf(f_output, "STATUS:CONFIRMED\n"); break; } if (appointment->is_recurring) { const char* rules[] = {"DAILY", "WEEKLY", "MONTHLY", "YEARLY"}; const char* days[] = {"SU", "MO", "TU", "WE", "TH", "FR", "SA"}; pst_recurrence *rdata = pst_convert_recurrence(appointment); fprintf(f_output, "RRULE:FREQ=%s", rules[rdata->type]); if (rdata->count) fprintf(f_output, ";COUNT=%u", rdata->count); if ((rdata->interval != 1) && (rdata->interval)) fprintf(f_output, ";INTERVAL=%u", rdata->interval); if (rdata->dayofmonth) fprintf(f_output, ";BYMONTHDAY=%d", rdata->dayofmonth); if (rdata->monthofyear) fprintf(f_output, ";BYMONTH=%d", rdata->monthofyear); if (rdata->position) fprintf(f_output, ";BYSETPOS=%d", rdata->position); if (rdata->bydaymask) { char byday[40]; int empty = 1; int i=0; memset(byday, 0, sizeof(byday)); for (i=0; i<6; i++) { int bit = 1 << i; if (bit & rdata->bydaymask) { char 
temp[40]; snprintf(temp, sizeof(temp), "%s%s%s", byday, (empty) ? ";BYDAY=" : ";", days[i]); strcpy(byday, temp); empty = 0; } } fprintf(f_output, "%s", byday); } fprintf(f_output, "\n"); pst_free_recurrence(rdata); } switch (appointment->label) { case PST_APP_LABEL_NONE: if (!write_extra_categories(f_output, item)) fprintf(f_output, "CATEGORIES:NONE\n"); break; case PST_APP_LABEL_IMPORTANT: fprintf(f_output, "CATEGORIES:IMPORTANT\n"); break; case PST_APP_LABEL_BUSINESS: fprintf(f_output, "CATEGORIES:BUSINESS\n"); break; case PST_APP_LABEL_PERSONAL: fprintf(f_output, "CATEGORIES:PERSONAL\n"); break; case PST_APP_LABEL_VACATION: fprintf(f_output, "CATEGORIES:VACATION\n"); break; case PST_APP_LABEL_MUST_ATTEND: fprintf(f_output, "CATEGORIES:MUST-ATTEND\n"); break; case PST_APP_LABEL_TRAVEL_REQ: fprintf(f_output, "CATEGORIES:TRAVEL-REQUIRED\n"); break; case PST_APP_LABEL_NEEDS_PREP: fprintf(f_output, "CATEGORIES:NEEDS-PREPARATION\n"); break; case PST_APP_LABEL_BIRTHDAY: fprintf(f_output, "CATEGORIES:BIRTHDAY\n"); break; case PST_APP_LABEL_ANNIVERSARY: fprintf(f_output, "CATEGORIES:ANNIVERSARY\n"); break; case PST_APP_LABEL_PHONE_CALL: fprintf(f_output, "CATEGORIES:PHONE-CALL\n"); break; } } fprintf(f_output, "END:VEVENT\n"); if (result) free(result); } void create_enter_dir(struct file_ll* f, pst_item *item) { pst_convert_utf8(item, &item->file_as); f->type = item->type; f->stored_count = (item->folder) ? item->folder->item_count : 0; DEBUG_ENT("create_enter_dir"); if (mode == MODE_KMAIL) f->name = mk_kmail_dir(item->file_as.str); else if (mode == MODE_RECURSE) { f->name = mk_recurse_dir(item->file_as.str, f->type); if (mode_thunder) { FILE *type_file = fopen(".type", "w"); fprintf(type_file, "%d\n", item->type); fclose(type_file); } } else if (mode == MODE_SEPARATE) { // do similar stuff to recurse here. 
mk_separate_dir(item->file_as.str); f->name = (char*) pst_malloc(file_name_len); memset(f->name, 0, file_name_len); } else { f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1); sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str); } f->dname = (char*) pst_malloc(strlen(item->file_as.str)+1); strcpy(f->dname, item->file_as.str); if (overwrite != 1) { int x = 0; char *temp = (char*) pst_malloc (strlen(f->name)+10); //enough room for 10 digits sprintf(temp, "%s", f->name); check_filename(temp); while ((f->output = fopen(temp, "r"))) { DEBUG_INFO(("need to increase filename because one already exists with that name\n")); DEBUG_INFO(("- increasing it to %s%d\n", f->name, x)); x++; sprintf(temp, "%s%08d", f->name, x); DEBUG_INFO(("- trying \"%s\"\n", f->name)); if (x == 99999999) { DIE(("create_enter_dir: Why can I not create a folder %s? I have tried %i extensions...\n", f->name, x)); } fclose(f->output); } if (x > 0) { //then the f->name should change free (f->name); f->name = temp; } else { free(temp); } } DEBUG_INFO(("f->name = %s\nitem->folder_name = %s\n", f->name, item->file_as.str)); if (mode != MODE_SEPARATE) { check_filename(f->name); if (!(f->output = fopen(f->name, "w"))) { DIE(("create_enter_dir: Could not open file \"%s\" for write\n", f->name)); } } DEBUG_RET(); } void close_enter_dir(struct file_ll *f) { DEBUG_INFO(("processed item count for folder %s is %i, skipped %i, total %i \n", f->dname, f->item_count, f->skip_count, f->stored_count)); if (output_mode != OUTPUT_QUIET) { pst_debug_lock(); printf("\t\"%s\" - %i items done, %i items skipped.\n", f->dname, f->item_count, f->skip_count); fflush(stdout); pst_debug_unlock(); } if (f->output) { struct stat st; fclose(f->output); stat(f->name, &st); if (!st.st_size) { DEBUG_WARN(("removing empty output file %s\n", f->name)); remove(f->name); } } free(f->name); free(f->dname); if (mode == MODE_KMAIL) close_kmail_dir(); else if (mode == MODE_RECURSE) { if (mode_thunder) { FILE 
*type_file = fopen(".size", "w"); fprintf(type_file, "%i %i\n", f->item_count, f->stored_count); fclose(type_file); } close_recurse_dir(); } else if (mode == MODE_SEPARATE) close_separate_dir(); } diff --git a/xml/libpst.in b/xml/libpst.in index 71977b5..a922c04 100644 --- a/xml/libpst.in +++ b/xml/libpst.in @@ -1,2003 +1,2005 @@ @PACKAGE@ Utilities - Version @VERSION@ Packages The various source and binary packages are available at http://www.five-ten-sg.com/@PACKAGE@/packages/. The most recent documentation is available at http://www.five-ten-sg.com/@PACKAGE@/. The most recent developer documentation for the shared library is available at http://www.five-ten-sg.com/@PACKAGE@/devel/. A Mercurial source code repository for this project is available at http://hg.five-ten-sg.com/@PACKAGE@/. This version can now convert both 32 bit Outlook files (pre 2003), and the 64 bit Outlook 2003 pst files. Utilities are supplied to convert email messages to both mbox and MH mailbox formats, and to DII load file format for use with many of the CT Summation products. Contacts can be converted to a simple list, to vcard format, or to ldif format for import to an LDAP server. The libpff project has some excellent documentation of the pst file format. - 2009-09-14 + 2011-05-27 readpst 1 readpst @VERSION@ readpst convert PST (MS Outlook Personal Folders) files to mbox and other formats Synopsis readpst pstfile Description readpst is a program that can read an Outlook PST (Personal Folders) file and convert it into an mbox file, a format suitable for KMail, a recursive mbox structure, or separate emails. Options -D Include deleted items in the output. -M Output messages in MH (rfc822) format as separate files. This will create folders as named in the PST file, and will put each email together with any attachments into its own file. These files will be numbered from 1 to n with no leading zeros. This format has no from quoting. -S Output messages into separate files. 
This will create folders as named in the PST file, and will put each email in its own file. These files will be numbered from 1 to n with no leading zeros. Attachments will also be saved in the same folder as the email message. The attachments for message $m are saved as $m-$name where $name is (the original name of the attachment, or 'attach$n' if the attachment had no name), where $n is another sequential index with no leading zeros. This format has no from quoting. -V Show program version and exit. -b Do not save the attachments for the RTF format of the email body. -c format Set the Contact output mode. Use -cv for vcard format or -cl for an email list. -d debug-file Specify name of debug log file. The log file is now an ascii file, instead of the binary file used in previous versions. -e Same as the M option, but each output file will include an extension from (.eml, .ics, .vcf). This format has no from quoting. -h Show summary of options and exit. -j jobs Specifies the maximum number of parallel jobs. Specify 0 to suppress - running parallel jobs. + running parallel jobs. Folders may be processed in parallel. Output + formats that place each mail message in a separate file (-M, -S, -e) + may process the contents of individual folders in parallel. -k Changes the output format to KMail. This format uses mboxrd from quoting. -o output-directory Specifies the output directory. The directory must already exist, and is entered after the PST file is opened, but before any processing of files commences. -q Changes to silent mode. No feedback is printed to the screen, except for error messages. -r Changes the output format to Recursive. This will create folders as named in the PST file, and will put all emails in a file called "mbox" inside each folder. These files are then compatible with all mbox-compatible email clients. This format uses mboxrd from quoting. -t output-type-codes Specifies the item types that are processed. 
The argument is a sequence of single letters from (e,a,j,c) for (email, appointment, journal, contact) types. The default is to process all item types. -u Sets Thunderbird mode, a submode of recursive mode. This causes two extra .type and .size meta files to be created. This format uses mboxrd from quoting. -w Overwrite any previous output files. Beware: When used with the -S switch, this will remove all files from the target folder before writing. This is to keep the count of emails and attachments correct. From Quoting Output formats that place each mail message in a separate file (-M, -S, -e) don't do any from quoting. Output formats that place multiple email messages in a single file (-k, -r, -u) now use mboxrd from quoting rules. If none of those switches are specified, the default output format uses mboxrd from quoting rules, since it produces multiple email messages in a single file. Earlier versions used mboxo from quoting rules for all output formats. Author This manual page was originally written by Dave Smith <dave.s@earthcorp.com>, and updated by Joe Nahmias <joe@nahmias.net> for the Debian GNU/Linux system (but may be used by others). It was subsequently updated by Brad Hards <bradh@frogmouth.net>, and converted to xml format by Carl Byington <carl@five-ten-sg.com>. Copyright Copyright (C) 2002 by David Smith <dave.s@earthcorp.com>. XML version Copyright (C) 2008 by 510 Software Group <carl@five-ten-sg.com>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, please write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
Version @VERSION@ - 2009-09-14 + 2011-05-27 lspst 1 lspst @VERSION@ lspst list PST (MS Outlook Personal Folders) file data Synopsis lspst pstfile Options -V Show program version and exit. -d debug-file Specify name of debug log file. The log file is now an ascii file, instead of the binary file used in previous versions. -h Show summary of options and exit. Description lspst is a program that can read an Outlook PST (Personal Folders) file and produce a simple listing of the data (contacts, email subjects, etc). Author lspst was written by Joe Nahmias <joe@nahmias.net> based on readpst. This man page was written by 510 Software Group <carl@five-ten-sg.com>. Copyright Copyright (C) 2004 by Joe Nahmias <joe@nahmias.net>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, please write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. Version @VERSION@ - 2009-09-14 + 2011-05-27 pst2ldif 1 pst2ldif @VERSION@ pst2ldif extract contacts from a MS Outlook .pst file in .ldif format Synopsis pst2ldif pstfilename Options -V Show program version. Subsequent options are then ignored. -b ldap-base Sets the ldap base value used in the dn records. You probably want to use something like "o=organization, c=US". -c class Sets the objectClass values for the contact items. This class needs to be defined in the schema used by your LDAP server, and at a minimum it must contain the ldap attributes given below. This option may be specified multiple times to generate entries with multiple object classes. -d debug-file Specify name of debug log file. The log file is now an ascii file, instead of the binary file used in previous versions. 
-l extra-line Specify an extra line to be added to each ldap entry. This option may be specified multiple times to add multiple lines to each ldap entry. -o Use the old ldap schema, rather than the default new ldap schema. The old schema generates multiple postalAddress attributes for a single entry. The new schema generates a single postalAddress (and homePostalAddress when available) attribute with $ delimiters as specified in RFC4517. Using the old schema also generates two extra leading entries, one for "dn:ldap base", and one for "dn: cn=root, ldap base". -h Show summary of options. Subsequent options are then ignored. Description pst2ldif reads the contact information from a MS Outlook .pst file and produces a .ldif file that may be used to import those contacts into an LDAP database. The following ldap attributes are generated for the old ldap schema: cn givenName sn personalTitle company mail postalAddress l st postalCode c homePhone telephoneNumber facsimileTelephoneNumber mobile description The following attributes are generated for the new ldap schema: cn givenName sn title o mail postalAddress homePostalAddress l st postalCode c homePhone telephoneNumber facsimileTelephoneNumber mobile description labeledURI Copyright Copyright (C) 2008 by 510 Software Group <carl@five-ten-sg.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, please write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. Version @VERSION@ - 2009-09-14 + 2011-05-27 pst2dii 1 pst2dii @VERSION@ pst2dii extract email messages from a MS Outlook .pst file in DII load format Synopsis pst2dii -f ttf-font-file pstfilename Options -B bates-prefix Sets the bates prefix string. 
The bates sequence number is appended to this string, and printed on each page. -O dii-output-file Name of the output DII load file. -V Show program version. Subsequent options are then ignored. -b bates-number Starting bates sequence number. The default is zero. -c bates-color Font color for the bates stamp on each page, specified as 6 hex digits as rrggbb values. The default is ff0000 for bright red. -d debug-file Specify name of debug log file. The log file is now an ascii file, instead of the binary file used in previous versions. -f ttf-font-file Specify name of a true type font file. This should be a fixed pitch font. -h Show summary of options. Subsequent options are then ignored. -o output-directory Specifies the output directory. The directory must already exist. Description pst2dii reads the email messages from a MS Outlook .pst file and produces a DII load file that may be used to import message summaries into a Summation DII system. The DII output file contains references to the image and attachment files in the output directory. Copyright Copyright (C) 2008 by 510 Software Group <carl@five-ten-sg.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, please write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. Version @VERSION@ - 2009-09-14 + 2011-05-27 outlook.pst 5 outlook.pst format of MS Outlook .pst file Synopsis outlook.pst Overview Low level or primitive items in a .pst file are identified by an I_ID value. Higher level or composite items in a .pst file are identified by a D_ID value. There are two separate b-trees indexed by these I_ID and D_ID values. 
Starting with Outlook 2003, the file format changed from one with 32 bit pointers, to one with 64 bit pointers. We describe both formats here. 32 bit File Header The 32 bit file header is located at offset 0 in the .pst file. We only support index types 0x0e, 0x0f, 0x15, and 0x17, and encryption types 0x00, 0x01 and 0x02. Index type 0x0e is the older 32 bit Outlook format. Index type 0x0f seems to be rare, and so far the data seems to be identical to that in type 0x0e files. Index type 0x17 is the newer 64 bit Outlook format. Index type 0x15 seems to be rare, and according to the libpff project should have the same format as type 0x17 files. It was found in a 64-bit pst file created by Visual Recovery. It may be that index types less than 0x10 are 32 bit, and index types greater than or equal to 0x10 are 64 bit, and the low order four bits of the index type are some subtype or minor version number. Encryption type 0x00 is no encryption, type 0x01 is "compressible" encryption which is a simple substitution cipher, and type 0x02 is "strong" encryption, which is a simple three rotor Enigma cipher from WWII. offsetIndex1 is the file offset of the root of the index1 b-tree, which contains (I_ID, offset, size, unknown) tuples for each item in the file. backPointer1 is the value that should appear in the parent pointer of that root node. offsetIndex2 is the file offset of the root of the index2 b-tree, which contains (D_ID, DESC-I_ID, TREE-I_ID, PARENT-D_ID) tuples for each item in the file. backPointer2 is the value that should appear in the parent pointer of that root node. 64 bit File Header The 64 bit file header is located at offset 0 in the .pst file. 32 bit Index 1 Node The 32 bit index1 b-tree nodes are 512 byte blocks with the following format. The itemCount specifies the number of 12 byte records that are active. The nodeLevel is non-zero for this style of nodes. The leaf nodes have a different format.
The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a triple of (I_ID, backPointer, offset) where the offset points to the next deeper node in the tree, the backPointer value must match the backPointer in that deeper node, and I_ID is the lowest I_ID value in the subtree. 64 bit Index 1 Node The 64 bit index1 b-tree nodes are 512 byte blocks with the following format. The itemCount specifies the number of 24 byte records that are active. The nodeLevel is non-zero for this style of nodes. The leaf nodes have a different format. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a triple of (I_ID, backPointer, offset) where the offset points to the next deeper node in the tree, the backPointer value must match the backPointer in that deeper node, and I_ID is the lowest I_ID value in the subtree. 32 bit Index 1 Leaf Node The 32 bit index1 b-tree leaf nodes are 512 byte blocks with the following format. The itemCount specifies the number of 12 byte records that are active. The nodeLevel is zero for these leaf nodes. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a tuple of (I_ID, offset, size, unknown) The two low order bits of the I_ID value seem to be flags. I have never seen a case with bit zero set. Bit one indicates that the item is not encrypted. Note that references to these I_ID values elsewhere may have the low order bit set (and I don't know what that means), but when we do the search in this tree we need to clear that bit so that we can find the correct item. 64 bit Index 1 Leaf Node The 64 bit index1 b-tree leaf nodes are 512 byte blocks with the following format. The itemCount specifies the number of 24 byte records that are active. The nodeLevel is zero for these leaf nodes. The backPointer must match the backPointer from the triple that pointed to this node. 
Each item in this node is a tuple of (I_ID, offset, size, unknown) The two low order bits of the I_ID value seem to be flags. I have never seen a case with bit zero set. Bit one indicates that the item is not encrypted. Note that references to these I_ID values elsewhere may have the low order bit set (and I don't know what that means), but when we do the search in this tree we need to clear that bit so that we can find the correct item. 32 bit Index 2 Node The 32 bit index2 b-tree nodes are 512 byte blocks with the following format. The itemCount specifies the number of 12 byte records that are active. The nodeLevel is non-zero for this style of nodes. The leaf nodes have a different format. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a triple of (D_ID, backPointer, offset) where the offset points to the next deeper node in the tree, the backPointer value must match the backPointer in that deeper node, and D_ID is the lowest D_ID value in the subtree. 64 bit Index 2 Node The 64 bit index2 b-tree nodes are 512 byte blocks with the following format. The itemCount specifies the number of 24 byte records that are active. The nodeLevel is non-zero for this style of nodes. The leaf nodes have a different format. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a triple of (D_ID, backPointer, offset) where the offset points to the next deeper node in the tree, the backPointer value must match the backPointer in that deeper node, and D_ID is the lowest D_ID value in the subtree. 32 bit Index 2 Leaf Node The 32 bit index2 b-tree leaf nodes are 512 byte blocks with the following format. The itemCount specifies the number of 16 byte records that are active. The nodeLevel is zero for these leaf nodes. The backPointer must match the backPointer from the triple that pointed to this node. 
Each item in this node is a tuple of (D_ID, DESC-I_ID, TREE-I_ID, PARENT-D_ID). The DESC-I_ID points to the main data for this item (Associated Descriptor Items 0x7cec, 0xbcec, or 0x0101) via the index1 tree. The TREE-I_ID is zero or points to an Associated Tree Item 0x0002 via the index1 tree. The PARENT-D_ID points to the parent of this item in this index2 tree. 64 bit Index 2 Leaf Node The 64 bit index2 b-tree leaf nodes are 512 byte blocks with the following format. The itemCount specifies the number of 32 byte records that are active. The nodeLevel is zero for these leaf nodes. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a tuple of (D_ID, DESC-I_ID, TREE-I_ID, PARENT-D_ID). The DESC-I_ID points to the main data for this item (Associated Descriptor Items 0x7cec, 0xbcec, or 0x0101) via the index1 tree. The TREE-I_ID is zero or points to an Associated Tree Item 0x0002 via the index1 tree. The PARENT-D_ID points to the parent of this item in this index2 tree. 32 bit Associated Tree Item 0x0002 A D_ID value may point to an entry in the index2 tree with a non-zero TREE-I_ID which points to this descriptor block via the index1 tree. It maps local ID2 values (referenced in the main data for the original D_ID item) to I_ID values. This descriptor block contains triples of (ID2, I_ID, CHILD-I_ID) where the local ID2 data can be found via I_ID, and CHILD-I_ID is either zero or it points to another Associated Tree Item via the index1 tree. In the above 32 bit leaf node, we have a tuple of (0x61, 0x02a82c, 0x02a836, 0). 0x02a836 is the I_ID of the associated tree, and we can look up that I_ID value in the index1 b-tree to find the (offset,size) of the data in the .pst file. 64 bit Associated Tree Item 0x0002 This descriptor block contains a tree that maps local ID2 values to I_ID entries, similar to the 32 bit version described above.
Associated Descriptor Item 0xbcec Contains information about the item, which may be email, contact, or other outlook types. In the above leaf node, we have a tuple of (0x21, 0x00e638, 0, 0) 0x00e638 is the I_ID of the associated descriptor, and we can lookup that I_ID value in the index1 b-tree to find the (offset,size) of the data in the .pst file. This descriptor is eventually decoded to a list of MAPI elements. Note the signature of 0xbcec. There are other descriptor block formats with other signatures. Note the indexOffset of 0x013c - starting at that position in the descriptor block, we have an array of two byte integers. The first integer (0x000b) is a (count-1) of the number of overlapping pairs following the count. The first pair is (0, 0xc), the next pair is (0xc, 0x14) and the last (12th) pair is (0x123, 0x13b). These pairs are (start,end+1) offsets of items in this block. So we have count+2 integers following the count value. Note the b5offset of 0x0020, which is a type that I will call an index reference. Such index references have at least two different forms, and may point to data either in this block, or in some other block. External pointer references have the low order 4 bits all set, and are ID2 values that can be used to fetch data. This value of 0x0020 is an internal pointer reference, which needs to be right shifted by 4 bits to become 0x0002, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xc, 0x14) pair. So far we have only described internal index references where the high order 16 bits are zero. That suffices for single descriptor blocks. But in the case of the type 0x0101 descriptor block, we have an array of subblocks. In this case, the high order 16 bits of an internal index reference are used to select the subblock. Each subblock starts with a 16 bit indexOffset which points to the count and array of 16 bit integer pairs which are offsets in the current subblock. 
Finally, we have the offset and size of the "b5" block located at offset 0xc with a size of 8 bytes in this descriptor block. The "b5" block has the following format: Note the descoffset of 0x0040, which again is an index reference. In this case, it is an internal pointer reference, which needs to be right shifted by 4 bits to become 0x0004, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0x14, 0x7c) pair. The datasize (6) plus the b5 code (02) gives the size of the entries, in this case 8 bytes. We now have the offset 0x14 of the descriptor array, composed of 8 byte entries that describe MAPI elements. Each descriptor entry has the following format: For some reference types (2, 3, 0xb) the value is used directly. Otherwise, the value is an index reference, which is either an ID2 value, or an offset, to be right shifted by 4 bits and used to fetch a pair from the index table to find the offset and size of the item in this descriptor block. The following reference types are known, but not all of these are implemented in the code yet. The following item types are known, but not all of these are implemented in the code yet. Associated Descriptor Item 0x7cec This style of descriptor block is similar to the 0xbcec format. This descriptor is also eventually decoded to a list of MAPI elements. Note the signature of 0x7cec. There are other descriptor block formats with other signatures. Note the indexOffset of 0x017a - starting at that position in the descriptor block, we have an array of two byte integers. The first integer (0x0006) is a (count-1) of the number of overlapping pairs following the count. The first pair is (0, 0xc), the next pair is (0xc, 0x14) and the last (7th) pair is (0x160, 0x179). These pairs are (start,end+1) offsets of items in this block. So we have count+2 integers following the count value. Note the 7coffset of 0x0040, which is an index reference. 
In this case, it is an internal reference pointer, which needs to be right shifted by 4 bits to become 0x0004, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0x14, 0xea) pair. We have the offset and size of the "7c" block located at offset 0x14 with a size of 214 bytes in this case. The "7c" block starts with a header with the following format: Note the b5Offset of 0x0020, which is an index reference. In this case, it is an internal reference pointer, which needs to be right shifted by 4 bits to become 0x0002, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xc, 0x14) pair. Finally, we have the offset and size of the "b5" block located at offset 0xc with a size of 8 bytes in this descriptor block. The "b5" block has the following format: Note the descoffset of 0x0060, which again is an index reference. In this case, it is an internal pointer reference, which needs to be right shifted by 4 bits to become 0x0006, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xea, 0xf0) pair. The datasize (2) plus the b5 code (04) gives the size of the entries, in this case 6 bytes. We now have the offset 0xea of an unused block of data in an unknown format, composed of 6 byte entries. That gives us (0xf0 - 0xea)/6 = 1, so we have a recordCount of one. We have seen cases where the descoffset in the b5 block is zero, and the index2Offset in the 7c block is zero. This has been seen for objects that seem to be attachments on messages that have been read. Before the message was read, it did not have any attachments. Note the index2Offset above of 0x0080, which again is an index reference. 
In this case, it is an internal pointer reference, which needs to be right shifted by 4 bits to become 0x0008, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xf0, 0x155) pair. This is an array of tables of four byte integers. We will call these the IND2 tables. The size of each of these tables is specified by the recordSize field of the "7c" header. The number of these tables is the above recordCount value derived from the "b5" block. Now the remaining data in the "7c" block after the header starts at offset 0x2a. There should be itemCount 8 byte items here, with the following format: The ind2Offset is a byte offset into the current IND2 table of some value. If that is a four byte integer value, then once we fetch that, we have the same triple (item type, reference type, value) as we find in the 0xbcec style descriptor blocks. If not, then this value is used directly. These 8 byte descriptors are processed recordCount times, each time using the next IND2 table. The item and reference types are as described above for the 0xbcec format descriptor block. 32 bit Associated Descriptor Item 0x0101 This descriptor block contains a list of I_ID values. It is used when an I_ID (that would normally point to a type 0x7cec or 0xbcec descriptor block) contains more data than can fit in any single descriptor of those types. In this case, it points to a type 0x0101 block, which contains a list of I_ID values that themselves point to the actual descriptor blocks. The total length value in the 0x0101 header is the sum of the lengths of the blocks pointed to by the list of I_ID values. The result is an array of subblocks, that may contain index references where the high order 16 bits specify which descriptor subblock to use. Only the first descriptor subblock contains the signature (0xbcec or 0x7cec). 
64 bit Associated Descriptor Item 0x0101 This descriptor block contains a list of I_ID values, similar to the 32 bit version described above.