diff --git a/package b/package index 93e4e56..5d16760 100644 --- a/package +++ b/package @@ -1,78 +1,78 @@ #!/bin/bash T=`grep AM_INIT_AUTOMAKE configure.in | cut -d'(' -f2` NAME=`echo $T | cut -d, -f1` VER=`echo $T | cut -d, -f2 | cut -d')' -f1` BALL=$NAME-$VER.tar.gz web=/home/httpd/html/510sg/$NAME distlog=/tmp/distcheck chown --recursive root:root * make -f *cvs ./configure >/dev/null (cd xml; make; make distclean) cp -a html/*html $web make distcheck >$distlog 2>&1 if [ $? -eq 0 ]; then if [ -f $BALL ]; then # expand locally to see the tarball rm -rf junk mkdir junk cd junk tar xfz ../$BALL cd $NAME-$VER ./configure >/dev/null make >/dev/null make install >/dev/null cd .. cd .. # build rpm on target target5=pmg2 scp $BALL $target5:/tmp ssh $target5 "cd /tmp; rpmbuild -ta $BALL" # build rpm on target target4=host62 scp $BALL $target4:/tmp ssh $target4 "cd /tmp; rpmbuild -ta $BALL" # add packages to the web site wp=$web/packages wp4=$wp/centos4 wp5=$wp/centos5 mkdir -p $wp4 $wp5 rp=/usr/src/redhat mv -f $BALL $wp scp $target4:$rp/SRPMS/$NAME-$VER*rpm $wp scp $target4:$rp/RPMS/i386/$NAME-$VER*rpm $wp4 scp $target5:$rp/RPMS/i386/$NAME-$VER*rpm $wp5 (cd $web; chown --recursive web:web *; ls -alR) rpm -ql -p $wp4/$NAME-$VER*6.rpm rpm -ql -p $wp5/$NAME-$VER*6.rpm fi else tail -10 $distlog fi chown --recursive carl:carl * -args="--recursive -z -e ssh --times --perms --links --relative --owner --group --numeric-ids --delete" -echo rsync $args $web ns1:/ -rsync $args $web ns1:/ - -if [ -f /usr/local/bin/pst2ldif ]; then - pushd /home/ldap - pst=outlook.pst - #rm -f pst2ldif.log my.log - /usr/local/bin/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif2 - #./readpstlog pst2ldif.log | less >my.log - #hexdump -C $pst >pst.dump - - grep '^dn:' ams.ldif2 | sort >aaa - grep '^dn:' ams.ldif | sort >bbb - echo 'differences from nightly ldap' - diff aaa bbb - echo 'end differences' - rm -f aaa bbb - popd -fi +#args="--recursive -z -e ssh --times --perms --links 
--relative --owner --group --numeric-ids --delete" +#echo rsync $args $web ns1:/ +#rsync $args $web ns1:/ +# +#if [ -f /usr/local/bin/pst2ldif ]; then +# pushd /home/ldap +# pst=outlook.pst +# #rm -f pst2ldif.log my.log +# /usr/local/bin/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif2 +# #./readpstlog pst2ldif.log | less >my.log +# #hexdump -C $pst >pst.dump +# +# grep '^dn:' ams.ldif2 | sort >aaa +# grep '^dn:' ams.ldif | sort >bbb +# echo 'differences from nightly ldap' +# diff aaa bbb +# echo 'end differences' +# rm -f aaa bbb +# popd +#fi diff --git a/xml/libpst.in b/xml/libpst.in index d90445c..c451359 100644 --- a/xml/libpst.in +++ b/xml/libpst.in @@ -1,1308 +1,1309 @@ @PACKAGE@ Utilities - Version @VERSION@ Packages This is a fork of the libpst project at SourceForge. Another fork is located at http://alioth.debian.org/projects/libpst/ The various source and binary packages are available at http://www.five-ten-sg.com/@PACKAGE@/packages/ The most recent documentation is available at http://www.five-ten-sg.com/@PACKAGE@/ 2007-07-10 readpst 1 readpst @VERSION@ readpst convert PST (MS Outlook Personal Folders) files to mbox format Synopsis readpst files Description readpst is a program that can read an Outlook PST (Personal Folders) file and convert it into an mbox file, a format suitable for KMail, a recursive mbox structure, or separate emails. Options -b Do not save the attachments for the RTF format of the email body. -c format Set the Contact output mode. Use -cv for vcard format or -cl for an email list. -d debug-file Specify name of debug log file. Defaults to "readpst.log". The log file is not an ascii file, it is a binary file readable by readpstlog. -h Show summary of options and exit. -k Changes the output format to KMail. -o output-directory Specifies the output directory. The directory must already exist, and is entered after the PST file is opened, but before any processing of files commences. -q Changes to silent mode. 
No feedback is printed to the screen, except for error messages. -r Changes the output format to Recursive. This will create folders as named in the PST file, and will put all emails in a file called "mbox" inside each folder. These files are then compatible with all mbox-compatible email clients. -S Output messages into separate files. This will create folders as named in the PST file, and will put each email in its own file. These files will be numbered from 1 increasing in intervals of 1 (ie 1, 2, 3, ...). Any attachments are saved alongside each email as XXXXXXXXX-attach1, XXXXXXXXX-attach2 and so on, or with the name of the attachment if one is present. -M Output messages in MH format as separate files. This will create folders as named in the PST file, and will put each email together with any attachments into its own file. These files will be numbered from 1 to n with no leading zeros. -V Show program version and exit. -w Overwrite any previous output files. Beware: When used with the -S switch, this will remove all files from the target folder before writing. This is to keep the count of emails and attachments correct. See Also readpstlog 1 Author This manual page was originally written by Dave Smith <dave.s@earthcorp.com>, and updated by Joe Nahmias <joe@nahmias.net> for the Debian GNU/Linux system (but may be used by others). It was subsequently updated by Brad Hards <bradh@frogmouth.net>, and converted to xml format by Carl Byington <carl@five-ten-sg.com>. Copyright Copyright (C) 2002 by David Smith <dave.s@earthcorp.com>. XML version Copyright (C) 2006 by 510 Software Group <carl@five-ten-sg.com>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. 
If not, please write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. CVS Version $Id$ 2007-07-10 readpstlog 1 readpstlog @VERSION@ readpstlog convert a readpst logfile to text format Synopsis readpstlog logfile Description readpstlog is a program that converts the binary logfile generated by readpst to a more desirable text format. Options -f format Sets the format of the text log output. Currently, the only valid output format is T, for text; anything else gives the default. -t include-types Print only the specified types of log messages. Types are specified in a comma-delimited list (e.g. 3,10,5,6). -x exclude-types Exclude the specified types of log messages. Types are specified in a comma-delimited list (e.g. 3,10,5,6). Message Types readpstlog understands the following types of log messages: 1 File accesses 2 Index accesses 3 New email found 4 Warnings 5 Read accesses 6 Informational messages 7 Main function calls 8 Decrypting calls 10 Function calls 11 HexDump calls Author This manual page was written by Joe Nahmias <joe@nahmias.net> for the Debian GNU/Linux system (but may be used by others). It was converted to xml format by Carl Byington <carl@five-ten-sg.com>. Copyright Copyright (C) 2002 by David Smith <dave.s@earthcorp.com>. XML version Copyright (C) 2005 by 510 Software Group <carl@five-ten-sg.com>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, please write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. CVS Version $Id$ 2007-07-10 pst2ldif 1 pst2ldif @VERSION@ pst2ldif extract contacts from a MS Outlook .pst file in .ldif format Synopsis pst2ldif pstfilename Options -h Show summary of options. 
Subsequent options are then ignored. -V Show program version. Subsequent options are then ignored. -b ldap-base Sets the ldap base value used in the dn records. You probably want to use something like "o=organization, c=US". -c class Sets the objectClass values for the contact items. This class needs to be defined in the schema used by your LDAP server, and at a minimum it must contain the ldap attributes given below. Description pst2ldif reads the contact information from a MS Outlook .pst file and produces a .ldif file that may be used to import those contacts into an LDAP database. The following ldap attributes are generated: cn givenName sn personalTitle company mail postalAddress l st postalCode c homePhone telephoneNumber facsimileTelephoneNumber mobile description Copyright Copyright (C) 2006 by 510 Software Group <carl@five-ten-sg.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, please write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. CVS Version $Id$ 2007-07-10 outlook.pst 5 outlook.pst format of MS Outlook .pst file Synopsis outlook.pst Overview Each item in a .pst file is identified by two id values ID1 and ID2. There are two separate b-trees indexed by these ID1 and ID2 values. File Header The file header is located at offset 0 in the .pst file. We only support index type 0x0E and encryption type 0x01. offsetIndex1 is the file offset of the root of the index1 b-tree, which contains (ID1, offset, size, unknown) tuples for each item in the file. backPointer1 is the value that should appear in the parent pointer of that root node.
offsetIndex2 is the file offset of the root of the index2 b-tree, which contains (ID2, DESC-ID1, LIST-ID1, PARENT-ID2) tuples for each item in the file. backPointer2 is the value that should appear in the parent pointer of that root node. Index 1 Node The index1 b-tree nodes are 516 byte blocks with the following format. The itemCount specifies the number of 12 byte records that are active. The nodeLevel is non-zero for this style of nodes. The leaf nodes have a different format. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a triple of (ID, backPointer, offset) where the offset points to the next deeper node in the tree, the backPointer value must match the backPointer in that deeper node, and ID is the lowest ID value in the subtree. Index 1 Leaf Node The index1 b-tree leaf nodes are 516 byte blocks with the following format. The itemCount specifies the number of 12 byte records that are active. The nodeLevel is zero for these leaf nodes. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a tuple of (ID1, offset, size, unknown) Index 2 Node The index2 b-tree nodes are 516 byte blocks with the following format. The itemCount specifies the number of 12 byte records that are active. The nodeLevel is non-zero for this style of nodes. The leaf nodes have a different format. The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a triple of (ID2, backPointer, offset) where the offset points to the next deeper node in the tree, the backPointer value must match the backPointer in that deeper node, and ID2 is the lowest ID2 value in the subtree. Index 2 Leaf Node The index2 b-tree leaf nodes are 516 byte blocks with the following format. The itemCount specifies the number of 16 byte records that are active. The nodeLevel is zero for these leaf nodes. 
The backPointer must match the backPointer from the triple that pointed to this node. Each item in this node is a tuple of (ID2, DESC-ID1, LIST-ID1, PARENT-ID2) Associated List Item Contains associations between id1 and id2 for the items controlled by the record. In the above leaf node, we have a tuple of (0x61, 0x02a82c, 0x02a836, 0) 0x02a836 is the ID1 of the associated list, and we can look up that ID1 value in the index1 b-tree to find the (offset,size) of the data in the .pst file. Associated Descriptor Item 0xbcec Contains information about the item, which may be email, contact, or other outlook types. In the above leaf node, we have a tuple of (0x21, 0x00e638, 0, 0) 0x00e638 is the ID1 of the associated descriptor, and we can look up that ID1 value in the index1 b-tree to find the (offset,size) of the data in the .pst file. Note the signature of 0xbcec. There are other descriptor block formats with other signatures. Note the indexOffset of 0x013c - starting at that position in the descriptor block, we have an array of two byte integers. The first integer (0x000b) is a (count-1) of the number of overlapping pairs following the count. The first pair is (0, 0xc), the next pair is (0xc, 0x14) and the last (12th) pair is (0x123, 0x13b). These pairs are (start,end+1) offsets of items in this block. So we have count+2 integers following the count value. Note the offset of 0x0020, which needs to be right shifted by 4 bits to become 0x0002, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xc, 0x14) pair. Finally, we have the offset and size of the "b5" block located at offset 0xc with a size of 8 bytes in this descriptor block. The "b5" block has the following format: Note the "b5" offset of 0x0040, which needs to be right shifted by 4 bits to become 0x0004, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0x14, 0x7c) pair.
We now have the offset 0x14 of the descriptor array, composed of 8 byte entries. Each descriptor entry has the following format: For some reference types (2, 3, 0xb) the value is used directly. Otherwise, the value is generally a non-zero offset, to be right shifted by 4 bits and used to fetch a pair from the index table to find the offset and size of the item in this descriptor block. However, if (value AND 0xf) == 0xf, then the value is an ID2 index. The following reference types are known, but not all of these are implemented in the code yet. The following item types are known, but not all of these are implemented in the code yet. Note: it appears that some types can have an IPOS value or an ID2 value depending on the size of the field in question. It is safer to check every field than for me to say what they "usually" contain. Absolute values though, are generally going to be constant. Associated Descriptor Item 0x7cec This style of descriptor block is similar to the BCEC format. Note the signature of 0x7cec. There are other descriptor block formats with other signatures. Note the indexOffset of 0x017a - starting at that position in the descriptor block, we have an array of two byte integers. The first integer (0x0006) is a (count-1) of the number of overlapping pairs following the count. The first pair is (0, 0xc), the next pair is (0xc, 0x14) and the last (7th) pair is (0x160, 0x179). These pairs are (start,end+1) offsets of items in this block. So we have count+2 integers following the count value. Note the offset of 0x0040, which needs to be right shifted by 4 bits to become 0x0004, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0x14, 0xea) pair. We have the offset and size of the "7c" block located at offset 0x14 with a size of 214 bytes in this case.
The "7c" block starts with a header with the following format: Note the b5Offset of 0x0020, which needs to be right shifted by 4 bits to become 0x0002, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xc, 0x14) pair. Finally, we have the offset and size of the "b5" block located at offset 0xc with a size of 8 bytes in this descriptor block. The "b5" block has the following format: Note the "b5" offset of 0x0060, which needs to be right shifted by 4 bits to become 0x0006, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xea, 0xf0) pair. That gives us (0xf0 - 0xea)/6 = 1, so we have a recordCount of one. The actual data between 0xea and 0xf0 is unknown and unused here. Note the index2Offset above of 0x0080, which needs to be right shifted by 4 bits to become 0x0008, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xf0, 0x155) pair. This is an array of tables of four byte integers. We will call these the IND2 tables. The size of each of these tables is specified by the recordSize field of the "7c" header. The number of these tables is the above recordCount value derived from the "b5" block. Now the remaining data in the "7c" block after the header starts at offset 0x2a. There should be itemCount 8 byte items here, with the following format: The ind2Offset is a byte offset into the current IND2 table of a four byte integer value. Once we fetch that, we have the same triple (item type, reference type, value) as we find in the 0xbcec style descriptor blocks. These 8 byte descriptors are processed recordCount times, each time using the next IND2 table. The item and reference types are as described above for the 0xbcec format descriptor block.