#!/usr/bin/perl

# $Header: /mhub4/sources/imap-tools/imap_to_maildir.pl,v 1.1 2011/11/13 21:33:23 rick Exp $

################################################################################
# imap_to_maildir is a utility for copying mailboxes and messages              #
# from a user account on an IMAP server to a Maildir system.                   #
#                                                                              #
# imap_to_maildir.pl is called like this:                                      #
#      ./imap_to_maildir.pl -S imaphost/user/password -u <user> -M <maildir>   #
#                                                                              #
# For example:  ./imap_to_maildir.pl                  \                        #
#                  -S imap.gmail.com:993/rsanders/mypass  \                    #
#                  -u rick                            \                        #
#                  -M /users/rick/Maildir                                      #
# Optional arguments:                                                          #
#       -a <DD-MMM-YYYY> copy only messages after this date                    #
#	-d debug                                                               #
#       -I log IMAP protocol commands                                          #
#       -L logfile                                                             #
#       -m mailbox list (copies only the specified mailboxes, see usage()      #
################################################################################

use Socket;
use IO::Socket;
use FileHandle;
use Fcntl;
use Getopt::Std;
use File::Path;
use Time::HiRes;
use File::Path qw(make_path);
use IMAP::Utils;

#################################################################
#            Main program.                                      #
#################################################################

   init();

   #  Get list of all messages on the IMAP server
   #
   connectToHost($sourceHost, \$conn);
   unless ( login($sourceUser,$sourcePwd, $conn) ) {
       Log("Check your username and password");
       print STDOUT "Login failed: Check your username and password\n";
       exit;
   }
   @mbxs = getMailboxList($sourceUser, $conn);
   namespace( $conn, \$prefix, \$delimiter, $opt_x );

   foreach $mbx ( @mbxs ) {
        $dstmbx = $mbx;
        $dstmbx =~ s/^inbox/INBOX/i;
        $dstmbx =~ s/$prefix// if $prefix;

        unless ( $delimiter eq '.' ) {
           # Mailboxes may not contain dots, replace them with -
           $dstmbx =~ s/\./-/g;   
        }

        Log("Copying messages in $mbx mailbox") if $dump_flags;
        my @msgs;

        if ( $sent_after ) {
           getDatedMsgList( $mbx, $sent_after, \@msgs, $conn, 'EXAMINE' );
        } else {
           getMsgList( $mbx, \@msgs, $conn, 'EXAMINE' );
        }
             
        my $i = $#msgs + 1;
        Log("   $mbx has $i messages");

        $folder = get_folder_name( $maildir, $dstmbx, $delimiter, $prefix );

        build_folder( $folder, $username ) if !-d $folder or $folder eq $maildir;
        Log("folder $folder") if $debug;
        my $i = $#msgs + 1;
        my $msgnums;
        foreach $msgnum ( @msgs ) {
             ($msgnum,$date,$flags,$rfc822_size) = split(/\|/, $msgnum);
             fetchMsg( $msgnum, $conn, \$message );
             my $size = length( $message );

             $msgfile = generate_filename( $folder, $size, $rfc822_size, $flags );
             next if !$msgfile;   #  Failed to generate a unique filename
             if ( !open (M, ">$msgfile") ) {
                Log("Error opening $msgfile: $!");
                next;
             }
             Log("   Copying message $msgnum") if $debug;
             print M $message;
             close M;
             $added++;
 
             $msgnums .= "$msgnum ";
        }
   }

   logout( $conn );
   Log("Copied $added total messages");

   exit;


sub init {

   $version = 'V1.0';
   $os = $ENV{'OS'};

   processArgs();

   if ($timeout eq '') { $timeout = 60; }

   IMAP::Utils::init();
   #  Open the logFile
   #
   if ( $logfile ) {
      openLog($logfile);
   }
   Log("\n$0 starting");

   if ( $dump_flags ) {
      Log("Dumping only those messages with one of the following flags: $dump_flags");
   }

   chomp( $localhost = `uname -n` );
}

#  getMailboxList
#
#  get a list of the user's mailboxes from the source host
#
sub getMailboxList {

my $user = shift;
my $conn = shift;
my @mbxs;
my @mailboxes;

   #  Get a list of the user's mailboxes
   #
  if ( $mbxList ) {
      #  The user has supplied a list of mailboxes so only processes
      #  the ones in that list
      @mbxs = split(/,/, $mbxList);
      foreach $mbx ( @mbxs ) {
         trim( *mbx );
         push( @mailboxes, $mbx );
      }
      return @mailboxes;
   }

   if ($debug) { Log("Get list of user's mailboxes",2); }

   sendCommand ($conn, "1 LIST \"\" *");
   undef @response;
   while ( 1 ) {
	$response = readResponse ($conn);
	if ( $response =~ /^1 OK/i ) {
           last;
	}
	elsif ( $response !~ /^\*/ ) {
           Log ("unexpected response: $response");
           return 0;
	}
   }

   undef @mbxs;

   for $i (0 .. $#response) {
        $response[$i] =~ s/\s+/ /;
        if ( $response[$i] =~ /"$/ ) {
           $response[$i] =~ /\* LIST \((.*)\) "(.+)" "(.+)"/i;
           $mbx = $3;
        } elsif ( $response[$i] =~ /\* LIST \((.*)\) NIL (.+)/i ) {
           $mbx= $2;
        } else {
           $response[$i] =~ /\* LIST \((.*)\) "(.+)" (.+)/i;
           $mbx = $3;
        }
	$mbx =~ s/^\s+//;  $mbx =~ s/\s+$//;

	if ($response[$i] =~ /NOSELECT/i) {
		if ($debug) { Log("$mbx is set NOSELECT,skip it",2); }
		next;
	}
	if (($mbx =~ /^\#/) && ($user ne 'anonymous')) {
		#  Skip public mbxs unless we are migrating them
		next;
	}
	if ($mbx =~ /^\./) {
		# Skip mailboxes starting with a dot
		next;
	}
	push ( @mbxs, $mbx ) if $mbx ne '';
   }

   if ( $mbxList ) {
      #  The user has supplied a list of mailboxes so only processes
      #  those
      @mbxs = split(/,/, $mbxList);
   }

   return @mbxs;
}

#  getMsgList
#
#  Get a list of the user's messages in the indicated mailbox on
#  the source host
#
sub getMsgList {

my $mailbox = shift;
my $msgs    = shift;
my $conn    = shift;
my $mode    = shift;
my $seen;
my $empty;
my $msgnum;
my $from;
my $flags;

   $mode = 'EXAMINE' unless $mode;
   sendCommand ($conn, "1 $mode \"$mailbox\"");
   undef @response;
   $empty=0;
   while ( 1 ) {
	$response = readResponse ( $conn );
	if ( $response =~ / 0 EXISTS/i ) { $empty=1; }
	if ( $response =~ /^1 OK/i ) {
		last;
	}
	elsif ( $response !~ /^\*/ ) {
		Log ("unexpected response: $response");
		return 0;
	}
   }

   sendCommand ( $conn, "1 FETCH 1:* (uid flags internaldate body[header.fields (From Date)] RFC822.SIZE)");
   
   undef @response;
   while ( 1 ) {
	$response = readResponse ( $conn );
	if ( $response =~ /^1 OK/i ) {
		last;
	} 
        last if $response =~ /^1 NO|^1 BAD|^\* BYE/;
   }

   @msgs  = ();
   $flags = $rfc822_size = '';
   for $i (0 .. $#response) {
	last if $response[$i] =~ /^1 OK FETCH complete/i;

        if ($response[$i] =~ /FLAGS/) {
           #  Get the list of flags
           $response[$i] =~ /FLAGS \(([^\)]*)/;
           $flags = $1;
           $flags =~ s/\\Recent//;
        }

        if ( $response[$i] =~ /INTERNALDATE/) {
           $response[$i] =~ /INTERNALDATE (.+) BODY/i;
           # $response[$i] =~ /INTERNALDATE "(.+)" BODY/;
           $date = $1;
           
           $date =~ /"(.+)"/;
           $date = $1;
           $date =~ s/"//g;
        }

        if ( $response[$i] =~ /RFC822.SIZE/ ) {
           $response[$i] =~ /RFC822.SIZE\s+(.+)(.*)/;
           $rfc822_size = $1;
           $rfc822_size =~ s/[^\d.]//g;
        }

        if ( $response[$i] =~ /\* (.+) FETCH/ ) {
           ($msgnum) = split(/\s+/, $1);
        }

        if ( $msgnum and $date and $rfc822_size ) {
	   push (@$msgs,"$msgnum|$date|$flags|$rfc822_size");
           $msgnum = $date = $rfc822_size = '';
        }
   }

   return 1;

}

#  getDatedMsgList
#
#  Get a list of the user's messages in a mailbox on
#  the host which were sent after the specified date
#
sub getDatedMsgList {

my $mailbox = shift;
my $cutoff_date = shift;
my $msgs    = shift;
my $conn    = shift;
my $oper    = shift;
my ($seen, $empty, @list,$msgid, $rfc822_size);

    #  Get a list of messages sent after the specified date

    @list  = ();
    @$msgs = ();

    sendCommand ($conn, "1 $oper \"$mailbox\"");
    while ( 1 ) {
        $response = readResponse ($conn);
        if ( $response =~ / EXISTS/i) {
            $response =~ /\* ([^EXISTS]*)/;
            #Log("     There are $1 messages in $mailbox");
        } elsif ( $response =~ /^1 OK/i ) {
            last;
        } elsif ( $response =~ /^1 NO/i ) {
            Log ("unexpected SELECT response: $response");
            return 0;
        } elsif ( $response !~ /^\*/ ) {
            Log ("unexpected SELECT response: $response");
            return 0;
        }
    }

    my ($date,$ts) = split(/\s+/, $cutoff_date);

    #
    #  Get list of messages sent before the reference date
    #
    Log("Get messages sent after $date") if $debug;
    $nums = "";
    sendCommand ($conn, "1 SEARCH SINCE \"$date\"");
    while ( 1 ) {
	$response = readResponse ($conn);
	if ( $response =~ /^1 OK/i ) {
	    last;
	}
	elsif ( $response =~ /^\*\s+SEARCH/i ) {
	    ($nums) = ($response =~ /^\*\s+SEARCH\s+(.*)/i);
	}
	elsif ( $response !~ /^\*/ ) {
	    Log ("unexpected SEARCH response: $response");
	    return;
	}
    }
    Log("$nums") if $debug;
    if ( $nums eq "" ) {
	Log ("     $mailbox has no messages sent before $date") if $debug;
	return;
    }
    my @number = split(/\s+/, $nums);
    $n = $#number + 1;

    $nums =~ s/\s+/ /g;
    @msgList = ();
    @msgList = split(/ /, $nums);

    if ($#msgList == -1) {
	#  No msgs in this mailbox
	return 1;
    }

    $n = $#msgList + 1;
    Log("   $mailbox has $n messages after $sent_after");

@$msgs  = ();
for $num (@msgList) {

     sendCommand ( $conn, "1 FETCH $num (uid flags internaldate body[header.fields (Message-Id Date)] RFC822.SIZE)");
     
     undef @response;
     while ( 1 ) {
	$response = readResponse   ( $conn );
	if   ( $response =~ /^1 OK/i ) {
		last;
	}   
        last if $response =~ /^1 NO|^1 BAD|^\* BYE/;
     }

     $flags = $rfc822_size = '';
     my $msgid;
     foreach $_ ( @response ) {
	  last if /^1 OK FETCH complete/i;
          if ( /FLAGS/ ) {
             #  Get the list of flags
             /FLAGS \(([^\)]*)/;
             $flags = $1;
             $flags =~ s/\\Recent//;
          }
   
          if ( /Message-Id:\s*(.+)/i ) {
             $msgid = $1;
          }

          if ( /INTERNALDATE/) {
             /INTERNALDATE (.+) BODY/i;
             $date = $1;
             $date =~ /"(.+)"/;
             $date = $1;
             $date =~ s/"//g;
             ####  next if check_cutoff_date( $date, $cutoff_date );
          }

          if ( /RFC822.SIZE/ ) {
             /RFC822.SIZE\s+(.+)(.*)/;
             $rfc822_size = $1;
             $rfc822_size =~ s/[^\d.]//g;
          }

          if ( /\* (.+) FETCH/ ) {
             ($msgnum) = split(/\s+/, $1);
          }

          if ( $msgnum and $date and $rfc822_size ) {
             push (@$msgs,"$msgnum|$date|$flags|$msgid|$rfc822_size");
             $msgnum=$msgid=$date=$flags=$rfc822_size='';
          }
      }
   }

   foreach $_ ( @$msgs ) {
      Log("getDated found $_") if $debug;
   }

   return 1;
}


sub usage {

   print STDOUT "usage:\n";
   print STDOUT " iu-imaptomaildir -S Host/User/Password -u <user> -g <group> -M <maildir>\n";
   print STDOUT " <dir> is the file directory to write the message structure\n";
   print STDOUT " Optional arguments:\n";
   print STDOUT "          -d debug\n";
   print STDOUT "          -I log IMAP commands\n";
   print STDOUT "          -L logfile\n";
   print STDOUT "          -m mailbox list (eg \"Inbox,Drafts,Notes\". Default is all mailboxes)\n";
   print STDOUT "          -a <DD-MMM-YYYY> copy only messages after this date\n";
   exit;

}

sub processArgs {

   if ( !getopts( "dS:L:mu:g:M::hf:F:Ia:x" ) ) {
      usage();
   }

   if ( $opt_S =~ /\\/ ) {
      ($sourceHost, $sourceUser, $sourcePwd) = split(/\\/, $opt_S);
   } else {
      ($sourceHost, $sourceUser, $sourcePwd) = split(/\//, $opt_S);
   }

   $userid   = scalar getpwnam($opt_u) or -1;
   $groupid   = scalar getpwnam($opt_g) or -1;
   $maildir    = $opt_M;
   $mbxList    = $opt_m;
   $logfile    = $opt_L;
   $debug      = 1 if $opt_d;
   $showIMAP   = 1 if $opt_I;
   $sent_after = $opt_a;

   if ( !$maildir ) {
      print "You must specify the username and the directory where the user's Maildir is located\n";
      print "For example:  -u rick -M /mhub4/rick/Maildir.\n";
      usage();
      exit;
   }

   if ( !-d $maildir ) {
      print "Fatal error: $maildir does not exist\n";
      exit;
   }

   validate_date( $sent_after ) if $sent_after;

   if ( $dump_flags ) {
      foreach my $flag ( split(/,/, $dump_flags) ) {
          $flag = ucfirst( lc($flag) );
          $flag = 'Seen'   if $flag eq 'Read';
          $flag = 'Unseen' if $flag eq 'Unread';
          $dump_flags{$flag} = 1;
      }
   }

   if ( $extension ) {
      $extension = '.' . $extension unless $extension =~ /^\./;
   }

   usage() if $opt_h;

}

sub flags_ok {

my $flags = shift;
my $ok = 0;

   #  If the user has specified that only messages with
   #  certain flags be dumped then honor his request.

   return 1 unless %dump_flags;

   $flags =~ s/\\//g;
   Log("flags $flags") if $debug;
   foreach $flag ( split(/\s+/, $flags) ) {
      $flag = ucfirst( lc($flag) );
      $ok = 1 if $dump_flags{$flag};
   }

   #  Special case for Unseen messages for which there isn't a 
   #  standard flag.  
   if ( $dump_flags{Unseen} ) {
      #  Unseen messages should be dumped too.
      $ok = 1 unless $flags =~ /Seen/;
   }

   return $ok;

}

sub generate_filename {

my $folder      = shift;
my $size        = shift;
my $rfc822_size = shift;
my $flags       = shift;
my $status;
my $tries;
my $msgfn;
my $seen;

   #  Get a unique filename

   Log("Generate a filename") if $debug;
   $seen = ',S' if $flags =~ /Seen/;

   while( 1 ) {
      my $now = time();
      my ($sec, $msec) = Time::HiRes::gettimeofday();

      $msgfn = $sec . '.M' . $msec . 'P' . $$ . "$localhost,S=$size,W=$rfc822_size:2$seen";
      $msgfn = $folder . '/cur/' . $msgfn;
      last if $tries++ > 100;
      next if -e $msgfn;
   }

   return $msgfn;

}

sub get_folder_name {

my $maildir   = shift; 
my $mbx       = shift;
my $delimiter = shift;
my $prefix    = shift;

   #  Convert an IMAP mailbox name to a Maildir folder name.  IMAP mbxs
   #  are hierarchal while Maildir folders are flat and must start with
   #  a '.' character.

   Log("Convert IMAP mbx $mbx name to Maildir folder name") if $debug;

   my $folder = $maildir . '/';
   if ( uc( $mbx ) eq 'INBOX' ) {
      #  Inbox is special case
      return $maildir;
   }

   $delimiter = "\\." if $delimiter eq '.';

   foreach my $term ( split(/$delimiter/, $mbx ) ) {
       $folder .= '.' . $term;
   }

   return $folder;
}

sub build_folder {

my $folder   = shift;
my $username = shift;
my @subdirs = qw( new cur tmp );

   Log("Create the directories for the $folder folder");

   foreach my $subdir ( @subdirs ) {
      my $stat = scalar make_path("$folder/$subdir");
      if ( ! $stat ) {
         Log("Error creating $folder/$subdir:  $!");
         exit;
      }
   }

   my $stat = chown $userid, $groupid, $folder if $opt_u and $opt_g;   # Set ownership
   if ( ! $stat ) {
      Log("Failed to chown $username for $folder: $!");
      exit;
   }

}
