# author:      Vincent DiBartolo (vdibart@nodroidsallowed.com)
# description: Get some data (e.g. a MySQL dump), compare it to the previous one and email it if
#              the sha1 hash is different.
#TODO: improve error handling when cookie is expired, etc.
use strict;
use MIME::Lite;
use Digest::SHA1 qw(sha1_base64);
use Getopt::Std;

#----------------------------------- JOBS TO RUN -----------------------------------#

# Command prefix shared by every wget-based job: cookies off, quiet, and the
# fetched document written to stdout so it can be read through a pipe.
my $wget = "wget --cookies=off --quiet --output-document /dev/stdout";

# Table of jobs to run, keyed by job name.  Each job may define:
#   command - shell command whose standard output is the data to back up
#   outfile - file name the data is written to before it is attached
#   zipfile - (optional) archive name; when set, outfile is zipped and the
#             archive is attached instead
#   filter  - (optional) pattern; lines matching it are dropped from the data
my %jobs = (

    # Live MySQL databases
    "mysql" => {
        "command" => "mysqldump --opt --hex-blob --all-databases --user=scott --password=tiger",
        "outfile" => "mysql.sql",
        "zipfile" => "mysql.zip",
    },

    # Bloglines blogroll (disabled)
    #"bloglines" => {
    #    "command" => "$wget --header=\"Cookie: BloglinesTracker=qzIzPhMKSl4QEg\" http://www.bloglines.com/export",
    #    "outfile" => "Blogroll.xml",
    #    "filter"  => "",
    #},

    # Google Reader blogroll
    "greader" => {
        "command" => "$wget --header=\"Cookie: SID=NnCjXlBvZiifq2Q\" http://www.google.com/reader/subscriptions/export",
        "outfile" => "Blogroll.xml",
    },

    # del.icio.us bookmarks
    "del.icio.us" => {
        "command" => "$wget --http-user=scott --http-password=tiger --no-check-certificate https://api.del.icio.us/v1/posts/all",
        "outfile" => "del.icio.us.xml",
    },

    # this file
    "mydump" => {
        "command" => "cat $ENV{HOME}/perl/mydump.pl",
        "outfile" => "mydump.tmp.pl",
    },
);

#-------------------- GLOBAL VARIABLE DECLARATION/INITIALIZATION --------------------#


# directory the script changes into before doing any work
my $output_path = "/path/to/write/files";

# recipient of the backup email (can be replaced with -e on the command line)
my $to = 'you@email.com';

# sender of the backup email
# NOTE(review): this value looks truncated (trailing space, no address) -
# confirm the intended sender before relying on it.
my $from = 'me ';

# subject line, stamped with the time the script started
my $subject = "File Backups from " . localtime();

# appended to a job name to form the name of that job's digest file
my $digest_suffix = "_digest.txt";

# run-mode switches, all off unless set from the command line
my $debug     = 0;    # print progress messages
my $override  = 0;    # attach data even when the digest is unchanged
my $test_only = 0;    # do everything except send the email

# the MIME::Lite message that is built up and sent
my $email;

# text of the 'Contents.txt' part listing what was attached
my $msg_body = "";

# error messages collected during the run
my @errors;

# lines of the most recent dump, shared between the digest and attach steps
my @dump_contents;

# files written during the run that must be removed afterwards
my @to_deletes;

#-------------------- COMMAND-LINE PROCESSING --------------------#


# Parse the command-line switches.  getopts() makes every switch it finds
# available as $Getopt::Std::opt_<letter>; -e takes a value, the other
# switches are plain flags.  It returns false when it meets an unknown switch.
my $options_ok = getopts("e:dhot");

# Show the usage text when asked to, or when the switches could not be parsed
if (!$options_ok || $Getopt::Std::opt_h){
    print "\n";
    print "Usage: mydump.pl [-e <email>] [-d] | [-o] | [-t] | [-h]\n";
    print "               -e <email> = override default email address ($to)\n";
    print "               -d         = debug\n";
    print "               -o         = override (send data even if don't have to)\n";
    print "               -t         = test only - don't email results (will override -o if it's set also)\n";
    print "               -h         = get this help message\n\n";

    # nothing else should run after the help text; non-zero status on bad input
    exit($options_ok ? 0 : 1);
}

# override the email address
if ($Getopt::Std::opt_e){
    $to = $Getopt::Std::opt_e;
}

# Turn debugging on
if ($Getopt::Std::opt_d){
    $debug = 1;
    msg("Debugging is on.");
}

# Turn email override on
if ($Getopt::Std::opt_o){
    $override = 1;
    msg("Email override is on.");
}

# Test only - don't send emails (checked last so that it wins over -o)
if ($Getopt::Std::opt_t){
    $test_only = 1;
    $override  = 0;
    msg("Testing only - no email will be sent.");
}

#-------------------- SUB-ROUTINES --------------------#

# Print a timestamped progress message, but only when debugging is switched on.
#   $_[0] - text of the message
# Always returns 1.
sub msg{

    my ($text) = @_;

    if ($debug){
        my $stamp = localtime();
        print "[" . $stamp . "]: " . $text . "\n";
    }

    return 1;

}#sub msg

# Prepare for a run: change into the output directory and create the empty
# multipart message that the jobs attach their files to.
# Takes no arguments.  Dies when the directory cannot be entered or the
# message container cannot be created.  Returns the new message object.
sub init{

    # hate to do this but it makes everything else a little easier
    msg("Changing working directory to $output_path...");

    # the check belongs on chdir itself; msg() always succeeds
    chdir($output_path) or die "Could not change to output directory: $!";

    # Create a new multipart message
    $email = MIME::Lite->new(
        From    => $from,
        To      => $to,
        Subject => $subject,
        Type    => 'multipart/mixed'
    ) or die "Error creating multipart container: $!\n";

    return $email;

}#sub init

# NOTE(review): in the damaged source the start of validate_contents ran
# straight into the tail of get_old_digest, and error() - which the rest of
# the script calls - was not defined in the visible source.  The three subs
# below are a reconstruction based on how their callers use them; confirm
# against a pristine copy of the script if one exists.

# validate that we got what we expected to get
#   $job     - name of the job the file belongs to
#   $outfile - name of the file that was just written
# Returns the size of the file in bytes, or the (false) result of error()
# when the file is missing or empty.
sub validate_contents{

    my ($job, $outfile) = @_;

    # check that there is a file; -s gives undef when it does not exist
    my $filesize = -s $outfile;
    return error($job, "$outfile is empty.") unless defined $filesize && $filesize > 0;

    # (disabled) check that file contents match extension given - XML only
    #if( ($outfile =~ m/.*\.xml/) && ($dump_contents[0] !~ m/^\<\?xml/) ){
    #    return error($job, "XML file $outfile doesn't have an XML declaration.");
    #}

    return $filesize;

}#sub validate_contents

# record an error so it can be reported in the email
#   $job     - name of the job that failed
#   $message - description of the failure
# Always returns 0, so callers can write 'return error(...)' to fail.
sub error{

    my ($job, $message) = @_;

    my $entry = "Error in job '$job': $message";
    msg($entry);
    push(@errors, $entry);

    return 0;

}#sub error

# get the digest that was stored for a job on the previous run
#   $job - name of the job
# Returns the stored digest, or "" when no digest file can be read.
sub get_old_digest{

    my ($job)         = @_;
    my ($digest_file) = $job.$digest_suffix;

    my $digest = "";
    if( open(my $digest_fh, '<', $digest_file) ){
        my $line = <$digest_fh>;
        close($digest_fh);

        if( defined $line ){
            chomp $line;
            $digest = $line;
        }
    }

    msg("Existing digest for '$job' is: $digest");

    return $digest;

}#sub get_old_digest

# get a digest of a dump from the data source
#   $_[0] - name of the job whose command should be run
# Runs the job's command, keeps its output (minus any lines matching the
# job's filter) in @dump_contents, and returns the SHA-1 digest of that
# output.  Returns "" - leaving @dump_contents empty - when the command
# cannot be started or exits with a non-zero status.
sub get_new_digest{

    my ($job)          = $_[0];
    my ($dump_command) = $jobs{$job}{"command"};

    # clear out the contents from the last iteration
    @dump_contents = ();

    # start the command and read everything it prints
    open(my $dump_fh, '-|', $dump_command) or return "";
    my (@unfiltered) = <$dump_fh>;

    # closing a pipe waits for the command; a false result means it failed
    unless( close($dump_fh) ){
        msg("Command for '$job' failed with exit status " . ($? >> 8) . ".");
        return "";
    }

    # now see if have to filter out the contents; an empty filter is ignored
    # because an empty pattern would not mean "match nothing" to Perl
    my ($filter) = $jobs{$job}{"filter"};
    if( defined $filter && $filter ne "" ){
        @dump_contents = grep { $_ !~ /$filter/ } @unfiltered;
    } else {
        @dump_contents = @unfiltered;
    }

    my ($digest) = sha1_base64(@dump_contents);
    msg("New digest for '$job' is:      $digest");

    return $digest;

}#sub get_new_digest

# attach the contents from the previous command retrieval
#   $_[0] - name of the job whose data is held in @dump_contents
# Writes the data to the job's outfile, zips it when the job names a zipfile,
# attaches the result to the email and adds a line to the message body.
# Every file created is queued in @to_deletes.
# Returns 1 on success, or a false value after recording an error.
sub attach_contents{

    my ($job)     = $_[0];
    my ($outfile) = $jobs{$job}{"outfile"};

    # write the file to the output directory
    msg("Writing file $outfile for '$job'...");
    open(my $out_fh, '>', $outfile) or return error($job, "Could not open $outfile: $!");

    # make sure this will get deleted later
    push(@to_deletes, $outfile);

    # the handle has to be closed before the size check below, otherwise
    # buffered data has not reached the file yet
    print {$out_fh} join('', @dump_contents);
    close($out_fh) or return error($job, "Could not write $outfile: $!");

    # make sure we got what we were looking for
    my ($filesize) = validate_contents($job, $outfile);
    return 0 unless $filesize;

    # if wants it in zip format, have an extra step
    if( defined $jobs{$job}{"zipfile"} ){

        my ($zipfile) = $jobs{$job}{"zipfile"};
        msg("Zipping up file $zipfile for '$job'...");
        push(@to_deletes, $zipfile);

        # list form keeps the file names away from the shell; -q keeps zip quiet
        system("zip", "-q", $zipfile, $outfile) == 0
            or return error($job, "Could not create $zipfile (zip exit status " . ($? >> 8) . ")");

        # attach file
        msg("Attaching $zipfile...");
        $email->attach(Type        => 'application/zip',
                       Path        => $zipfile,
                       Filename    => $zipfile,
                       Disposition => 'attachment'
                      ) or return error($job, "Error adding $zipfile: $!");

        # bookkeeping
        my $zipsize = -s $zipfile;
        $msg_body .= "Includes '$zipfile' from job '$job' ($zipsize bytes)\n";

    } else {

        # attach file
        msg("Attaching $outfile...");
        $email->attach(Type        => 'text/plain',
                       Path        => $outfile,
                       Filename    => $outfile,
                       Disposition => 'attachment'
                      ) or return error($job, "Error adding $outfile: $!");

        # bookkeeping
        $msg_body .= "Includes '$outfile' from job '$job' ($filesize bytes)\n";

    }

    return 1;

}#sub attach_contents

# write the data's digest to a file for checking again next run
#   $job    - name of the job; the file is named after it plus $digest_suffix
#   $digest - digest string to store
# Returns 1 on success, or a false value after recording an error when the
# digest file cannot be written.
sub write_digest{

    my ($job, $digest) = @_;
    my ($digest_file)  = $job.$digest_suffix;

    # replace whatever digest was stored by the previous run
    msg("Overwriting digest file $digest_file...");
    open(my $digest_fh, '>', $digest_file)
        or return error($job, "Could not open $digest_file: $!");
    print {$digest_fh} $digest;

    # write errors only show up when the handle is closed
    close($digest_fh) or return error($job, "Could not write $digest_file: $!");

    return 1;

}#sub write_digest

# email the data dump (the digest is not the same)
# Adds the 'Contents.txt' summary - and 'Errors.txt' when errors were
# recorded - to the message and sends it.  In test-only mode nothing is
# added or sent.
# Returns 1; dies when a part cannot be added or the message cannot be sent.
sub email_files{

    msg("Emailing the results to $to...");
    return 1 if $test_only;

    # add the message body
    $email->attach(Type     => 'text/plain',
                   Filename => 'Contents.txt',
                   Data     => $msg_body
                  ) or die "Error adding the text message part: $!\n";

    # if there were errors, list them one per line
    if( @errors ){
        $email->attach(Type     => 'text/plain',
                       Filename => 'Errors.txt',
                       Data     => join("\n", @errors)
                      ) or die "Error adding the text message part: $!\n";
    }

    # hand the finished message over for delivery
    $email->send or die "Error sending the email to $to\n";

    return 1;

}#sub email_files

# remove the files that were created during this run
# Deletes every file queued in @to_deletes; a file that is already gone is
# skipped and a failed delete is only reported through msg().
# Always returns 1.
sub cleanup{

    foreach my $file (@to_deletes){
        msg("Removing file '$file'...");
        next unless -e $file;
        unlink($file) or msg("Could not remove file '$file': $!");
    }

    return 1;

}#sub cleanup

#------------------ MAIN PROCESSING ------------------#

# number of jobs whose data was attached to the email
my ($send_mail) = 0;

# change into the output directory and create the email before any job runs;
# the jobs attach their files to it
init();

#main processing
foreach my $job (keys %jobs){

    my ($old_digest) = get_old_digest($job);
    my ($new_digest) = get_new_digest($job);

    # an empty digest means the data could not be fetched; nothing to attach
    if( $new_digest eq "" ){
        error($job, "Couldn't open data source to get data dump.");
        next;
    }

    # only attach data that changed since the last run, unless overridden
    if( ($new_digest ne $old_digest) || ($override > 0) ){
        next unless attach_contents($job);
        write_digest($job, $new_digest);
        $send_mail += 1;
    }
}

email_files() if $send_mail > 0;

# the attached files are only needed until the email has gone out
cleanup();

msg("Processing completed.");