#!/usr/bin/perl

# apollias-website-statistics-saver.pl
# Version 1.1
#
# by Apollia of Astroblahhh.Com.  http://astroblahhh.com/
#
# Released Feb. 19, 2014.
#
#
# Available here:
#
# http://astroblahhh.com/software/perl/apollias-website-statistics-saver/v1_1/apollias-website-statistics-saver-v1_1.zip
#
# http://astroblahhh.com/software/perl/apollias-website-statistics-saver/v1_1/apollias-website-statistics-saver-v1_1.txt
#
#
# This script must be used with an accompanying, separate config file:
#
# http://astroblahhh.com/software/perl/apollias-website-statistics-saver/v1_1/Apollias_Website_Statistics_Saver__Additional_Config-v1_1.txt
#
# If you download the zip file, both this script and its config file are
# included in it.
#
#
# I wrote this script to automate an annoying monthly chore I used to
# have to do manually - saving my websites' raw statistics logs, which
# otherwise get deleted after 30 days.
#
# My web host is DreamHost.  http://dreamhost.com/   But, this script
# might work with other web hosts, perhaps with some modifications.
#
# Instead of keeping this script in a web-viewable location, I keep
# it, and its additional, separate config file, and its automatically-
# generated file of the last date/time it output a zip file, in a
# folder just outside of the folder of web-viewable stuff:
#
# /home/username/apstatsaver
#
# You can upload this script using whatever SFTP program you use.
# On DreamHost, this script runs even if I only set Owner:Read
# permissions on this script.
#
#
# This script is intended to be run periodically by a cron job.
#
# When this script is run, this script first checks for an external file -
# Apollias_Website_Statistics_Saver__Last_Zip_Date.pl - which by default is stored
# in the same directory as this script - to find out the last date/time
# this script successfully saved a zip file of statistics.  (It's OK if
# that file doesn't exist.)
#
# If that date/time is too recent, then, this script aborts.  But if the
# date/time was long ago enough, or the date/time file isn't found, then
# this script attempts to make a new zip file of statistics, and upon
# success, outputs Apollias_Website_Statistics_Saver__Last_Zip_Date.pl, containing
# the last date/time (in Unix timestamp format) this script successfully
# saved a zip file of statistics.
#
# This script might not catch every possible error, but, if some things
# appear to have gone wrong, then, this script attempts to email you about it.
#
#
# For convenience, some of this script's settings are stored in an external
# file - Apollias_Website_Statistics_Saver__Additional_Config-v1_1.pl, which
# is checked for by default in the same directory as this script.
#
# That file lets you specify directories containing statistics to zip, a
# custom prefix for the zip file name, a "from" email address to send
# notifications from, and a "to" email address to send notifications to.
#
#
# More instructions are below and inside
# Apollias_Website_Statistics_Saver__Additional_Config-v1_1.pl.
#
#
# -----
#
#
# Except for parts not by Apollia:
#
# Copyright (C) 2013  Apollia
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/ .
#
#
# Contact info: http://astroblahhh.com/contact-apollia.shtml


################################################################
#
# Some stuff you should leave alone
#

# "use strict" and "use warnings" only apply to this file.  The two
# files loaded with "require" further down are compiled separately, so
# they can keep assigning plain package variables such as
# $zip_filename_prefix.  Everything those files may set (or override)
# is declared here with "our" so it stays a package variable.
use strict;
use warnings;

# This just gets the script's location.  Other important things'
# locations are relative to this script's location.

use Cwd 'abs_path';
use File::Basename qw( dirname basename );

our $this_script_path       = abs_path($0);
our $this_script_parentdirs = dirname( $this_script_path );

print "Running $this_script_path...\n\n\n";


################
#
# Constants for readability
#

use constant false => 0;
use constant true  => 1;

#
# End of Constants for readability
#
################


# Variables that are expected to be set by the two external files:
#
#   $last_zip_date                      - set by the last zip date file
#   @dirs_containing_statistics_to_zip,
#   $zip_filename_prefix,
#   $email_address_to_send_from,
#   $email_address_to_send_notifs_to    - set by the additional config file

our $last_zip_date;
our @dirs_containing_statistics_to_zip;
our $zip_filename_prefix;
our $email_address_to_send_from;
our $email_address_to_send_notifs_to;

# Working lists filled in further down.
our @files;            # Result of the most recent GetFilesInDirectory() call.
our @things_to_zip;    # Full paths of the regular files found so far.

# End of some stuff you should leave alone
#
################################################################


#######################################################################
#
# Some stuff you can change, but probably shouldn't
#

our $name_of_last_zip_date_file     = "Apollias_Website_Statistics_Saver__Last_Zip_Date.pl";
our $name_of_additional_config_file = "Apollias_Website_Statistics_Saver__Additional_Config-v1_1.pl";

# Those two additional files are stored by default in the same directory
# as this script itself:

our $path_to_last_zip_date_file     = $this_script_parentdirs . "/" . $name_of_last_zip_date_file;
our $path_to_additional_config_file = $this_script_parentdirs . "/" . $name_of_additional_config_file;

# And this folder for the zip files this script saves
# goes in this script's folder too:

our $final_location_for_zip_file = $this_script_parentdirs . "/log-zips/";

# Below is a setting which only controls the order in which files are
# added to the zip file.
#
# "alpha"     sorts by file name.
# "creation"  sorts by inode change time (Perl's -C file test), because
#             Linux doesn't have creation timestamps on files.
# "mod"       sorts by modification time (Perl's -M file test).
#
# For "creation" and "mod", the oldest files are added first.  Any other
# value is treated like "alpha".

our $ORDER = "mod";   # default: "mod"
                      # alpha|creation|mod

our $should_use_full_paths = false;
# Not important to change.  This and the next variable are relics
# of the zip backup script I wrote that this script is partially
# derived from.

our $should_halt_script_if_any_things_to_zip_dont_exist = false;
# If true, the script will halt if it finds any of the folders in
# @dirs_containing_statistics_to_zip don't exist, and try to email
# you about it.
#
# If false, the script will just go ahead and create the zip file
# without the missing folders - and try to email you about
# the missing folders.

# End of some stuff you can change, but probably shouldn't.
#
#######################################################################
#
# Some stuff you can change
#
#
# Below, you can set how many days need to go by before this script
# will create a new zip file of statistics.
#
# This script won't just automatically run every x days, however.
# To make this script run periodically, you'll need to set up a cron
# job to run this script.  Exactly how to do that depends on your
# web host.

our $number_of_days_that_must_pass_since_last_zip_date_before_this_script_zips_again = 20;

# Remember to uncomment the below variables once you put in some
# valid email addresses.  (Uncomment means, remove the # from the
# beginning of the line.)
#
# The below addresses are only used if this script's additional
# config file isn't found or doesn't set them.

#$email_address_to_send_from='';
#$email_address_to_send_notifs_to='';

# You must use single quotes above, because @ does weird things
# inside quotes in Perl.

our $path_to_email_program = '/usr/sbin/sendmail';
# Correct on DreamHost, as of 01/19/2014, but might have to be
# modified depending on your web host.

# End of stuff you can change.
#
#
# Below this point, nothing else in this script needs to be changed.
#
# However, you should customize the settings inside this file:
#
# Apollias_Website_Statistics_Saver__Additional_Config-v1_1.pl
#
#######################################################################


#################
#
# Start of last zip date stuff

# NOTE(review): nothing in this script calls a Date::Parse function.
# The import is kept as it was in case the additional config file
# relies on it - confirm before removing.
use Date::Parse;

if (-e $path_to_last_zip_date_file)   # If the last zip date file exists...
{
    print "Found last zip date file!\n";

    # The file is a tiny Perl script that assigns $last_zip_date.
    require $path_to_last_zip_date_file;

    print "Last zip date: $last_zip_date\n\n";

    my $unix_timestamp_of_last_zip_date = $last_zip_date;
    my $unix_timestamp_of_now           = time();

    print "Unix timestamp of last zip date: $unix_timestamp_of_last_zip_date\n";
    print "Current Unix timestamp: $unix_timestamp_of_now\n\n";

    # days -> hours -> minutes -> seconds
    my $number_of_seconds_in_number_of_days_that_must_pass_since_last_zip_date_before_this_script_zips_again
        = ( ($number_of_days_that_must_pass_since_last_zip_date_before_this_script_zips_again * 24) * 60) * 60;

    print "Number of seconds in number of days that must pass since last zip date before this script zips again: ";
    print $number_of_seconds_in_number_of_days_that_must_pass_since_last_zip_date_before_this_script_zips_again;
    print "\n\n";

    my $difference_between_then_and_now = $unix_timestamp_of_now - $unix_timestamp_of_last_zip_date;

    print "Difference between last zip date and now: $difference_between_then_and_now\n\n";

    if ( $difference_between_then_and_now
         <= $number_of_seconds_in_number_of_days_that_must_pass_since_last_zip_date_before_this_script_zips_again )
    {
        print "Not enough time has elapsed since the last zip date! Aborting.\n\n";
        die;
    }
    else
    {
        print "Can zip again!\n\n";
    }
}
else
{
    print "No last zip date found! Can probably proceed with zip.\n\n";
}

# End of last zip date stuff
#
#################


# Email_Notif( $subject, $body )
#
# Sends a plain notification email by piping a message to the program in
# $path_to_email_program, run with the -t option so that the recipient is
# taken from the "To:" header.
#
# The sender and recipient come from $email_address_to_send_from and
# $email_address_to_send_notifs_to.  If no recipient is configured, the
# notification is printed instead of emailed.
#
# Dies if the email program can't be started.
#
# A modified version of code from:
#
# http://wiki.dreamhost.com/Sendmail

sub Email_Notif
{
    my ($subject, $body) = @_;

    $subject = '' unless defined $subject;
    $body    = '' unless defined $body;

    my $from_address = defined $email_address_to_send_from      ? $email_address_to_send_from      : '';
    my $to_address   = defined $email_address_to_send_notifs_to ? $email_address_to_send_notifs_to : '';

    print "From address: $from_address\n";
    print "To address: $to_address\n";

    if ($to_address eq '')
    {
        print "No \"to\" email address is set, so this notification can't be emailed:\n\n";
        print "$subject\n\n$body\n";
        return;
    }

    # A header value must stay on one line, or the rest of it would be
    # read as further headers or as the start of the body.
    foreach my $header_value ($subject, $from_address, $to_address)
    {
        $header_value =~ s/[\r\n]+/ /g;
    }

    # Splitting on whitespace keeps working if the setting holds extra
    # options after the program path.  The list form of open runs the
    # program directly, without a shell, so nothing in the addresses can
    # be interpreted as shell syntax.
    my @email_command = split ' ', $path_to_email_program;

    open(my $mail, '|-', @email_command, '-t')
        || die "Can't open $path_to_email_program !\n";

    print {$mail} "To: $to_address\n";
    print {$mail} "From: $from_address\n";
    print {$mail} "Subject: $subject\n";
    print {$mail} "\n";    # The empty line that ends the headers.
    print {$mail} $body;

    close($mail)
        or print "Warning: $path_to_email_program didn't finish successfully.\n";

    return;
}


#################
#
# Start of additional config file stuff

if (-e $path_to_additional_config_file)   # If the additional config file exists...
{
    print "Found path to additional config!\n";
    require $path_to_additional_config_file;
}
else   # Couldn't find the additional config file.  Can't proceed without it.
{
    Email_Notif("Web Stats Saver Error", "No additional config file found!  Can't proceed.\n");
    die;
}

# End of additional config file stuff
#
#################


my @nonexistent_paths;


# Create_Zip_Dest_Path_If_Necessary( $name_of_variable )
#
# Takes the NAME of a package variable (without the $) that holds a path,
# and creates a directory at that path if nothing exists there yet.
#
# A failed mkdir is only reported on screen here;
# CheckForZipDestPathErrors() is what stops the script.

sub Create_Zip_Dest_Path_If_Necessary
{
    my ($var) = @_;

    # The variable is looked up by name, which "use strict" normally forbids.
    my $path = do { no strict 'refs'; ${$var} };

    if (!-e $path)
    {
        mkdir $path
            or print "Couldn't create $path: $!\n";
    }

    return;
}

Create_Zip_Dest_Path_If_Necessary("final_location_for_zip_file");


# CheckForZipDestPathErrors( $name_of_variable )
#
# Takes the NAME of a package variable (without the $) that holds a path.
# If that path isn't a directory, prints an error, tries to email you
# about it, and dies.

sub CheckForZipDestPathErrors
{
    my ($var) = @_;

    # The variable is looked up by name, which "use strict" normally forbids.
    my $path = do { no strict 'refs'; ${$var} };

    if (!-d $path)
    {
        print "Error: ";
        print $path;
        print " is not a directory!\n";

        Email_Notif("$zip_filename_prefix Web Stats Saver Error",
            "Sorry, the path provided in the $var variable doesn't lead to a directory. Please put a different path in $var, or make $path into a directory, and run this script again.");
        die;
    }

    return;
}

CheckForZipDestPathErrors("final_location_for_zip_file");


# AddSlashIfNeeded( $string )
#
# Returns $string with a "/" added to the end, unless it already ends
# with one.  An empty or undefined string becomes "/".

sub AddSlashIfNeeded
{
    my ($this_string) = @_;

    $this_string = '' unless defined $this_string;

    if ($this_string !~ m{/\z})
    {
        $this_string .= "/";
    }

    return $this_string;
}

$final_location_for_zip_file = AddSlashIfNeeded($final_location_for_zip_file);


# Make_Zip_File_Name()
#
# Returns the zip file's name (without ".zip"), built from
# $zip_filename_prefix and the current date/time in GMT, like this:
#
# prefix_Stats-2014_2_19-07,05,09
#
# The hour, minute and second are zero-padded.  The month and day are
# not, and that's left as it is so new zip files are named the same way
# as the ones already saved by earlier runs.

sub Make_Zip_File_Name
{
    my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset) = gmtime(time);

    my $year = 1900 + $yearOffset;
    $month   = $month + 1;    # gmtime counts months from 0.

    $second = sprintf("%02d", $second);
    $minute = sprintf("%02d", $minute);
    $hour   = sprintf("%02d", $hour);

    my $prefix = defined $zip_filename_prefix ? $zip_filename_prefix : '';

    my $result = $prefix . "_" . "Stats" . "-"
               . $year . "_" . $month . "_" . $dayOfMonth . "-"
               . $hour . "," . $minute . "," . $second;

    return $result;
}

our $zipfile_name = Make_Zip_File_Name();

print "\n\nZip file name: $zipfile_name\n\n";


# Output_Last_Zip_Date()
#
# Writes the last zip date file: a tiny Perl script that sets
# $last_zip_date to the current Unix timestamp.  The next run of this
# script loads it with "require".
#
# If the file can't be written, this is reported on screen and by email,
# since otherwise the next run would zip again too soon.

sub Output_Last_Zip_Date
{
    my $last_zip_date_fh;

    if (!open($last_zip_date_fh, '>', $path_to_last_zip_date_file))
    {
        my $error = "Couldn't write $path_to_last_zip_date_file: $!\n";
        print $error;
        Email_Notif("$zip_filename_prefix Web Stats Saver Error - Last Zip Date Not Saved", $error);
        return;
    }

    print {$last_zip_date_fh} "#!/usr/bin/perl\n\n";
    print {$last_zip_date_fh} '$last_zip_date = ';
    print {$last_zip_date_fh} time();
    print {$last_zip_date_fh} "; \n";

    close($last_zip_date_fh)
        or print "Couldn't finish writing $path_to_last_zip_date_file: $!\n";

    chmod 0600, $path_to_last_zip_date_file;
    # Owner:Read and Owner:Write permissions only.  Write permission is
    # needed so the next successful run can overwrite this file.

    return;
}


# GetFilesInDirectory( $whichdir )
#
# Reads the names of everything directly inside $whichdir (including
# "." and ".."), sorts them according to $ORDER, stores them in @files,
# and also returns them.
#
# The names are bare names, not full paths.  If the directory can't be
# opened, @files is left empty.

sub GetFilesInDirectory
{
    my ($whichdir) = @_;

    @files = ();

    my $dir_handle;

    if (!opendir($dir_handle, $whichdir))
    {
        print "Couldn't open directory $whichdir: $!\n";
        return @files;
    }

    my @names = readdir($dir_handle);
    closedir($dir_handle);

    if ($ORDER eq "creation" || $ORDER eq "mod")
    {
        # The file tests need the full path, because the current
        # directory isn't necessarily $whichdir.  Each age is looked up
        # once, then used for sorting.
        my %age_of;

        foreach my $name (@names)
        {
            my $full_path = "$whichdir/$name";
            my $age = ($ORDER eq "mod") ? (-M $full_path) : (-C $full_path);

            $age_of{$name} = defined $age ? $age : 0;
        }

        # Greatest age first, which means the oldest files come first.
        @files = sort { $age_of{$b} <=> $age_of{$a} || $a cmp $b } @names;
    }
    else   # "alpha", or anything unrecognized.
    {
        @files = sort @names;
    }

    return @files;
}


# Collect the regular files found directly inside each statistics folder.

foreach my $dir_containing_statistics_to_zip (@dirs_containing_statistics_to_zip)
{
    $dir_containing_statistics_to_zip = AddSlashIfNeeded($dir_containing_statistics_to_zip);

    print "dir_containing_statistics_to_zip: $dir_containing_statistics_to_zip\n\n";

    if ( ! -e $dir_containing_statistics_to_zip)
    {
        push (@nonexistent_paths, $dir_containing_statistics_to_zip );
        next;
    }

    GetFilesInDirectory($dir_containing_statistics_to_zip);

    foreach my $file (@files)
    {
        #print "File: $file \n";

        my $path_to_file = $dir_containing_statistics_to_zip . $file;

        # Folders (including symlinks to folders) and symlinks to files
        # are skipped.  Only regular files get zipped.

        if (-d $path_to_file)
        {
            print "DIRECTORY FOUND!!!! $file\n";
            next;
        }

        if (-l $path_to_file)
        {
            print "SYMLINK FOUND!!!! $file\n";
            next;
        }

        if (-f $path_to_file)
        {
            print "yeeha! ";
            print $file;
            print "\n";

            push(@things_to_zip, $path_to_file);
        }
    }
}


# Split each thing to zip into its parent folders and its own name, and
# double-check that it still exists.

my %parentdirs_and_file_or_folder_name;   # full path => [parent folders, name]
my @existent_paths_in_order;              # Same full paths, in the order found.
my $printable_list_of_things_to_zip = '';

foreach my $thing_to_zip (@things_to_zip)
{
    my $len = length($thing_to_zip);

    # Starting the search at $len-2 skips over a slash at the very end.
    my $second_rightmost_slash_loc = rindex($thing_to_zip, "/", $len-2);

    my $parentdirs          = substr($thing_to_zip, 0, $second_rightmost_slash_loc+1);
    my $file_or_folder_name = substr($thing_to_zip, $second_rightmost_slash_loc+1);

    $file_or_folder_name =~ s{/\z}{};   # If the last character is "/", get rid of it.

    my $this_full_path = $parentdirs . $file_or_folder_name;

    if (!-e $this_full_path)
    {
        push (@nonexistent_paths, $this_full_path);
    }
    else
    {
        $printable_list_of_things_to_zip = $printable_list_of_things_to_zip . "$thing_to_zip\n";

        # The hash keeps a path from being zipped twice, and the array
        # remembers the order chosen by $ORDER.
        if (!exists $parentdirs_and_file_or_folder_name{$this_full_path})
        {
            push (@existent_paths_in_order, $this_full_path);
        }

        $parentdirs_and_file_or_folder_name{$this_full_path} = [$parentdirs, $file_or_folder_name];
    }
}


if (@nonexistent_paths)
{
    print "Nonexistent file(s) or folder(s) listed in things_to_zip array!\n\n";

    my $list_of_nonexistent_paths = '';

    foreach my $nonexistent_path (@nonexistent_paths)
    {
        $list_of_nonexistent_paths .= "$nonexistent_path\n";
        print " $nonexistent_path\n";
    }

    if ($should_halt_script_if_any_things_to_zip_dont_exist)
    {
        Email_Notif("$zip_filename_prefix Web Stats Saver Error - Nonexistent Items Found - No Output",
            "Couldn't make zip file because nonexistent items were found!\n\n$list_of_nonexistent_paths");
        die;
    }
    else
    {
        Email_Notif("$zip_filename_prefix Web Stats Saver Warning - Incomplete zip file because of Nonexistent Items!",
            "Incomplete zip file because nonexistent items were found!\n\n$list_of_nonexistent_paths");
    }
}


print "\n";

if ($should_use_full_paths)
{
    print "Using full paths for files in zip file.";
}
else
{
    print "Using short paths for files in zip file.";
}

print "\n";


# Keep the zip file's name from getting too long.

if (length($zipfile_name) > 96)
{
    $zipfile_name = substr($zipfile_name, 0, 96);
}

$zipfile_name = $zipfile_name . ".zip";

our $zip_file_path = $final_location_for_zip_file . $zipfile_name;


print "\n\nGoing to try to back up these paths:\n\n";
print $printable_list_of_things_to_zip;
print "\n";

print "\n\nTrying to make zip...\n\n\n";

my @paths_that_couldnt_be_zipped;

foreach my $fullpath (@existent_paths_in_order)
{
    my ($path, $file_or_folder) = @{ $parentdirs_and_file_or_folder_name{$fullpath} };

    my $parentdirs = dirname($path);
    my $parentdir  = basename($path);

    my $grandparentdirs = dirname($parentdirs);
    my $grandparentdir  = basename($parentdirs);

    # This script is assuming your web stats path looks something like this:
    #
    # /home/username/logs/example-domain.com/http/
    #
    # or
    #
    # /home/username/logs/example-subdomain.example-domain.com/http/
    #
    # If you're using full paths, then, those entire paths will appear in the
    # zip file as well.
    #
    #
    # If you're not using full paths, then, the folders inside the zip file
    # will be:
    #
    # /example-domain.com/http/
    #
    # and
    #
    # /example-subdomain.example-domain.com/http/

    my $folder_to_zip_from;
    my $path_to_give_to_zip;

    if ($should_use_full_paths)
    {
        $folder_to_zip_from  = "/";
        $path_to_give_to_zip = $path . $file_or_folder;
    }
    else
    {
        $folder_to_zip_from  = $grandparentdirs;
        $path_to_give_to_zip = "$grandparentdir/$parentdir/$file_or_folder";
    }

    if (!chdir $folder_to_zip_from)
    {
        print "Couldn't change to folder $folder_to_zip_from: $!\n\n";
        push (@paths_that_couldnt_be_zipped, $fullpath);
        next;
    }

    print "# zip -D -y \"$zip_file_path\" \"$path_to_give_to_zip\"\n\n";

    # Giving system a list runs zip directly, without a shell, so file
    # names containing quotes, $ or other special characters are passed
    # along exactly as they are.
    my $zip_status = system('zip', '-D', '-y', $zip_file_path, $path_to_give_to_zip);

    if ($zip_status != 0)
    {
        print "The zip command failed for $fullpath\n\n";
        push (@paths_that_couldnt_be_zipped, $fullpath);
    }
}

print "\n\nHopefully backed up all these paths:\n\n";
print $printable_list_of_things_to_zip;
print "\n";


my $zip_file_path_to_display = $zip_file_path;

print "\n\nBackup finished!\n";
print "\nZip file at: $zip_file_path_to_display\n";


if (-e $zip_file_path)
{
    chmod 0400, $zip_file_path;
    # Owner:Read permissions only.

    Output_Last_Zip_Date();

    if (@paths_that_couldnt_be_zipped)
    {
        Email_Notif("$zip_filename_prefix Web Stats Saver Warning - Incomplete zip file because zip failed for some items!",
            "The zip file was created, but these items couldn't be added to it:\n\n"
            . join("\n", @paths_that_couldnt_be_zipped) . "\n");
    }
}
else   # Couldn't find the zip file at the path where it's now supposed to be.
{
    Email_Notif("$zip_filename_prefix Web Stats Saver Error - Zip File Not Created",
        "The end of the script was reached, but no zip file was found where it was supposed to be!\n\nTried to create $zip_file_path but failed.");
}