Archiving old emails & attachments

I’m trying to purge old emails (and attachments) from the CRM database and filesystem.

Filesystem:
I plan to write a little php script that will move files in the “/upload” folder to another location based on the id column in the “notes” table (filtering by “data_entered” or “date_modified”). I can then easily compress and archive if I want.

Database:
It’s mainly tables “emails_text” and “notes” that’s starting to get pretty large.
emails_text: pruning all bounce mails that are >x years old
notes: pruning all notes that are > x years old

Question:

  • Will SuiteCRM reimport any and all mails I remove this way from the source IMAP account via auto-import?
    Do I need to first archive the respective mail in the source email account, in order to prevent this re-import to happen?
  • Has anyone of you done anything similar in the past?

update:

I was reminded, that SuiteCRM only auto-imports mails if in “unread” status. So no archiving of emails in the source mailbox necessary.

I notice that most (all) bounced mails that are auto-imported from campaigns have the original mail attached. This attached email is then saved in table “notes” (filename usually “bounced.eml”) and in the filesystem under (/upload).
I removed all these attachments from the filesystem, thereby compacting the /upload folder by around 40% (6GB in 6 years).
This is something I will do regularly to save some space on the server.

I wrote a simple script to archive all bounced email attachments here
https://suitecrm.com/suitecrm/forum/developer-help/18843-archiving-bounced-email-attachments

Here the script I ended up using to archive attachments for mails older than a few years. Feel free to extend this and make an admin panel for it :slight_smile:

<?php

/*
* Published under license CC-BY-SA
*
* John Dickinson, Energie Zukunft Schweiz - j.dickinson@ezs.ch
* April 2018
*
* example URL for listing files: https://crm-url/scriptfolder/scriptname.php?auth_key=1234&note_rename=yes&append_ext=yes&mode=list&limit=100
* example URL for archiving files: https://crm-url/scriptfolder/scriptname.php?auth_key=1234&note_rename=yes&append_ext=yes&mode=archive&limit=none
*/


# params
$default_limit = 10; # if no GET limit is given, use this limit
$auth_key = "1234"; #auth_key that must be given as GET parameter for script to be executed.
$cutoff_years = "4"; #number of years before which files are archived
$min_filesize = 500; # file size in kB. Files below this threshold won't be moved.
$note_rename_prepend = "[[ Archived ]] "; # string to prepend to notes.name

# database params
$db_name = "crm"; # database name
$db_user = "username";
$db_pass = "password";

$sourcedir = "/var/www/crm_folder/upload/";     # path to source directory
$targetdir = "/var/www/crm_folder/upload_archive/";     # path to target directory


# check if target folder exists, else die()
if (!file_exists($targetdir)) {

  die("target folder doesn't exist => first manually create folder " . $targetdir); # PM: mkdir doesn't work (permissions on crm folder?)

}


# security: don't execute script unless auth_key given as GET parameter
$die_msg = "<html><head><title>Testpage</title></head>";
$die_msg .= "<body>No Hay Banda!</body>";
$die_msg .= "</html>";

if ($_GET['auth_key'] != $auth_key) {
  die($die_msg);
}


# load GET parameters
$limit = $_GET['limit'] ? $_GET['limit'] : $default_limit; # to run without limit, use '?limit=none'
$movefiles = ($_GET['mode'] == "archive") ? true : false; # copy files or only print report

# connect to DB
$dblink = dbConnect($db_name, $db_user, $db_pass);

# execute sql query
$sql_query = selectQuery(); # populate query

# start looping through file IDs
$counter = 1;
$sourcefilesize_sum = 0; # sum up filesize of all files in source folder
$targetfilesize_sum = 0; # sum up filesize of all files in target folder

while ($data = mysql_fetch_assoc($sql_query)) {

  # reset variables
  $movestatus = "";
  $renamestatus = "";
  $sourcefilesize = 0;
  $targetfilesize = 0;
  $filedate = "";

  # file extension (from database)
  $file_ext = pathinfo(utf8_encode($data["nname"]), PATHINFO_EXTENSION); #get filename extension from note name

  # file paths
  $sourcefile = $sourcedir . utf8_encode($data["id"]);
  $targetfile = $targetdir . utf8_encode($data["id"]); # target file name without extension (added later depending on GET parameter)
  $targetfile_w_ext = $targetfile .  "." . $file_ext;

  # if source file present in folder
  if (file_exists($sourcefile)) {

    $sourcefilesize = filesize($sourcefile)/1000; # filesize in KB
    $filedate = date("d.m.Y", filemtime($sourcefile)); # get file modified time

    # skip files with filesize below 500kB
    if ($sourcefilesize < $min_filesize) {
      continue; # jump to next record (restart while loop)
    }


    # if GET parameter action=archive is set
    if ($movefiles) {

      switch ($_GET['append_ext']) {

        case "no":
          $targetfilename = $targetfile; # target file path without extension
          break;

        default:
          $targetfilename = $targetfile_w_ext; # target file path with extension (default)

      } # end switch

      $movestatus = rename($sourcefile, $targetfilename) ? "success" : "failure";

      # falls Datei in dieser session neu verschoben wurde => Notiznamen im CRM anpassen, damit Leute wissen was los ist.
      if ($movestatus == "success") {

        $renamestatus = ($_GET['note_rename'] == "no") ? "note not renamed (GET parameter)" : updateNoteName(utf8_encode($data["id"]));

      } else {

        $renamestatus = "note not renamed";

      }

    } else {

      $movestatus = "ready to be moved";
      $renamestatus = "report only";

    } # end if movefiles

  } elseif (file_exists($targetfile) || file_exists($targetfile_w_ext)) {

    if (file_exists($targetfile)) { # targetfile without extension found

      $targetfilesize = filesize($targetfile)/1000; # filesize in KB
      $filedate = date("d.m.Y", filemtime($targetfile)); # get file modified time

    } elseif (file_exists($targetfile_w_ext)) { #targetfile with extension found

      $targetfilesize = filesize($targetfile_w_ext)/1000; # filesize in KB
      $filedate = date("d.m.Y", filemtime($targetfile_w_ext)); # get file modified time

    }

    $movestatus = "file already moved to target directory";
    $renamestatus = "note not renamed";

  } else {

    # if file can't be found
    $sourcefilesize = 0;
    $filedate = "n/a";
    $movestatus = "file can't be found";
    $renamestatus = "note not renamed";

  } # end if file exists

  $tablerows .= "<tr>";
  $tablerows .= "<td>" . $counter . "</td>";
  $tablerows .= "<td>" . utf8_encode($data["parent_type"]) . "</td>";
  $tablerows .= "<td>" . utf8_encode($data["intent"]) . "</td>";
  $tablerows .= "<td><nobr>" . date("d.m.Y", strtotime(utf8_encode($data["date_entered"]))) . "</nobr></td>";
  $tablerows .= "<td>" . utf8_encode($data["ename"]) . "</td>";
  $tablerows .= "<td>" . utf8_encode($data["nname"]) . "</td>";
  $tablerows .= "<td><nobr>" . utf8_encode($data["id"]) . "</nobr></td>";
  $tablerows .= "<td><nobr>" . $movestatus . "</nobr></td>";
  $tablerows .= "<td>" . $renamestatus . "</td>";
  $tablerows .= "<td>" . ($file_ext ? $file_ext : "n/a") . "</td>";
  $tablerows .= "<td>" . max(round($sourcefilesize/1000, 1), round($targetfilesize/1000, 1)) . "MB </td>";
  $tablerows .= "<td><nobr>" . $filedate . "</nobr></td>";
  $tablerows .= "</tr>";

  $counter++;

  $sourcefilesize_sum += $sourcefilesize;
  $targetfilesize_sum += $targetfilesize;

}



$sourcefilesize_sum = $sourcefilesize_sum/1000; # filesize in MB


# output html
echo "<html><head><title>CRM Archive Script</title></head><body>";
echo "<h3>Parameters used</h3>";
echo "<div>File Archive Mode: " . ($movefiles ? "Archive" : "List only (default)") . "</div>";
echo "<div>Append File Extension to filename: " . ($_GET['append_ext'] == no ? "No (GET Parameter)" : "Yes (default)") . "</div>";
echo "<div>Note Rename Mode: " . ($_GET['note_rename'] == no ? "Don't rename" : "Rename upon file archive (default)") . "</div>";
echo "<div>Cutoff date: " . date("d.m.Y", strtotime("-4 year"))  . " (newer mails are ignored)</div>";
echo "<div>Filesize threshold: " . $min_filesize . "kB (smaller files are ignored)</div>";
echo "<div>Email intent filter: 'pick'</div>";
echo "<div>Database Query LIMIT: " . $limit . "</div>";
echo "<br />";
echo "<div>potential total filesize: " . $sourcefilesize_sum . "MB.<br />Total filesize already moved: " . $targetfilesize_sum . "MB</div><br /><br />";

# echo data table
echo "<h3>Results</h3>";
echo tableCreateHeader(); # populate table title bar (function)
echo $tablerows;
echo "</tbody></table>";

echo "</body></html>";

?>





<?php ### functions


# function: create title bar for table
function tableCreateHeader () {

  $html = "<table><tbody><tr>";
  $html .= "<th>counter</th>";
  $html .= "<th><nobr>parent type</nobr></th>";
  $html .= "<th>intent</th>";
  $html .= "<th>date_entered</th>";
  $html .= "<th>email name</th>";
  $html .= "<th>note name</th>";
  $html .= "<th>ID</th>";
  $html .= "<th><nobr>archiving status</nobr></th>";
  $html .= "<th><nobr>note rename status</nobr></th>";
  $html .= "<th><nobr>filetype</nobr></th>";
  $html .= "<th><nobr>filesize</nobr></th>";
  $html .= "<th><nobr>file last modified</nobr></th>";
  $html .= "</tr>";

  return $html;
}



# function to prepend "ARCHIV" signature onto note name for user to better understand what's happened.
function updateNoteName ($noteID) {

  $sql = "UPDATE notes as n SET n.name=CONCAT('" . $note_rename_prepend . "', n.name) WHERE n.id = '" . $noteID ."' LIMIT 1";

  $sql_query = mysql_query($sql);

  return $renamestatus = ($sql_query == true ? "success": "failure");

} # end function updateNoteName



# DB connection
function dbConnect($db, $user, $pass) {

   $link = mysql_connect("localhost", $user, $pass);
    if (!$link) {
      die('Connection failed: ' . mysql_error());
    }

    $db_selected = mysql_select_db($db, $link);
    if (!$db_selected) {
        die ('Cannot use ' . $db . mysql_error());
    }
    return $link;
}



# SQL query
function selectQuery() {

  global $cutoff_years;
  global $limit;

  $sql = "SELECT n.id, n.parent_type, e.name AS ename, n.name AS nname, e.intent, n.filename, e.date_entered FROM notes AS n JOIN emails AS e ON n.parent_id = e.id";
  $sql .= " AND e.intent = 'pick'"; # only select non-bounce mails (use other script for bounce)
  $sql .= " AND DATE(e.date_entered) < (CURDATE() - INTERVAL " . $cutoff_years . " YEAR)"; # filter age of email in emails table

  $sql .= " ORDER BY e.date_entered ASC";

  switch ($limit) {
    case "none":
      break;

    default:
      $sql .= " LIMIT " . $limit;
  }

  $sql .= ";";

  return mysql_query($sql);

}

?>
1 Like