Script to clean up unused Magento images
#!/usr/bin/php
<?php
/**
* jv_clean_product_images.php
*
* NOTICE OF LICENSE
*
* This source file is subject to the Open Software License (OSL 3.0)
* that is bundled with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://opensource.org/licenses/osl-3.0.php
*
* @copyright Copyright (c) 2014 Jeroen Vermeulen (http://www.jeroenvermeulen.eu)
* @license http://opensource.org/licenses/osl-3.0.php Open Software License (OSL 3.0)
*/
// Path to Magento, probably the only thing you need to change. You can also use a literal path.
$magePath = dirname(__FILE__) . '/..';
$tool = new Tool( $magePath );
$tool->run();
exit;
/**
* Class Tool
*
* Tool to remove unused Magento images from disk.
*
* Looks for usage in:
* - Product media gallery
* - Product EAV attributes: small_image, thumbnail, image
* - These product attributes in flat tables
*
*/
class Tool
{
/** @var string - Path to the Magento root */
protected $magePath;
/** @var string - Path to the product images root */
protected $imagesPath;
/** @var mysqli - MySQLi database adapter object */
protected $mysqli;
/** @var string - Hostname of MySQL server */
protected $dbHost;
/** @var string - Username for MySQL server */
protected $dbUser;
/** @var string - Password for MySQL server */
protected $dbWord;
/** @var string - Database name for MySQL server */
protected $dbName;
/** @var string - Table prefix for database */
protected $dbPrefix;
/** @var array - Magento product attribute names which contain image paths */
protected $imageFields = array( 'small_image', 'thumbnail', 'image' );
/**
* Constructor. Reads config and checks images path.
*
* @param $magePath
* @throws Exception
*/
public function __construct( $magePath ) {
$this->beNice();
// Read Magento config XML
$this->magePath = realpath( $magePath );
$xmlFile = $this->magePath.'/app/etc/local.xml';
if ( !is_readable($xmlFile) ) {
throw new Exception( sprintf( "ERROR: Config file '%s' does not exits or is not readable.\n", $xmlFile ) );
}
$simpleXml = simplexml_load_file( $xmlFile, 'SimpleXMLElement', LIBXML_NOCDATA );
$this->dbHost = strval( $simpleXml->global->resources->default_setup->connection->host );
$this->dbUser = strval( $simpleXml->global->resources->default_setup->connection->username );
$this->dbName = strval( $simpleXml->global->resources->default_setup->connection->dbname );
$this->dbWord = strval( $simpleXml->global->resources->default_setup->connection->password );
$this->dbPrefix = strval( $simpleXml->global->resources->db->table_prefix );
unset( $simpleXml );
// Check images path
$this->imagesPath = realpath( $this->magePath . '/media/catalog/product' );
if ( !is_dir( $this->imagesPath ) ) {
throw new Exception( sprintf( "ERROR: Product images directory '%s' does not exist.\n" . $this->imagesPath ) );
}
}
/**
* Runs the actual tool.
*
* @return bool
* @throws Exception
*/
public function run() {
// Open database connection
$this->mysqli = new mysqli( $this->dbHost, $this->dbUser, $this->dbWord, $this->dbName );
if ( $this->mysqli->connect_errno ) {
throw new Exception( sprintf( "ERROR: Failed to connect to MySQL: %s\n", $this->mysqli->connect_error ) );
}
// Add media gallery imags
$dbImages = $this->getQueryColumn(
sprintf( "SELECT `value` FROM `%scatalog_product_entity_media_gallery`",
$this->dbPrefix) );
// Add images from product attributes
$attrIds = $this->getQueryColumn(
sprintf( "SELECT `attribute_id`
FROM `%seav_attribute` INNER JOIN `%seav_entity_type` USING (`entity_type_id`)
WHERE `entity_type_code` = 'catalog_product'
AND `attribute_code` IN (%s)",
$this->dbPrefix, $this->dbPrefix, "'".implode("','",$this->imageFields)."'" ) );
$eavImages = $this->getQueryColumn(
sprintf( "SELECT `value` FROM `%scatalog_product_entity_varchar` WHERE `attribute_id` IN (%s)",
$this->dbPrefix, implode(',',$attrIds) ) );
$dbImages = array_merge( $dbImages, $eavImages );
// Add images from flat tables, to be sure we don't break the current site
$flatTables = $this->getQueryColumn(
sprintf( "SHOW TABLES LIKE '%scatalog_product_flat_%%'", $this->dbPrefix) );
foreach( $flatTables as $flatTable ) {
foreach( $this->imageFields as $field ) {
if ( $this->fieldExists($flatTable,$field) ) {
$fieldImages = $this->getQueryColumn( sprintf( "SELECT `%s` FROM `%s`", $field, $flatTable ) );
$dbImages = array_merge( $dbImages, $fieldImages );
}
}
}
// Make array unique, convert to real filename
$dbImages = array_unique( $dbImages );
$dbInvalidCount = 0;
$keys = array_keys($dbImages); // Save keys because we will manipulate array
foreach ( $keys as $key ) {
$dbImages[$key] = trim( $dbImages[$key] );
if ( empty( $dbImages[$key] ) ) {
unset( $dbImages[$key] );
} else {
$fullPath = realpath( $this->imagesPath . $dbImages[$key] );
if ( false === $fullPath ) {
// Finding path failed, filename from database does not exist on filesystem, ignore
unset( $dbImages[$key] );
$dbInvalidCount++;
}
elseif ( 0 !== strpos( $fullPath, $this->imagesPath ) ) {
// Something wrong, file is outside product images root, for example contains '../'
printf( "Warning: Image path outside image root used: '%s'.\n", $fullPath );
unset( $dbImages[$key] );
$dbInvalidCount++;
} else {
$dbImages[$key] = $fullPath;
}
}
}
$dbImages = array_unique( $dbImages ); // After realpath some paths may be the same
printf( "Found %d invalid images in database.\n", $dbInvalidCount );
printf( "Found %d valid images in database.\n", count( $dbImages ) );
// Close database connection, we don't need it anymore
$this->mysqli->close();
// Find images on filesystem using shell command "find"
$imageFiles = array();
chdir( $this->imagesPath );
exec( 'find ? -type f', $imageFiles );
printf( "Found %d image files on disk.\n", count( $imageFiles ) );
// Compare and build list of images to delete
$dbImageFlip = array_flip( $dbImages );
$deleteList = array();
$deleteSize = 0;
foreach( $imageFiles as $imageFile ) {
$fullFilePath = $this->imagesPath . '/' . $imageFile;
if ( !isset( $dbImageFlip[$fullFilePath] ) ) {
if ( is_writable( $fullFilePath ) ) {
$deleteList[] = $fullFilePath;
$deleteSize += filesize( $fullFilePath ) / 1024 / 1024; // Add in Mb
} else {
printf( "Warning: File '%s' is not writable, skipping.\n", $fullFilePath );
}
}
}
// Let the user descide what to do
if ( empty( $deleteList ) ) {
echo "Found no images to clean up.\n";
}
else {
printf( "Found %d image files to be deleted, %d Mb. Are you sure (y/N)? ", count( $deleteList ), $deleteSize );
$handle = fopen ("php://stdin","r");
$userInput = fgets($handle);
if( strtolower( substr( trim($userInput), 0, 1) ) != 'y' ) {
echo " ABORTING!\n";
return false;
}
}
// The actual deletion of the files
foreach( $deleteList as $deleteFile ) {
unlink( $deleteFile );
}
echo "Done.\n";
return true;
}
/**
* Execute a query and return the MySQLi result object
*
* @param string $query
* @return mysqli_result
* @throws Exception
*/
protected function sqlQuery( $query ) {
$queryResult = $this->mysqli->query( $query );
if (!$queryResult) {
throw new Exception( sprintf( "ERROR: Error '%s' running query '%s'.\n", $this->mysqli->error, $query ) );
}
return $queryResult;
}
/**
* Execute a query, and return the first field of each resulting row, in one array.
*
* @param $query
* @return array
* @throws Exception
*/
protected function getQueryColumn( $query ) {
$result = array();
$queryResult = $this->sqlQuery( $query );
while ( $row = $queryResult->fetch_row() ) {
$result[] = strval( $row[0] );
}
return $result;
}
/**
* Check if a field exists in a table in the database
*
* @param $table
* @param $column
* @return bool
* @throws Exception
*/
protected function fieldExists( $table, $column ) {
$queryResult = $this->sqlQuery( sprintf( "SHOW COLUMNS FROM `%s` LIKE '%s'", $table, $column ) );
return ( 0 < $queryResult->num_rows );
}
/**
* Be nice to other processes in disk and CPU usage.
* We give this script low priority so we don't bother more important processes.
*/
protected function beNice() {
proc_nice( 19 );
if ( file_exists('/usr/bin/ionice') ) {
shell_exec( sprintf( '/usr/bin/ionice -c 3 -p %d', getmypid() ) );
}
}
}