mystix
11/27/2014 - 10:10 AM

Script to clean up unused Magento images

Script to clean up unused Magento images

#!/usr/bin/php
<?php
/**
 * jv_clean_product_images.php
 *
 * NOTICE OF LICENSE
 *
 * This source file is subject to the Open Software License (OSL 3.0)
 * that is bundled with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://opensource.org/licenses/osl-3.0.php
 *
 * @copyright   Copyright (c) 2014 Jeroen Vermeulen (http://www.jeroenvermeulen.eu)
 * @license     http://opensource.org/licenses/osl-3.0.php  Open Software License (OSL 3.0)
 */

// Path to Magento, probably the only thing you need to change. You can also use a literal path.
$magePath = dirname(__FILE__) . '/..';

$tool = new Tool( $magePath );
$tool->run();

exit;

/**
 * Class Tool
 *
 * Tool to remove unused Magento images from disk.
 *
 * Looks for usage in:
 *   - Product media gallery
 *   - Product EAV attributes: small_image, thumbnail, image
 *   - These product attributes in flat tables
 *
 */
class Tool
{
    /** @var string - Path to the Magento root */
    protected $magePath;
    /** @var string - Path to the product images root */
    protected $imagesPath;
    /** @var mysqli - MySQLi database adapter object */
    protected $mysqli;
    /** @var string - Hostname of MySQL server */
    protected $dbHost;
    /** @var string - Username for MySQL server */
    protected $dbUser;
    /** @var string - Password for MySQL server */
    protected $dbWord;
    /** @var string - Database name for MySQL server */
    protected $dbName;
    /** @var string - Table prefix for database */
    protected $dbPrefix;
    /** @var array - Magento product attribute names which contain image paths */
    protected $imageFields = array( 'small_image', 'thumbnail', 'image' );

    /**
     * Constructor. Reads config and checks images path.
     *
     * @param $magePath
     * @throws Exception
     */
    public function __construct( $magePath ) {
        $this->beNice();

        // Read Magento config XML
        $this->magePath = realpath( $magePath );
        $xmlFile  = $this->magePath.'/app/etc/local.xml';
        if ( !is_readable($xmlFile) ) {
            throw new Exception( sprintf( "ERROR: Config file '%s' does not exits or is not readable.\n", $xmlFile ) );
        }
        $simpleXml = simplexml_load_file( $xmlFile, 'SimpleXMLElement', LIBXML_NOCDATA );
        $this->dbHost = strval( $simpleXml->global->resources->default_setup->connection->host );
        $this->dbUser = strval( $simpleXml->global->resources->default_setup->connection->username );
        $this->dbName = strval( $simpleXml->global->resources->default_setup->connection->dbname );
        $this->dbWord = strval( $simpleXml->global->resources->default_setup->connection->password );
        $this->dbPrefix = strval( $simpleXml->global->resources->db->table_prefix );
        unset( $simpleXml );

        // Check images path
        $this->imagesPath = realpath( $this->magePath . '/media/catalog/product' );
        if ( !is_dir( $this->imagesPath ) ) {
            throw new Exception( sprintf( "ERROR: Product images directory '%s' does not exist.\n" . $this->imagesPath ) );
        }
    }

    /**
     * Runs the actual tool.
     *
     * @return bool
     * @throws Exception
     */
    public function run() {
        // Open database connection
        $this->mysqli = new mysqli( $this->dbHost, $this->dbUser, $this->dbWord, $this->dbName );
        if ( $this->mysqli->connect_errno ) {
            throw new Exception( sprintf( "ERROR: Failed to connect to MySQL: %s\n", $this->mysqli->connect_error ) );
        }

        // Add media gallery imags
        $dbImages = $this->getQueryColumn(
            sprintf( "SELECT `value` FROM `%scatalog_product_entity_media_gallery`",
                $this->dbPrefix) );

        // Add images from product attributes
        $attrIds = $this->getQueryColumn(
            sprintf( "SELECT `attribute_id`
                      FROM `%seav_attribute` INNER JOIN `%seav_entity_type` USING (`entity_type_id`)
                      WHERE `entity_type_code` = 'catalog_product'
                      AND `attribute_code` IN (%s)",
                $this->dbPrefix, $this->dbPrefix, "'".implode("','",$this->imageFields)."'" ) );
        $eavImages = $this->getQueryColumn(
            sprintf( "SELECT `value` FROM `%scatalog_product_entity_varchar` WHERE `attribute_id` IN (%s)",
                $this->dbPrefix, implode(',',$attrIds) ) );
        $dbImages = array_merge( $dbImages, $eavImages );

        // Add images from flat tables, to be sure we don't break the current site
        $flatTables = $this->getQueryColumn(
            sprintf( "SHOW TABLES LIKE '%scatalog_product_flat_%%'", $this->dbPrefix) );
        foreach( $flatTables as $flatTable ) {
            foreach( $this->imageFields as $field ) {
                if ( $this->fieldExists($flatTable,$field) ) {
                    $fieldImages = $this->getQueryColumn( sprintf( "SELECT `%s` FROM `%s`", $field, $flatTable ) );
                    $dbImages = array_merge( $dbImages, $fieldImages );
                }
            }
        }

        // Make array unique, convert to real filename
        $dbImages = array_unique( $dbImages );
        $dbInvalidCount = 0;
        $keys = array_keys($dbImages); // Save keys because we will manipulate array
        foreach ( $keys as $key ) {
            $dbImages[$key] = trim( $dbImages[$key] );
            if ( empty( $dbImages[$key] ) ) {
                unset( $dbImages[$key] );
            } else {
                $fullPath = realpath( $this->imagesPath . $dbImages[$key] );
                if ( false === $fullPath ) {
                    // Finding path failed, filename from database does not exist on filesystem, ignore
                    unset( $dbImages[$key] );
                    $dbInvalidCount++;
                }
                elseif ( 0 !== strpos( $fullPath, $this->imagesPath ) ) {
                    // Something wrong, file is outside product images root, for example contains '../'
                    printf( "Warning: Image path outside image root used: '%s'.\n", $fullPath );
                    unset( $dbImages[$key] );
                    $dbInvalidCount++;
                } else {
                    $dbImages[$key] = $fullPath;
                }
            }
        }
        $dbImages = array_unique( $dbImages ); // After realpath some paths may be the same
        printf( "Found %d invalid images in database.\n", $dbInvalidCount );
        printf( "Found %d valid images in database.\n", count( $dbImages ) );

        // Close database connection, we don't need it anymore
        $this->mysqli->close();

        // Find images on filesystem using shell command "find"
        $imageFiles = array();
        chdir( $this->imagesPath );
        exec( 'find ? -type f', $imageFiles );
        printf( "Found %d image files on disk.\n", count( $imageFiles ) );

        // Compare and build list of images to delete
        $dbImageFlip = array_flip( $dbImages );
        $deleteList = array();
        $deleteSize = 0;
        foreach( $imageFiles as $imageFile ) {
            $fullFilePath = $this->imagesPath . '/' . $imageFile;
            if ( !isset( $dbImageFlip[$fullFilePath] ) ) {
                if ( is_writable( $fullFilePath ) ) {
                    $deleteList[] = $fullFilePath;
                    $deleteSize += filesize( $fullFilePath ) / 1024 / 1024; // Add in Mb
                } else {
                    printf( "Warning: File '%s' is not writable, skipping.\n", $fullFilePath );
                }
            }
        }

        // Let the user descide what to do
        if ( empty( $deleteList ) ) {
            echo "Found no images to clean up.\n";
        } 
        else {
            printf( "Found %d image files to be deleted, %d Mb. Are you sure (y/N)? ", count( $deleteList ), $deleteSize );
            $handle = fopen ("php://stdin","r");
            $userInput = fgets($handle);
            if( strtolower( substr( trim($userInput), 0, 1) ) != 'y' ) {
                echo " ABORTING!\n";
                return false;
            }
        }

        // The actual deletion of the files
        foreach( $deleteList as $deleteFile ) {
            unlink( $deleteFile );
        }

        echo "Done.\n";
        return true;
    }

    /**
     * Execute a query and return the MySQLi result object
     *
     * @param string $query
     * @return mysqli_result
     * @throws Exception
     */
    protected function sqlQuery( $query ) {
        $queryResult = $this->mysqli->query( $query );
        if (!$queryResult) {
            throw new Exception( sprintf( "ERROR: Error '%s' running query '%s'.\n", $this->mysqli->error, $query ) );
        }
        return $queryResult;
    }

    /**
     * Execute a query, and return the first field of each resulting row, in one array.
     *
     * @param $query
     * @return array
     * @throws Exception
     */
    protected function getQueryColumn( $query ) {
        $result = array();
        $queryResult = $this->sqlQuery( $query );
        while ( $row = $queryResult->fetch_row() ) {
             $result[] = strval( $row[0] );
        }
        return $result;
    }

    /**
     * Check if a field exists in a table in the database
     *
     * @param $table
     * @param $column
     * @return bool
     * @throws Exception
     */
    protected function fieldExists( $table, $column ) {
        $queryResult = $this->sqlQuery( sprintf( "SHOW COLUMNS FROM `%s` LIKE '%s'", $table, $column ) );
        return ( 0 < $queryResult->num_rows );
    }

    /**
     * Be nice to other processes in disk and CPU usage.
     * We give this script low priority so we don't bother more important processes.
     */
    protected function beNice() {
        proc_nice( 19 );
        if ( file_exists('/usr/bin/ionice') ) {
            shell_exec( sprintf( '/usr/bin/ionice -c 3 -p %d', getmypid() ) );
        }
    }
}