<?php

/*
 * This script deletes duplicate images and imagerows from the database of which the images are not present in the filesystem.
 * It also removes images that are exact copies of another image for the same product.
 * And lastly, it looks for images that are on the filesystem but not in the database (orphaned images).
 * 
 * This script can most likely be optimized but since it'll probably only be run a few times, I can't be bothered.
 *
 * Place scripts in a folder named 'scripts' (or similar) in the Magento root.
 *
 * Note: needs 'fdupes' lib to run cleanDuplicates function.
 *
 */

chdir(dirname(__FILE__));

require_once '../app/Mage.php';
Mage::app();

$resource = Mage::getSingleton('core/resource');
$db = $resource->getConnection('core_write');

$eavAttribute = new Mage_Eav_Model_Mysql4_Entity_Attribute();
$thumbnailAttrId = $eavAttribute->getIdByCode('catalog_product', 'thumbnail');
$smallImageAttrId = $eavAttribute->getIdByCode('catalog_product', 'small_image');
$imageAttrId = $eavAttribute->getIdByCode('catalog_product', 'image');

$cleanUpDuplicates = false;
$countProductWithoutImages = false;
$cleanUpOrphans = false;
$cleanUpTableRowsMediaGallery = false;
$cleanUpTableRowsVarchar = false;
$setDefaultImageForProductsWithoutDefaultImage = false;

if($countProductWithoutImages) {
    $result = $db->fetchAll('SELECT * FROM `' . $resource->getTableName('catalog_product_entity_media_gallery') . '` as mediagallery RIGHT OUTER JOIN ' . $resource->getTableName('catalog_product_entity') . ' as entitytable ON entitytable.entity_id = mediagallery.entity_id WHERE mediagallery.value is NULL');
    echo count($result) . ' products without images' . "\n";
}

if($cleanUpDuplicates) {
    $directory = Mage::getBaseDir('media') . DS . 'catalog' . DS . 'product' . DS; //. '/catalog/product/z/o/';
    
    $output = shell_exec('find ' . $directory . ' -type d -exec fdupes -n {} \;'); // find duplicates
    $before = substr(shell_exec('find ' . $directory . ' -type f | wc -l'),0,-1); // count files for difference calculation
    $total = shell_exec('du -h ' . $directory); $total = explode("\n",$total); array_pop($total); $total = array_pop($total); $total = explode("\t",$total); $total = array_shift($total);
    $totalBefore = $total;
    
    $chunks = explode("\n\n",$output);
    
    /* Run through duplicates and replace database rows */
    foreach($chunks as $chunk) {
        $files = explode("\n",$chunk);
        $original = array_shift($files);
        foreach($files as $file) {
            // update database where filename=file set filename=original
            $original = DS . implode(DS,array_slice(explode(DS,$original), -3));
            $file = DS . implode(DS,array_slice(explode(DS,$file), -3));
            $oldFileOnServer = Mage::getBaseDir('media') . DS . 'catalog' . DS . 'product' . $file;
            $newFileOnServer = Mage::getBaseDir('media') . DS . 'catalog' . DS . 'product' . $original;
            if(file_exists($newFileOnServer) && file_exists($oldFileOnServer)) {
                $db->beginTransaction();
                $resultVarchar = $db->update('catalog_product_entity_varchar', array('value'=>$original), $db->quoteInto('value =?',$file));
                $db->commit();
                $db->beginTransaction();
                $resultGallery = $db->update('catalog_product_entity_media_gallery', array('value'=>$original), $db->quoteInto('value =?',$file));
                $db->commit();
                echo 'Replaced ' . $file . ' with ' . $original . ' (' . $resultVarchar . '/' . $resultGallery . ')' . "\n";
                unlink($oldFileOnServer);
                if(file_exists($oldFileOnServer)) {
                    die('File ' . $oldFileOnServer . ' not deleted; permissions issue?');
                }
            } else {
                if(!file_exists($oldFileOnServer)) {
                    echo 'File ' . $oldFileOnServer . ' does not exist.' . "\n";
                }
                if(!file_exists($newFileOnServer)) {
                    echo 'File ' . $newFileOnServer . ' does not exist.' . "\n";
                }
            }
        }
    }
    
    $after = substr(shell_exec('find ' . $directory . ' -type f | wc -l'),0,-1); // calculate difference
    $total = shell_exec('du -h ' . $directory); $total = explode("\n",$total); array_pop($total); $total = array_pop($total); $total = explode("\t",$total); $total = array_shift($total);
    $totalAfter = $total;
    
    echo 'In directory ' . $directory . ' the script has deleted ' . ($before-$after) . ' files - went from ' . $totalBefore . ' to ' . $totalAfter . "\n";
}

if($cleanUpOrphans) {
    /* Clean up orphaned images */
    $dir = Mage::getBaseDir('media') . DS . 'catalog' . DS . 'product';
    $files = glob($dir . DS . '[A-z0-9]' . DS . '[A-z0-9]' . DS . '*');
    foreach($files as $file) {
        if(!is_file($file)) continue;
        $filename = DS . implode(DS,array_slice(explode(DS,$file),-3));
        //echo $filename."\n";
        $results = $db->fetchAll("SELECT * FROM " . $resource->getTableName('catalog_product_entity_media_gallery') . " WHERE value='".$filename."'");
        if(count($results)==0) {
            unlink($file);
            echo 'Deleting orphaned image ' . $filename . "\n";
            $deleted++;
        }
        $total++;
    }
    echo 'Deleted ' . $deleted . ' of total ' . $total;
}

if($cleanUpTableRowsMediaGallery) {
    /* Clean up images from media gallery tables */
    $images = $db->fetchAll("SELECT value,value_id FROM " . $resource->getTableName('catalog_product_entity_media_gallery'));
    foreach($images as $image) {
        if(!file_exists(Mage::getBaseDir('media') . DS . 'catalog' . DS . 'product' . $image['value'])) {
            echo $image['value'] . ' does not exist; deleting.' . "\n";
            $db->query("DELETE FROM " . $resource->getTableName('catalog_product_entity_media_gallery') . " WHERE value_id = ?",array($image['value_id']));
            $db->query("DELETE FROM " . $resource->getTableName('catalog_product_entity_media_gallery_value') . " WHERE value_id = ?",array($image['value_id']));
        }
    }
}

if($cleanUpTableRowsVarchar) {
    /* Clean up images from varchar table */
    $images = $db->fetchAll("SELECT value,value_id FROM " . $resource->getTableName('catalog_product_entity_varchar') . " WHERE attribute_id = ? OR attribute_id = ? OR attribute_id = ?",array($thumbnailAttrId,$smallImageAttrId,$imageAttrId));
    foreach($images as $image) {
        if(!file_exists(Mage::getBaseDir('media') . DS . 'catalog' . DS . 'product' . $image['value'])) {
            echo $image['value'] . ' does not exist; deleting.' . "\n";
            $db->query("DELETE FROM " . $resource->getTableName('catalog_product_entity_varchar') . " WHERE value_id = ?",array($image['value_id']));
        }
    }
}

if($setDefaultImageForProductsWithoutDefaultImage) {
    $products = $db->fetchAll('SELECT sku,entity_id FROM catalog_product_entity');
    foreach($products as $product) {
        $chooseDefaultImage = false;
        $images = $db->fetchAll('select * from catalog_product_entity_varchar where `entity_id` = ? AND (`attribute_id` = ? OR `attribute_id` = ? OR `attribute_id` = ?)', array($product['entity_id'], $imageAttrId,$smallImageAttrId,$thumbnailAttrId));
        if(count($images) == 0) {
            $chooseDefaultImage = true;
        } else {
            foreach($images as $image) {
                if($image['value']== 'no_selection') {
                    $chooseDefaultImage = true;
                    break;
                }
            }
        }
        if($chooseDefaultImage) {
            $defaultImage = $db->fetchOne('SELECT value FROM catalog_product_entity_media_gallery WHERE entity_id = ? AND attribute_id = ? LIMIT 1', array($product['entity_id'],82));
            if($defaultImage) {
                $db->query('INSERT INTO catalog_product_entity_varchar SET entity_type_id = ?, attribute_id = ?, store_id = ?, entity_id = ?, value = ? ON DUPLICATE KEY UPDATE value = ?', array(4,$imageAttrId,0,$product['entity_id'],$defaultImage, $defaultImage));
                $db->query('INSERT INTO catalog_product_entity_varchar SET entity_type_id = ?, attribute_id = ?, store_id = ?, entity_id = ?, value = ? ON DUPLICATE KEY UPDATE value = ?', array(4,$smallImageAttrId,0,$product['entity_id'],$defaultImage, $defaultImage));
                $db->query('INSERT INTO catalog_product_entity_varchar SET entity_type_id = ?, attribute_id = ?, store_id = ?, entity_id = ?, value = ? ON DUPLICATE KEY UPDATE value = ?', array(4,$thumbnailAttrId,0,$product['entity_id'],$defaultImage, $defaultImage));
                echo 'New default image has been set for ' . $product['sku'] . PHP_EOL;
            }
        }
    }
}