dmitry-pro
5/23/2017 - 8:54 PM

EUC-KR to UTF-8. This PHP script detects each file's encoding and, if the encoding is EUC-KR or MS949 (CP949), converts the file to UTF-8.

EUC-KR to UTF-8. This PHP script detects each file's encoding and, if the encoding is EUC-KR or MS949 (CP949), converts the file to UTF-8.

<?php
/**
 * Recursively converts text files below a directory to the target charset.
 *
 * Every regular file whose extension is php, htm, html, js or txt is read and
 * its encoding is detected among UTF-8, CP949 and EUC-KR. When the detected
 * encoding differs from $out_charset, a backup copy named
 * "<filename>.<detected-encoding>.<extension>" is written next to the file and
 * the file itself is overwritten with the converted content. Entries named
 * ".", ".." and ".git" are ignored; every other subdirectory is processed
 * recursively with the same $out_charset. Progress is reported via echo.
 *
 * @param string $dir         Directory to scan.
 * @param string $out_charset Target charset name, passed to iconv().
 * @return void
 */
function batch_convert_encoding($dir, $out_charset = 'utf-8'){
    $handle = opendir($dir);
    if($handle === false){
        echo 'Opening failed.';
        return;
    }
    echo "\n\n## Directory is $dir\n\n";

    // Snapshot the listing before touching anything: backups are created in
    // this same directory, and entries added while readdir() is iterating
    // could otherwise be picked up and converted again in the same pass.
    $entries = array();
    while(false !== ($file = readdir($handle))){
        if(in_array($file, array('.', '..', '.git'), true)){
            continue;
        }
        $entries[] = $file;
    }
    closedir($handle);

    $extensions = array('php', 'htm', 'html', 'js', 'txt');
    foreach($entries as $file){
        $file_path = realpath($dir . DIRECTORY_SEPARATOR . $file);
        if($file_path === false){
            // realpath() fails for entries that cannot be resolved (e.g. a dangling symlink).
            continue;
        }

        // Directories are handled before the extension filter; directory names
        // usually have no extension and must still be descended into.
        if(is_dir($file_path)){
            batch_convert_encoding($file_path, $out_charset);
            continue;
        }
        if(!is_file($file_path)){
            continue;
        }

        $pathinfo = pathinfo($file_path);
        if(empty($pathinfo['extension']) || !in_array(strtolower($pathinfo['extension']), $extensions, true)){
            continue;
        }

        $content = file_get_contents($file_path);
        if($content === false){
            echo "Reading failed: $file_path is skipped.\n";
            continue;
        }

        // Strict detection returns false instead of a best guess when the
        // content is not valid in any of the candidate encodings.
        $encoding = mb_detect_encoding($content, array('utf-8', 'cp949', 'euc-kr'), true);
        if($encoding === false){
            echo "Encoding is not detected: $file_path is skipped.\n";
            continue;
        }
        if(trim(strtolower($encoding)) == trim(strtolower($out_charset))){
            continue;
        }

        // Never write a failed conversion back: iconv() returns false on
        // failure, and writing that would truncate the original file.
        $content_converted = iconv($encoding, $out_charset, $content);
        if($content_converted === false){
            echo "Conversion failed: $file_path is skipped.\n";
            continue;
        }

        // Keep the original bytes next to the file before overwriting it.
        $backup_path = $pathinfo['dirname'] . DIRECTORY_SEPARATOR . $pathinfo['filename'] . '.' . strtolower($encoding) . '.' . $pathinfo['extension'];
        if(!copy($file_path, $backup_path)){
            echo "Backup failed: $file_path is skipped.\n";
            continue;
        }

        if(file_put_contents($file_path, $content_converted) === false){
            echo "Writing failed: $file_path\n";
            continue;
        }
        echo "$encoding to $out_charset: $file_path is converted.\n";
    }
}

// Command-line entry point: the first argument is the directory to scan.
// The argument is compared against '' explicitly because empty() would also
// reject a directory literally named "0".
if(!isset($argv[1]) || $argv[1] === ''){
    echo "Usage: php $argv[0] dir_path\n";
    // Non-zero status so calling shells/scripts can detect the failure.
    exit(1);
}
// realpath() returns false when the given path cannot be resolved.
$dir = realpath($argv[1]);
if($dir === false){
    echo "$argv[1] is incorrect path.";
    exit(1);
}

echo "=== Scanning $dir start! ===\n";

batch_convert_encoding($dir);