danijeel
3/30/2015 - 8:13 AM

Convert files to utf-8 and remove utf8_decode / utf8_encode Proof of concept

Convert files to utf-8 and remove utf8_decode / utf8_encode Proof of concept

#!/usr/bin/env php
<?php
/**
 * PHP convert file encoding
 *
 * Command-line script to convert php files encoding to UTF-8
 * This has only been tested on a MacBook; it probably works on other Unix systems.
 * 
 * Requires file-5.04 and iconv (GNU libiconv 1.11)
 * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man1/file.1.html
 * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man1/iconv.1.html
 *
 * @license http://opensource.org/licenses/MIT
 * 
 * The MIT License (MIT)
 *
 * Copyright (c) 2015 Daniel Nilsson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * @link https://github.com/danijeel/file-converter
 *
 *
 * @author Daniel Nilsson <daniel.nilsson1989@gmail.com>
 *
 * @todo select directory path as argument
 * 
*/
// Wall-clock timestamp (seconds, as a float) taken when the script starts; the
// closing summary subtracts it from the finish time to report the total run time.
$global_start = microtime(true);

// PHP files found in the target directory, each as ['file' => name, 'path' => dir].
$files = [];
// Files that still need converting: same shape as $files plus an 'encoding' key.
$queue = [];

// Matches one utf8_decode(...) / utf8_encode(...) call and captures the text
// between its parentheses as the last (and only) capture group.
//
// The capture is recursive ((?1) re-enters group 1) so nested parentheses are
// kept balanced: in `trim(utf8_decode(foo($a))) . bar($b)` only
// `utf8_decode(foo($a))` is matched. A greedy `(.*)` would instead run to the
// last `)` on the line and corrupt everything after the call.
// \b keeps look-alike names such as my_utf8_decode() from matching.
$pattern = '/\butf8_(?:de|en)code\s*\(((?:[^()]++|\((?1)\))*+)\)/';

// Directory to convert: first command-line argument if given, otherwise ./files.
$path = isset($argv[1]) ? $argv[1] : './files';


// Collect every regular file with a "php" extension that sits directly inside
// $path (no recursion), remembering its file name and containing directory.
foreach (new DirectoryIterator($path) as $entry) {
    $isPhpFile = !$entry->isDot()
        && $entry->isFile()
        && $entry->getExtension() == 'php';

    if (!$isPhpFile) {
        continue;
    }

    $files[] = [
        'file' => $entry->getFilename(),
        'path' => $entry->getPath(),
    ];
}

// Work from inside the target directory so the bare file names collected above
// resolve correctly. Carrying on after a failed chdir() would inspect (and later
// overwrite) same-named files in whatever directory the script was started from.
if (!chdir($path)) {
    fwrite(STDERR, sprintf('Unable to change directory to %s' . PHP_EOL, $path));
    exit(1);
}

// Encodings reported by file(1) that need no conversion (utf-8) or that iconv
// has no converter for (binary, unknown-8bit).
$skippedEncodings = ['utf-8', 'binary', 'unknown-8bit'];

foreach ($files as $file) {
    // -b prints only the result and --mime-encoding only the charset, so no awk
    // post-processing is needed. The "./" prefix stops a file name that starts
    // with "-" from being parsed as an option; escapeshellarg() handles spaces
    // and shell metacharacters.
    $command = sprintf(
        'file -b --mime-encoding %s',
        escapeshellarg('./' . $file['file'])
    );

    // exec() APPENDS to an existing array, so start from an empty one for every
    // file; otherwise index 0 would always hold the first file's encoding.
    $output = [];
    $status = 0;
    exec($command, $output, $status);

    $encoding = isset($output[0]) ? strtolower(trim($output[0])) : '';

    if ($status !== 0 || strlen($encoding) <= 1) {
        continue;
    }
    if (in_array($encoding, $skippedEncodings, true)) {
        continue;
    }

    $queue[] = [
        'file'     => $file['file'],
        'path'     => $file['path'],
        'encoding' => $encoding
    ];
}
$queueCount = count($queue);
echo sprintf("Antal att konvertera: %d st filer" . PHP_EOL, $queueCount);

// Replacement callback: swaps a matched utf8_decode()/utf8_encode() call for the
// text of the pattern's last capture group (the call's argument). Returning the
// captured text from a callback inserts it literally, so "$1" or "\0" sequences
// inside the PHP source being rewritten are not treated as back-references.
$unwrapCall = function (array $match) {
    return end($match);
};

foreach ($queue as $index => $fileInfo) {

    echo sprintf("Formaterar: fil: %s" . PHP_EOL, $fileInfo['file']);

    // Both arguments are shell-escaped; the "./" prefix keeps a file name that
    // starts with "-" from being read as an iconv option.
    $command = sprintf(
        'iconv -f %s -t utf-8 %s',
        escapeshellarg($fileInfo['encoding']),
        escapeshellarg('./' . $fileInfo['file'])
    );

    // exec() appends to an existing array, so reset it for every file.
    // Note: exec() drops trailing whitespace from each captured line.
    $content = [];
    $status = 0;
    exec($command, $content, $status);

    if ($status !== 0) {
        // iconv failed (unknown encoding, invalid byte sequence, ...). Its output
        // is empty or truncated, so saving it would destroy the original file.
        // Leave the file untouched and keep it in $queue so the final
        // "queue:" line reports it as unprocessed.
        fwrite(STDERR, sprintf(
            'iconv failed for %s (exit code %d); file left untouched' . PHP_EOL,
            $fileInfo['file'],
            $status
        ));
        // Keep the closing "Converted N files" summary accurate.
        $queueCount--;
        continue;
    }

    $rows = count($content);
    echo sprintf(PHP_EOL .'Antal rader: %d' . PHP_EOL, $rows);
    $file_start = startFileTimer();

    foreach ($content as $row => $line) {
        // remove utf8_encode or utf8_decode
        // Repeat until nothing matches so nested calls such as
        // utf8_encode(utf8_decode($x)) are fully unwrapped. Every replacement
        // shortens the line, so the loop always terminates.
        do {
            $replaced = 0;
            $stripped = preg_replace_callback($pattern, $unwrapCall, $line, -1, $replaced);
            if ($stripped === null) {
                // PCRE error (e.g. backtrack limit): keep the line as it is.
                break;
            }
            $line = $stripped;
        } while ($replaced > 0);

        // $line is a by-value copy; write the result back through its key so
        // the change actually reaches the saved file.
        $content[$row] = $line;
    }

    saveFile($fileInfo['file'], $content);
    stopFileTimer($file_start);
    echo sprintf(PHP_EOL . "%s Completed" . PHP_EOL, $fileInfo['file']);
    unset($queue[$index]);
}

// Report the total wall-clock time for the run and how many queue entries are
// still left unprocessed.
$finish = microtime(true);
$total_time = round($finish - $global_start, 4);

printf(PHP_EOL . "Converted %s files in %s seconds" . PHP_EOL, $queueCount, $total_time);
echo 'queue: ' . count($queue) . PHP_EOL;

/**
 * Write the given lines to a file, replacing any previous content.
 *
 * Echoes the file name (preceded and followed by PHP_EOL) before writing.
 * Every element of $content is written followed by PHP_EOL.
 *
 * @param string   $filename Path of the file to create or overwrite.
 * @param string[] $content  Lines to write, without trailing line breaks.
 *
 * @return void
 *
 * @throws RuntimeException If the file cannot be opened or a write fails.
 */
function saveFile($filename, $content)
{
    echo PHP_EOL . $filename . PHP_EOL;

    $handle = fopen($filename, "wb");
    if ($handle === false) {
        // Without this check every fwrite() below would be handed `false`
        // instead of a stream and the failure would surface far from its cause.
        throw new RuntimeException(sprintf('Unable to open "%s" for writing', $filename));
    }

    foreach ($content as $line) {
        if (fwrite($handle, $line . PHP_EOL) === false) {
            fclose($handle);
            throw new RuntimeException(sprintf('Unable to write to "%s"', $filename));
        }
    }

    fclose($handle);
}
/**
 * Take a start timestamp for timing the processing of a single file.
 *
 * @return float Current Unix time in seconds, including the microsecond fraction.
 */
function startFileTimer()
{
    return microtime(true);
}
/**
 * Print how long a file took to process.
 *
 * Echoes the seconds elapsed since $start, rounded to four decimals.
 *
 * @param float $start Timestamp previously returned by startFileTimer().
 *
 * @return void
 */
function stopFileTimer($start)
{
    $elapsed = round(microtime(true) - $start, 4);

    printf(PHP_EOL . "File converted in %s seconds" . PHP_EOL, $elapsed);
}