Parse Data File and Skip Duplicates
<?php
// Global Config
$counter = 0;       // number of input lines read so far
$max = 100000;      // maximum number of input lines to process
$ids = array();     // set of IDs already written; IDs are stored as array KEYS so lookup is O(1)
$info = array();
// PHP has no built-in now(); microtime(true) returns the current Unix time as float seconds.
$info['started'] = microtime(true);
// Progress is reported about ten times per run. Computed once (loop-invariant) and
// clamped to 1 so that a $max below 10 cannot cause a modulo-by-zero.
$info['report_every'] = max(1, (int) ($max / 10));

/**
 * Seconds elapsed since the script recorded its start time.
 *
 * Reads the start timestamp from the global $info['started'].
 *
 * @return float Elapsed time in seconds, with sub-second resolution.
 */
function get_time_difference(){
    global $info;
    return microtime(true) - $info['started'];
}

// In-file
$in = array();
$in['filepath'] = 'data.txt';
$in['fh'] = fopen($in['filepath'], 'r');
if ($in['fh'] === false) {
    throw new RuntimeException('Unable to open input file: '.$in['filepath']);
}

// Out-file
$out = array();
$out['filepath'] = 'data.unique.txt';
$out['fh'] = fopen($out['filepath'], 'w');
if ($out['fh'] === false) {
    fclose($in['fh']);
    throw new RuntimeException('Unable to open output file: '.$out['filepath']);
}

// Read in-file & write to out-file.
// The line limit is tested first so no line is consumed and then discarded.
// fgets() returns false at end of file, which ends the loop without the extra
// "phantom" iteration that a feof()-guarded loop performs.
while ($counter < $max && ($in['line'] = fgets($in['fh'])) !== false) {
    $counter++;
    if ($counter % $info['report_every'] === 0) {
        echo $counter.' in '.round(get_time_difference(), 3)."\n";
    }

    // The ID is the first tab-separated column. Strip the line terminator so an
    // ID-only line compares equal whether or not it ends with a newline.
    $in['columns'] = explode("\t", $in['line']);
    $in['id'] = rtrim($in['columns'][0], "\r\n");

    // Key lookup is exact: unlike a loose in_array(), "1" and "01" stay distinct.
    if (isset($ids[$in['id']])) {
        continue;
    }

    $ids[$in['id']] = true;
    if (fwrite($out['fh'], $in['line']) === false) {
        throw new RuntimeException('Unable to write to output file: '.$out['filepath']);
    }
}

// Complete
fclose($in['fh']);
fclose($out['fh']);
echo "Finished";
?>