yehosef
10/7/2018 - 1:45 PM

RF interview followup

RF interview followup

hash-based algo: 10 took 0.614 ms
dummy n^2 algo: 10 took 0.057 ms
simple n^3 algo: 10 took 0.565 ms

values matched!
hash-based algo: 100 took 0.126 ms
dummy n^2 algo: 100 took 2.806 ms
simple n^3 algo: 100 took 490.489 ms

values matched!
hash-based algo: 200 took 0.139 ms
dummy n^2 algo: 200 took 12.799 ms
simple n^3 algo: 200 took 4,339.552 ms

values matched!
hash-based algo: 300 took 0.228 ms
dummy n^2 algo: 300 took 35.685 ms
simple n^3 algo: 300 took 15,139.607 ms

values matched!
hash-based algo: 1000 took 0.345 ms
dummy n^2 algo: 1000 took 378.190 ms
skipping simple n^3 algo - takes too long

hash-based algo: 10000 took 2.415 ms
dummy n^2 algo: 10000 took 35,642.271 ms
skipping simple n^3 algo - takes too long
<?php

$series_sizes = [10, 100, 200, 300, 1000, 10000];
//$series_sizes = [10, 100];
$range_max = 3;


foreach ($series_sizes as $series_size)
{
    $series_arr[$series_size] = generate_series($series_size, $range_max * -1, $range_max);
}

foreach ($series_arr as $series_size => $series)
{
    $sets_simple = [];
    $sets_hash   = [];

    $start            = microtime(true);
    $sets_hash        = hash_algo($series);
    $elapsed          = (microtime(true) - $start) * 1000;
    $elapsed_formated = number_format($elapsed, 3);
    echo "hash-based algo: $series_size took $elapsed_formated ms\n";


    $start            = microtime(true);
    $sets_dummy       = dummyn2($series);
    $elapsed          = (microtime(true) - $start) * 1000;
    $elapsed_formated = number_format($elapsed, 3);
    echo "dummy n^2 algo: $series_size took $elapsed_formated ms\n";


    if ($series_size < 1000)
    {
        $start            = microtime(true);
        $sets_simple      = simple($series);
        $elapsed          = (microtime(true) - $start) * 1000;
        $elapsed_formated = number_format($elapsed, 3);

        echo "simple n^3 algo: $series_size took $elapsed_formated ms\n";
        echo "\nvalues ";
        echo (serialize($sets_hash) == serialize($sets_simple)) ? "matched!\n" : "do not match!!\n";
    }
    else
    {
        echo "skipping simple n^3 algo - takes too long\n";
    }
}


function hash_algo($series)
{
    $value_hash = [];
    $valid      = [];
    foreach ($series as $offset => $value)
    {
        if (!is_array($value_hash[$value])) $value_hash[$value] = [];
        $value_hash[$value][] = $offset;
    }

    ksort($value_hash);

    foreach ($value_hash as $value_1 => $offsets_1)
    {
        foreach ($value_hash as $value_2 => $offsets_2)
        {
            if ($value_2 < $value_1) continue;  // we did it already..
            $match_value = ($value_1 + $value_2) * -1;
            if (isset($value_hash[$match_value])           // we have value that will fill it
                && ($value_1 || $value_2 || $match_value)) // skip all zeros
            {
                // todo - could have check if there is enough if reusing (eg. -1, -1 => 2 ; using -1 twice)
                // but for most series >10, always true.. ignoring
                $valid = add_valid_set($value_1, $value_2, $match_value, $valid);
            }

        }
    }

    return $valid;
}


function simple($series)
{
    $valid = [];
    foreach ($series as $value_1)
    {
        foreach ($series as $value_2)
        {
            foreach ($series as $value_3)
            {
                if ($value_1 + $value_2 + $value_3 === 0 && ($value_1 || $value_2 || $value_3))
                {
                    $valid = add_valid_set($value_1, $value_2, $value_3, $valid);
                }
            }
        }
    }

    return $valid;
}


function dummyn2($series)
{
    $valid = [];
    foreach ($series as $value_1)
    {
        foreach ($series as $value_2)
        {
            $value_3 = $value_2;

            if ($value_1 + $value_2 + $value_3 === 0 && ($value_1 || $value_2 || $value_3))
            {
                $valid = add_valid_set($value_1, $value_2, $value_3, $valid);
            }
        }
    }

    return $valid;
}


/**
 * @param $value_1
 * @param $value_2
 * @param $value_3
 * @param $valid
 * @return mixed
 */
function add_valid_set($value_1, $value_2, $value_3, $valid)
{
    $set = [$value_1, $value_2, $value_3];
    // this is to deduplicate - keep memory down on larger sets
    sort($set);
    if (!is_array($valid[$set[0]])) $valid[$set[0]] = [];
    $valid[$set[0]][$set[1]] = $set[2];

    // normalize the sets - helps for comparing results of different algorithms
    ksort($valid[$set[0]]);
    ksort($valid);

    return $valid;
}


function generate_series($count, $min = -3, $max = 3)
{
    $series = [];
    for ($i = 0; $i < $count; $i++)
    {
        $series[] = rand($min, $max);
    }

    return $series;
}