aaron-g
9/6/2017 - 7:51 PM

data anonymizer

Takes a CSV of data and anonymizes the values in chosen columns.

#locations of the customer data spreadsheet (input) and of the anonymized copy (output)
$inputDataFile = 'C:\temp\inputData.csv'
$outputDataFileAnonymized = 'c:\temp\anonymized_data.csv'

#load every row of the input csv as an object
$dataToAnonymize = Import-Csv -Path $inputDataFile

#note: no copy of the imported records is made; the psobjects in $dataToAnonymize are edited in place further down.


#names of the columns whose values get replaced by anonymized placeholders
$anonymizeColumns = 'UserName', 'Email', 'FullName', 'Manager', 'Domain'

#build one lookup per column to anonymize.
#$uniqueValuesHashes ends up holding one single-entry hashtable per column: @{ <column name> = <lookup> },
#where <lookup> maps every distinct original value of that column to "<column name><n>" (n counts up from 1
#in order of first appearance).

$uniqueValuesHashes = foreach($ac in $anonymizeColumns)
{
    $anonymizerCount = 0

    #a single lookup per column. New-Object gives a case-sensitive hashtable (unlike the @{} literal), so
    #values that differ only in case stay distinct keys of the same table instead of colliding.
    $uniqueValuesHash = New-Object System.Collections.Hashtable

    foreach($record in $dataToAnonymize)
    {
        $originalValue = $record.$ac

        #$null cannot be used as a hashtable key; values already seen keep their first placeholder
        if($null -eq $originalValue -or $uniqueValuesHash.ContainsKey($originalValue)){ continue }

        $anonymizerCount++
        $uniqueValuesHash[$originalValue] = "$ac$anonymizerCount"
    }

    #output the column's lookup keyed by the column name; the foreach collects these into the array
    @{ $ac = $uniqueValuesHash }
}


#loop through each record from the data file and, for every column that has a lookup, replace the
#record's original value with its anonymized value. The records are modified in place.

foreach( $data in $dataToAnonymize  )
{
    foreach($uvh in $uniqueValuesHashes)
    {
        #each $uvh carries one entry: column name -> lookup(s) for that column
        foreach($propertyName in $uvh.Keys)
        {
            $originalValue = $data.$propertyName

            #nothing to anonymize when the column is missing or empty on this record
            if(-not $originalValue){ continue }

            #the column's lookup may be a single hashtable or an array of hashtables; probe each one
            #explicitly and take the first match so exactly one value is ever assigned.
            $anonymizedValue = $null
            foreach($lookup in @($uvh[$propertyName]))
            {
                if($null -ne $lookup -and $lookup.ContainsKey($originalValue))
                {
                    $anonymizedValue = $lookup[$originalValue]
                    break
                }
            }

            #a value without a lookup entry is blanked rather than left in clear text
            $data.$propertyName = $anonymizedValue
        }
    }

}

#write the anonymized records to the output csv, without the #TYPE header line
$dataToAnonymize |
    Export-Csv -Path $outputDataFileAnonymized -NoTypeInformation