GroupDocsGists
10/24/2017 - 1:18 PM

Examples-CSharp-GroupDocs.Text.Examples.CSharp-Utilities-WordStatistic-WordStatistic.cs

// For complete examples and data files, please go to https://github.com/groupdocs-text/GroupDocs.Text-for-.NET
// Builds a word-frequency table for the document at `fileName`: every word longer
// than `maxWordLength` characters is counted case-insensitively in `statistic`.
// (`fileName` and `maxWordLength` are supplied by the surrounding code.)
ExtractorFactory factory = new ExtractorFactory();
Dictionary<string, int> statistic = new Dictionary<string, int>();

TextExtractor extractor = factory.CreateTextExtractor(fileName);
if (extractor == null)
{
    // No extractor was created for this file, so there is nothing to count.
    Console.WriteLine("The document's format is not supported");
    return;
}

try
{
    // Characters treated as word boundaries; allocated once instead of per line.
    char[] separators = { ' ', ',', ';', '.' };

    // ExtractLine() returning null ends the loop.
    string line;
    while ((line = extractor.ExtractLine()) != null)
    {
        // Adjacent separators (e.g. ", ") would otherwise yield empty tokens.
        string[] words = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
        foreach (string w in words)
        {
            // Invariant lower-casing keeps the keys stable regardless of the
            // current culture (ToLower() maps 'I' differently under tr-TR, for example).
            string word = w.Trim().ToLowerInvariant();

            // Skip tokens that were only whitespace as well as words that are too short.
            if (word.Length == 0 || word.Length <= maxWordLength)
            {
                continue;
            }

            // TryGetValue leaves `occurrences` at 0 for a new word, so a single
            // lookup handles both the "first seen" and the "seen before" case.
            int occurrences;
            statistic.TryGetValue(word, out occurrences);
            statistic[word] = occurrences + 1;
        }
    }
}
finally
{
    // Always release the extractor, even if extraction throws.
    extractor.Dispose();
}

Console.WriteLine("Top words:");

// Print up to ten entries, most frequent first. Each pass finds the entry with
// the highest count, prints it, and removes it from `statistic` so the next pass
// finds the runner-up. On equal counts the entry enumerated first wins.
int printed = 0;
while (printed < 10)
{
    string bestWord = null;
    int bestCount = -1;

    foreach (KeyValuePair<string, int> entry in statistic)
    {
        if (entry.Value > bestCount)
        {
            bestWord = entry.Key;
            bestCount = entry.Value;
        }
    }

    // Nothing was selected: the table has run out of words.
    if (bestWord == null)
    {
        break;
    }

    Console.WriteLine("{0}: {1}", bestWord, bestCount);
    statistic.Remove(bestWord);
    printed++;
}