Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
95 lines (77 sloc) 3.79 KB
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace RandomnessTest
{
public class RepeatingPatternDiscovery
{
public static Dictionary<string, List<int>> DiscoverPatterns(string raw, int patternSizeMin = 2)
{
if (patternSizeMin < 1)
throw new Exception("Minimum pattern size should not be less than 1.");
Dictionary<string, HashSet<int>> initialPatternSearch = new Dictionary<string, HashSet<int>>();
Dictionary<string, HashSet<int>> discoveredPatterns = new Dictionary<string, HashSet<int>>();
int numberOfInitialSearches = raw.Length - (patternSizeMin - 1);
for (int i = 0; i < numberOfInitialSearches; i++)
{
string pattern = raw.Substring(i, patternSizeMin);
HashSet<int> patternIndexes;
if (initialPatternSearch.TryGetValue(pattern, out patternIndexes))
{
discoveredPatterns[pattern] = patternIndexes;
}
else
{
patternIndexes = new HashSet<int>();
initialPatternSearch.Add(pattern, patternIndexes);
}
patternIndexes.Add(i);
}
initialPatternSearch.Clear();
Dictionary<string, HashSet<int>> basePatterns = discoveredPatterns;
do
{
Dictionary<string, HashSet<int>> superPatterns = discoverSuperPatterns(raw, ++patternSizeMin, basePatterns);
foreach (KeyValuePair<string, HashSet<int>> superPattern in superPatterns)
{
string basePattern = superPattern.Key.Substring(0, patternSizeMin - 1);
discoveredPatterns.Remove(basePattern);
string baseInnerPattern = superPattern.Key.Substring(1, patternSizeMin - 1);
discoveredPatterns.Remove(baseInnerPattern);
discoveredPatterns.Add(superPattern.Key, superPattern.Value);
}
basePatterns = superPatterns;
} while (basePatterns.Count != 0);
return discoveredPatterns.OrderByDescending(pattern => pattern.Key.Length).ToDictionary(pattern => pattern.Key, x => x.Value.ToList());
}
private static Dictionary<string, HashSet<int>> discoverSuperPatterns(string raw, int patternSize, Dictionary<string, HashSet<int>> basePatterns)
{
Dictionary<string, HashSet<int>> initialSuperPatternSearch = new Dictionary<string, HashSet<int>>();
Dictionary<string, HashSet<int>> discoveredSuperPatterns = new Dictionary<string, HashSet<int>>();
foreach (KeyValuePair<string, HashSet<int>> basePattern in basePatterns)
{
foreach (int patternIndex in basePattern.Value)
{
if (raw.Length >= (patternIndex + patternSize))
{
string superPattern = raw.Substring(patternIndex, patternSize);
HashSet<int> superPatternIndexes;
if (initialSuperPatternSearch.TryGetValue(superPattern, out superPatternIndexes))
{
discoveredSuperPatterns[superPattern] = superPatternIndexes;
}
else
{
superPatternIndexes = new HashSet<int>();
initialSuperPatternSearch.Add(superPattern, superPatternIndexes);
}
superPatternIndexes.Add(patternIndex);
}
}
}
return discoveredSuperPatterns;
}
}
}