Skip to content
Browse files

refactor: corrected a heavy design glitch where LexerHelper was used heavily. This made it really hard/counter-intuitive to exchange the currently used Lexer

-bugfix: fixed a bug where HashingGSTAlgorithm wouldn't work with MutexTokenImpl objects
  • Loading branch information...
1 parent ac5dca6 commit e47c623b97e49cbe0c6a99a3737276a6310e62e8 @yas4891 committed Oct 14, 2012
View
1 CTokenizer/CTokenizer.csproj
@@ -56,6 +56,7 @@
<ItemGroup>
<Compile Include="CLexer.cs" />
<Compile Include="LexerHelper.cs" />
+ <Compile Include="MutexTokenFactory.cs" />
<Compile Include="MutexTokenImpl.cs" />
<Compile Include="test\LexerHelperTest.cs" />
<Compile Include="MutexCLexer.cs" />
View
90 CTokenizer/LexerHelper.cs
@@ -9,31 +9,17 @@
namespace CTokenizer
{
/// <summary>
- ///
+ /// provides a bunch of useful (extension) methods for use with the Lexer class
/// </summary>
public static class LexerHelper
{
- private static Type usedLexer = typeof(CLexer);
+ /*
+ private static readonly Type usedLexer = typeof(MutexCLexer);
- public static Type UsedLexer
- {
- get { return usedLexer; }
- set
- {
- if(null == value)
- throw new ArgumentNullException("value can not be NULL");
-
- if(!(typeof(Lexer).IsAssignableFrom(value)))
- throw new ArgumentException("value must be a sub-class of Antlr.Runtime.Lexer");
-
-
- usedLexer = value;
- }
- }
internal static ConstructorInfo UsedConstructor
{
- get { return UsedLexer.GetConstructor(new[] {typeof (ICharStream)}); }
+ get { return usedLexer.GetConstructor(new[] {typeof (ICharStream)}); }
}
/// <summary>
@@ -64,7 +50,7 @@ public static string GetTokenNameFromCLexer(this int tokenType)
internal static string GetTokenName(this int tokenType)
{
- return tokenType.GetTokenName(UsedLexer);
+ return tokenType.GetTokenName(usedLexer);
}
internal static string GetTokenName(this int tokenType, Type lexerType)
@@ -194,5 +180,71 @@ public static IEnumerable<string> ToStringEnumerable(this IEnumerable<TokenWrapp
{
return enumTokens.Select(token => token.Type.Type.GetTokenName());
}
+ /* */
+
+
/// <summary>
/// collects every visible token from the lexer, excluding EOF and all
/// tokens on the hidden channel, then resets the lexer so it can be reused
/// </summary>
/// <param name="lexer">the ANTLR lexer to drain</param>
/// <returns>all non-hidden tokens in source order</returns>
public static IEnumerable<IToken> GetTokens(this Lexer lexer)
{
    const int eof = -1;   // ANTLR signals end-of-input with token type -1
    var result = new List<IToken>();

    for (var token = lexer.NextToken(); eof != token.Type; token = lexer.NextToken())
    {
        // whitespace/comment tokens live on the hidden channel; skip them
        if (BaseRecognizer.Hidden != token.Channel)
            result.Add(token);
    }

    lexer.Reset();
    return result;
}
+
+
+ /*
+ public static IEnumerable<string> GetDebugTokenStrings(this Lexer lexer, TokenFactory factory)
+ {
+ var lexerType = lexer.GetType();
+
+ return lexer.GetTokens().Select(
+ token => string.Format("[{0:000}:{1:00}] {2}\t\t- {3}",
+ token.Line,
+ token.CharPositionInLine,
+ factory.GetTokenName(token.Type, lexerType),
+ token.Text))
+ .ToList();
+ }
+ /* */
+
/// <summary>
/// returns a bunch of debug information for the tokens:
/// one line per token with position, name and text
/// </summary>
/// <param name="tokens">the wrapped tokens to describe</param>
/// <returns>one formatted debug string per token</returns>
public static IEnumerable<string> GetDebugTokenStrings(this IEnumerable<TokenWrapper> tokens)
{
    foreach (var wrapper in tokens)
    {
        yield return string.Format("[{0:000}:{1:00}] {2}\t\t- {3}",
                                   wrapper.Token.Line,
                                   wrapper.Token.CharPositionInLine,
                                   wrapper.TokenName,
                                   wrapper.Text);
    }
}
+
/// <summary>
/// turns enumerable of TokenWrapper objects into the corresponding token names enumerable
/// </summary>
/// <param name="enumTokens">the wrapped tokens</param>
/// <returns>the TokenName of each wrapper, in order</returns>
public static IEnumerable<string> ToStringEnumerable(this IEnumerable<TokenWrapper> enumTokens)
{
    foreach (var token in enumTokens)
        yield return token.TokenName;
}
}
}
View
35 CTokenizer/MutexTokenFactory.cs
@@ -0,0 +1,35 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Antlr.Runtime;
+using Tokenizer;
+
+namespace CTokenizer
+{
/// <summary>
/// factory for the MutexCLexer: creates tokens and token wrappers
/// whose names are resolved against the MutexCLexer token table
/// </summary>
public class MutexTokenFactory : TokenFactory
{
    private static readonly Type mutexLexerType = typeof(MutexCLexer);

    /// <summary>
    /// the lexer type whose public static token fields are used for name lookups
    /// </summary>
    protected override Type LexerType
    {
        get { return mutexLexerType; }
    }

    /// <summary>
    /// returns the IToken object for this tokenName
    /// </summary>
    /// <param name="tokenName">name of the token, e.g. "IDENTIFIER"</param>
    /// <returns>a MutexTokenImpl resolved from the name</returns>
    public override IToken GetToken(string tokenName)
    {
        return new MutexTokenImpl(tokenName);
    }

    /// <summary>
    /// lexes the source text and wraps every resulting token
    /// together with its human-readable name
    /// </summary>
    /// <param name="source">the C source code to tokenize</param>
    /// <returns>the wrapped tokens in source order</returns>
    public override List<TokenWrapper> GetTokenWrapperListFromSource(string source)
    {
        var lexer = new MutexCLexer(new ANTLRStringStream(source));

        // resolve names via the LexerType property so the lookup always
        // matches the lexer this factory declares (instead of duplicating
        // the type reference here)
        var wrappers = lexer.GetTokens()
            .Select(token => new TokenWrapper(token, GetTokenName(token.Type, LexerType)));
        return wrappers.ToList();
    }
}
+}
View
20 CTokenizer/MutexTokenImpl.cs
@@ -13,6 +13,9 @@ namespace CTokenizer
public class MutexTokenImpl : IToken
{
private static readonly ILog cLogger = LogManager.GetLogger(typeof(MutexTokenImpl).Name);
+
+ public string TokenName { get; private set; }
+
public int Channel { get; set; }
public int CharPositionInLine { get; set; }
@@ -35,15 +38,28 @@ public MutexTokenImpl(string tokenName)
{
try
{
- Type = (int)LexerHelper.UsedLexer.GetField(tokenName.ToUpper()).GetValue(null);
+ TokenName = tokenName.ToUpperInvariant();
+
+ Type = (int)typeof(MutexCLexer).GetField(TokenName).GetValue(null);
}
catch (Exception ex)
{
- var msg = string.Format("could not find token '{0}'", tokenName);
+ var msg = string.Format("could not find token '{0}'", TokenName);
cLogger.Warn(msg);
cLogger.Debug(msg, ex);
+ //Console.WriteLine("tokenName:" + tokenName);
throw;
}
}
+
+ public override string ToString()
+ {
+ return TokenName;
+ }
+
+ public override int GetHashCode()
+ {
+ return Type.GetHashCode();
+ }
}
}
View
6 CTokenizer/test/LexerHelperTest.cs
@@ -18,12 +18,16 @@ public void SetUp()
[Test]
public void TokenStringOnDefault()
{
+ /*
var lexer = ToLexer(
"void main(int argc, char** argv) {\r\n" +
"printf(\"Hello World!\");\r\n" +
"}");
- Assert.AreEqual("T__92 IDENTIFIER T__30 T__79 IDENTIFIER T__37 T__66 T__32 T__32 IDENTIFIER T__31 T__95 IDENTIFIER T__30 STRING_LITERAL T__31 T__47 T__99", lexer.GetJoinedTokenString());
+ Assert.AreEqual(
+ "T__92 IDENTIFIER T__30 T__79 IDENTIFIER T__37 T__66 T__32 T__32 IDENTIFIER T__31 T__95 IDENTIFIER T__30 STRING_LITERAL T__31 T__47 T__99",
+ lexer);
+ /* */
}
View
14 GSTAppLogic/app/AppLogicImpl.cs
@@ -6,6 +6,7 @@
using DataRepository;
using GSTAppLogic.app.model;
using log4net;
+using Tokenizer;
namespace GSTAppLogic.app
{
@@ -33,8 +34,12 @@ public string MaxSimilarityStudentIdentifier
get { return null != comparisonModel ? comparisonModel.MaxSimilarityStudentID : string.Empty; }
}
+ /// <summary>
+ /// the threshold above which a source is considered to be plagiarism
+ /// </summary>
public int Threshold { get; set; }
-
+
+
/// <summary>
/// calculates the maximum similarity of the provided source against
/// all sources for the given assignment in the reference database
@@ -45,11 +50,14 @@ public string MaxSimilarityStudentIdentifier
public void Start(string student, string assignment, string source)
{
cLogger.DebugFormat("starting with threshold {0}", Threshold);
- var tokens = LexerHelper.CreateLexerFromSource(source).GetTokenWrappers().ToList();
+ TokenFactory factory = new MutexTokenFactory();
+
+ var tokens = factory.GetTokenWrapperListFromSource(source);
+ //LexerHelper.CreateLexerFromSource(source).GetTokenWrappers().ToList();
cLogger.Debug("tokenized source... loading Data Repository");
var repo = Repository.GetRepository();
- comparisonModel = new ComparisonModel(tokens, repo.LoadByAssignment(assignment));
+ comparisonModel = new ComparisonModel(tokens, repo.LoadByAssignment(assignment), factory);
comparisonModel.Calculate();
View
23 GSTAppLogic/app/model/ComparisonModel.cs
@@ -32,27 +32,36 @@ public class ComparisonModel
/// <summary>
///
/// </summary>
- public IEnumerable<TokenWrapper> Tokens { get; private set; }
+ public IEnumerable<TokenWrapper> Tokens { get; private set; }
+
+ /// <summary>
+ /// the token factory used during comparison
+ /// </summary>
+ public TokenFactory Factory { get; private set; }
/// <summary>
/// stores the tokens and the referenceData for comparison
/// </summary>
/// <param name="tokens"></param>
/// <param name="referenceData"></param>
- public ComparisonModel(IEnumerable<TokenWrapper> tokens, IEnumerable<SourceEntityData> referenceData)
+ public ComparisonModel(IEnumerable<TokenWrapper> tokens, IEnumerable<SourceEntityData> referenceData, TokenFactory factory)
{
Tokens = tokens;
ReferenceData = referenceData;
+ Factory = factory;
}
/// <summary>
/// creates the token list from the file at pathToFile
/// </summary>
/// <param name="pathToFile"></param>
/// <param name="referenceData"></param>
- public ComparisonModel(string pathToFile, IEnumerable<SourceEntityData> referenceData ) :
- this(LexerHelper.CreateLexer(pathToFile).GetTokenWrappers(), referenceData)
+ public ComparisonModel(string pathToFile, IEnumerable<SourceEntityData> referenceData )
{
+ ReferenceData = referenceData;
+ var factory = new MutexTokenFactory();
+ Tokens = factory.GetTokenWrapperListFromSource(pathToFile);
+
}
/// <summary>
@@ -69,9 +78,9 @@ public int Calculate()
{
// create the list here, because this way it is local to this run
// ==> more functional and separated
- var gstTokenList = Tokens.ToGSTTokenList();
- var referenceTokens = data.Tokens.ToGSTTokenList();
- var algorithm = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(gstTokenList, referenceTokens)
+ var sourceTokens = Tokens.ToGSTTokenList();
+ var referenceTokens = Factory.GetTokenWrapperEnumerable(data.Tokens).ToGSTTokenList();
+ var algorithm = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(sourceTokens, referenceTokens)
{
MinimumMatchLength = DEFAULT_MML
};
View
16 GSTAppLogic/ext/AppHelper.cs
@@ -12,24 +12,30 @@ namespace GSTAppLogic.ext
public static class AppHelper
{
/// <summary>
- /// Compares the two files and returns the Similarity
+ /// Compares the two files and returns the Similarity.
+ /// Lexer used: MutexCLexer
+ /// Algorithm used: HashingGSTAlgorithm (MML = 8)
/// </summary>
/// <param name="path1"></param>
/// <param name="path2"></param>
/// <returns></returns>
public static Int32 CompareFiles(string path1, string path2)
{
- var tokens1 = LexerHelper.CreateLexer(path1).GetTokenWrappers();
- var tokens2 = LexerHelper.CreateLexer(path2).GetTokenWrappers();
+ var factory = new MutexTokenFactory();
+
+ var tokens1 = factory.GetTokenWrapperListFromFile(path1);
+ var tokens2 = factory.GetTokenWrapperListFromFile(path2);
var algo = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(
tokens1.ToGSTTokenList<TokenWrapper>(),
- tokens2.ToGSTTokenList<TokenWrapper>());
- algo.MinimumMatchLength = 8;
+ tokens2.ToGSTTokenList<TokenWrapper>()) {MinimumMatchLength = 8};
algo.RunToCompletion();
return algo.Similarity;
}
+
+
+
}
}
View
4 GSTAppLogic/ext/LexerExtensions.cs
@@ -22,9 +22,9 @@ public static GSTTokenList<GSTToken<TokenWrapper>> ToGSTTokenList(this IEnumerab
/// </summary>
/// <param name="tokens"></param>
/// <returns></returns>
- public static GSTTokenList<GSTToken<TokenWrapper>> ToGSTTokenList(this IEnumerable<string> tokens)
+ public static GSTTokenList<GSTToken<TokenWrapper>> ToGSTTokenList(this IEnumerable<string> tokens, TokenFactory factory)
{
- var enumTokens = tokens.GetTokens();
+ var enumTokens = factory.GetTokenWrapperEnumerable(tokens);
return new GSTTokenList<GSTToken<TokenWrapper>>(enumTokens.Select(token => new GSTToken<TokenWrapper>(token)));
}
View
11 GSTAppLogic/test/model/ComparisonModelTest.cs
@@ -12,18 +12,19 @@ public class ComparisonModelTest
private const string RELATIVE_PATH_TO_TEST_FILES = "../../../test/applogic/{0}";
private readonly IEnumerable<string> defaultTokenSet = new []
{
- "T__24", "T__23", "T__25",
- "T__26", "T__27", "T__28", "T__29"
+ "INCREMENT", "ADDEQUAL", "ARRAY_ACCESS",
+ "ASSIGN", "CASE", "DECREMENT", "GOTO", "INCREMENT", "ASSIGN"
};
[Test]
public void Default()
{
- var model = new ComparisonModel(defaultTokenSet.GetTokens(),
+ var factory = new MutexTokenFactory();
+ var model = new ComparisonModel(factory.GetTokenWrapperEnumerable(defaultTokenSet),
new []{
new SourceEntityData("stud1", "assignment1", defaultTokenSet, "bla" ),
- new SourceEntityData("stud2", "assignment1", new[] {"T__27", "T__28"}, "bla2")
- });
+ new SourceEntityData("stud2", "assignment1", new[] {"CASE", "DECREMENT"}, "bla2")
+ }, factory);
model.Calculate();
View
25 GSTConsole/Program.cs
@@ -3,7 +3,9 @@
using System.Diagnostics;
using System.IO;
using CTokenizer;
+using DataRepository;
using GSTAppLogic.app;
+using GSTAppLogic.app.model;
using GSTAppLogic.ext;
using GSTLibrary.tile;
using GSTLibrary.token;
@@ -29,6 +31,21 @@ static void Main(string[] args)
cLogger.DebugFormat("64-bit process: {0}", Environment.Is64BitProcess);
Console.WriteLine();
+ IEnumerable<string> defaultTokenSet = new[]
+ {
+ "INCREMENT", "ADDEQUAL", "ARRAY_ACCESS",
+ "ASSIGN", "CASE", "DECREMENT", "GOTO", "INCREMENT", "ASSIGN"
+ };
+ var factory = new MutexTokenFactory();
+ var model = new ComparisonModel(factory.GetTokenWrapperEnumerable(defaultTokenSet),
+ new[]{
+ new SourceEntityData("stud1", "assignment1", defaultTokenSet, "bla" ),
+ new SourceEntityData("stud2", "assignment1", new[] {"CASE", "DECREMENT"}, "bla2")
+ }, factory);
+
+ model.Calculate();
+
+ /*
string student;
string assignment;
string path;
@@ -78,6 +95,8 @@ static void Main(string[] args)
if (Environment.UserInteractive)
Console.ReadLine();
#endif
+
+ /* */
}
catch (Exception ex)
{
@@ -143,12 +162,12 @@ private static void PrintUsageInformation()
private static GSTTokenList<GSTToken<TokenWrapper>> GetTokens(FileInfo file)
{
- string source = File.ReadAllText(file.FullName);
- var tokens = LexerHelper.CreateLexerFromSource(source).GetTokenWrappers().ToList();
+ var factory = new MutexTokenFactory();
+ var tokens = factory.GetTokenWrapperListFromFile(file.FullName);
return tokens.ToGSTTokenList();
}
-
+
private static GSTTokenList<GSTToken<TokenWrapper>> GetTokens(string file)
{
return GetTokens(new FileInfo(file));
View
10 GSTEvaluation/Program.cs
@@ -16,6 +16,7 @@
using System.Threading;
using GSTLibrary.test.tile;
using System.Globalization;
+using ComparisonModel = GSTAppLogic.app.model.ComparisonModel;
namespace GSTEvaluation
{
@@ -181,7 +182,6 @@ private static void EvaluateSpeed()
/// </summary>
private static void EvaluateCompleteSetOfSources()
{
- LexerHelper.UsedLexer = typeof(MutexCLexer);
new CompleteComparisonReport("01_01").Run();
Console.WriteLine("Finished complete set of sources");
Console.ReadLine();
@@ -194,14 +194,14 @@ private static void EvaluateStandardSet()
{
try
{
- LexerHelper.UsedLexer = typeof(MutexCLexer);
+ var factory = new MutexTokenFactory();
var watch = Stopwatch.StartNew();
var evalModel = new EvaluationRunModel(TEST_SUITE_DIRECTORY);
cLogger.DebugFormat("evaluation run finished in {0} ms", watch.ElapsedMilliseconds);
new ListResultsExport().Run(evalModel);
- File.WriteAllLines(@"test\tokens\tok1.txt", LexerHelper.CreateLexer(@"test\tokens\main-01.c").GetDebugTokenStrings());
- File.WriteAllLines(@"test\tokens\tok2.txt", LexerHelper.CreateLexer(@"test\tokens\main-03.c").GetDebugTokenStrings());
+ File.WriteAllLines(@"test\tokens\tok1.txt", factory.GetTokenWrapperListFromFile(@"test\tokens\main-01.c").GetDebugTokenStrings());
+ File.WriteAllLines(@"test\tokens\tok2.txt", factory.GetTokenWrapperListFromFile(@"test\tokens\main-03.c").GetDebugTokenStrings());
}
catch (Exception ex)
{
@@ -230,6 +230,8 @@ public static void Main(string[] args)
{
XmlConfigurator.Configure(new FileInfo("log4net.xml"));
+
+
//EvaluateSpeed();
//EvaluateCompleteSetOfSources()
View
112 GSTEvaluation/bin/Debug/test/tokens/tok1.txt
@@ -0,0 +1,112 @@
+[004:00] VOID - void
+[004:06] IDENTIFIER - stdcall
+[004:14] IDENTIFIER - Sleep
+[004:20] INTEGER_DATATYPE - long
+[004:25] IDENTIFIER - ms
+[007:00] VOID - void
+[007:05] IDENTIFIER - Ticker
+[007:12] POINTER_DATATYPE - char *
+[007:18] IDENTIFIER - s
+[007:20] INTEGER_DATATYPE - int
+[007:25] IDENTIFIER - len
+[009:00] VOID - void
+[009:05] IDENTIFIER - main
+[009:11] VOID - void
+[009:17] DECLARATION_ASSIGNMENT -
+ int i = 1
+[010:11] DECLARATION_ASSIGNMENT -
+ int j = 0
+[011:11] INTEGER_DATATYPE -
+ char
+[012:06] ARRAY_ACCESS - name[80]
+[012:15] ASSIGN_OPS - =
+[012:17] STRING_LITERAL - ""
+[012:20] INTEGER_DATATYPE -
+ char
+[013:06] ARRAY_ACCESS - s1[4]
+[013:12] ASSIGN_OPS - =
+[013:14] STRING_LITERAL - "Max"
+[013:20] INTEGER_DATATYPE -
+ char
+[014:06] ARRAY_ACCESS - s2[1]
+[014:12] ASSIGN_OPS - =
+[014:14] STRING_LITERAL - ""
+[016:01] WHILE_LOOP - while(i = 1)
+[018:01] IDENTIFIER - printf
+[018:08] STRING_LITERAL - "Bitte einen Namen eingeben: \n"
+[019:01] IDENTIFIER - fflush
+[019:08] IDENTIFIER - stdin
+[020:01] IDENTIFIER - scanf
+[020:07] STRING_LITERAL - "%79[^\n]s"
+[020:20] IDENTIFIER - name
+[021:01] IF - if
+[021:05] IDENTIFIER - strlen
+[021:12] IDENTIFIER - name
+[021:17] COMPARISONOPERATOR - ==
+[021:20] INTEGER_LITERAL - 0
+[022:02] IDENTIFIER - printf
+[022:09] STRING_LITERAL - "Fehler\n\n"
+[023:01] ELSE - else
+[025:02] FOR_LOOP - for (j=strlen(name)
+[025:22] COMPARISON - j<=78
+[025:28] INCREMENT - j++
+[027:03] IDENTIFIER - strcat
+[027:10] IDENTIFIER - name
+[027:15] STRING_LITERAL - " "
+[035:02] FOR_LOOP - for(j=0; j <= 100; j++)
+[037:03] IDENTIFIER - Ticker
+[037:10] IDENTIFIER - name
+[037:16] IDENTIFIER - strlen
+[037:23] IDENTIFIER - name
+[039:03] IDENTIFIER - printf
+[039:10] STRING_LITERAL - "%4s\r"
+[039:18] IDENTIFIER - name
+[040:03] IDENTIFIER - Sleep
+[040:09] INTEGER_LITERAL - 50
+[056:00] VOID - void
+[056:05] IDENTIFIER - Ticker
+[056:12] POINTER_DATATYPE - char *
+[056:18] IDENTIFIER - s
+[056:20] INTEGER_DATATYPE - int
+[056:25] IDENTIFIER - len
+[056:30] DECLARATION_ASSIGNMENT -
+
+ int j = 0
+[058:11] DECLARATION_ASSIGNMENT -
+ int counter = 0
+[060:01] IF - if
+[060:05] IDENTIFIER - len
+[060:09] GREATERTHAN - >
+[060:11] INTEGER_LITERAL - 1
+[061:02] DECLARATION_ASSIGNMENT -
+
+
+
+ int i = 0
+[065:13] INTEGER_DATATYPE -
+ char
+[066:08] IDENTIFIER - t
+[068:03] FOR_LOOP - for(i;i<=77;i++)
+[070:04] IDENTIFIER - t
+[070:06] ASSIGN_OPS - =
+[070:08] ARRAY_ACCESS - s[i]
+[071:04] ARRAY_ACCESS - s[i]
+[071:09] ASSIGN_OPS - =
+[071:11] IDENTIFIER - s
+[071:12] LSQUAREBRACKET - [
+[071:13] IDENTIFIER - i
+[071:14] PLUS - +
+[071:15] INTEGER_LITERAL - 1
+[071:16] RSQUAREBRACKET - ]
+[072:04] IDENTIFIER - s
+[072:05] LSQUAREBRACKET - [
+[072:06] IDENTIFIER - i
+[072:07] PLUS - +
+[072:08] INTEGER_LITERAL - 1
+[072:09] RSQUAREBRACKET - ]
+[072:11] ASSIGN_OPS - =
+[072:13] IDENTIFIER - t
+[077:01] ELSE - else
+[077:06] RETURN - return
+[121:01] DIVIDE - /
+[121:02] DIVIDE - /
View
91 GSTEvaluation/bin/Debug/test/tokens/tok2.txt
@@ -0,0 +1,91 @@
+[005:00] VOID - void
+[005:05] IDENTIFIER - Ticker
+[005:12] POINTER_DATATYPE - char *
+[005:18] IDENTIFIER - s
+[005:20] INTEGER_DATATYPE - int
+[005:25] IDENTIFIER - len
+[006:01] INTEGER_DATATYPE -
+ int
+[007:09] IDENTIFIER - i
+[007:11] INTEGER_DATATYPE -
+ char
+[008:10] IDENTIFIER - tausch
+[010:05] IF - if
+[010:09] ARRAY_ACCESS - s[0]
+[010:14] NOTEQUAL - !=
+[010:18] BACKSLASH - \
+[010:19] INTEGER_LITERAL - 0
+[012:09] FOR_LOOP - for(i = 0; s[i+1] != '\0' ; i++)
+[014:13] IDENTIFIER - tausch
+[014:20] ASSIGN_OPS - =
+[014:22] ARRAY_ACCESS - s[i]
+[015:13] ARRAY_ACCESS - s[i]
+[015:18] ASSIGN_OPS - =
+[015:20] IDENTIFIER - s
+[015:21] LSQUAREBRACKET - [
+[015:22] IDENTIFIER - i
+[015:23] PLUS - +
+[015:24] INTEGER_LITERAL - 1
+[015:25] RSQUAREBRACKET - ]
+[016:13] IDENTIFIER - s
+[016:14] LSQUAREBRACKET - [
+[016:15] IDENTIFIER - i
+[016:16] PLUS - +
+[016:17] INTEGER_LITERAL - 1
+[016:18] RSQUAREBRACKET - ]
+[016:19] ASSIGN_OPS - =
+[016:20] IDENTIFIER - tausch
+[018:09] IDENTIFIER - printf
+[018:16] STRING_LITERAL - "%79s\r"
+[018:26] IDENTIFIER - s
+[022:00] VOID - void
+[022:07] IDENTIFIER - stdcall
+[022:15] IDENTIFIER - Sleep
+[022:21] INTEGER_DATATYPE - long
+[022:26] IDENTIFIER - ms
+[024:00] VOID - void
+[024:05] IDENTIFIER - main
+[024:10] VOID - void
+[025:01] DECLARATION_ASSIGNMENT -
+ int i = 0
+[026:15] DECLARATION_ASSIGNMENT -
+ int j = 0
+[027:15] INTEGER_DATATYPE -
+ char
+[028:10] ARRAY_ACCESS - s1[4]
+[028:16] ASSIGN_OPS - =
+[028:18] STRING_LITERAL - "Max"
+[028:24] INTEGER_DATATYPE -
+ char
+[029:10] ARRAY_ACCESS - s2[1]
+[029:16] ASSIGN_OPS - =
+[029:18] STRING_LITERAL - ""
+[029:21] INTEGER_DATATYPE -
+ char
+[030:10] ARRAY_ACCESS - name[80]
+[030:19] ASSIGN_OPS - =
+[030:21] STRING_LITERAL - ""
+[035:05] IDENTIFIER - printf
+[035:12] STRING_LITERAL - "Bitte Namen eingeben: "
+[036:05] IDENTIFIER - fflush
+[036:12] IDENTIFIER - stdin
+[037:05] IDENTIFIER - scanf
+[037:11] IDENTIFIER - s
+[037:13] STRING_LITERAL - "%79[^\n]s"
+[037:26] IDENTIFIER - name
+[037:32] SIZEOF - sizeof
+[037:39] IDENTIFIER - name
+[039:05] FOR_LOOP - for(j=strlen(name)
+[039:24] COMPARISON - j<=78
+[039:30] INCREMENT - j++
+[041:09] IDENTIFIER - strcat
+[041:16] IDENTIFIER - name
+[041:21] STRING_LITERAL - " "
+[044:05] FOR_LOOP - for(i=0;i<=30;i++)
+[047:09] IDENTIFIER - Ticker
+[047:16] IDENTIFIER - name
+[047:22] INTEGER_DATATYPE - int
+[047:27] IDENTIFIER - strlen
+[047:34] IDENTIFIER - name
+[048:09] IDENTIFIER - Sleep
+[048:15] INTEGER_LITERAL - 60
View
4 GSTEvaluation/export/CompleteComparisonReport.cs
@@ -146,8 +146,8 @@ private static string[] Calculate(string testName, int[] elem)
private static GSTTokenList<GSTToken<TokenWrapper>> GetTokens(FileInfo file)
{
- string source = File.ReadAllText(file.FullName);
- var tokens = LexerHelper.CreateLexerFromSource(source).GetTokenWrappers().ToList();
+ var factory = new MutexTokenFactory();
+ var tokens = factory.GetTokenWrapperListFromFile(file.FullName);
return tokens.ToGSTTokenList();
}
View
10 GSTEvaluation/model/ComparisonModel.cs
@@ -50,8 +50,10 @@ public ComparisonModel(string name, Int64 evalRunID, string sourcePath1, string
var tmplFile = Directory.GetFiles(directory, "template.c").FirstOrDefault();
var path1 = tmplFileExists ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath1, tmplFile) : sourcePath1;
var path2 = tmplFileExists ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath2, tmplFile) : sourcePath2;
- var tokens1 = LexerHelper.CreateLexer(path1).GetTokenWrappers();
- var tokens2 = LexerHelper.CreateLexer(path2).GetTokenWrappers();
+ var factory = new MutexTokenFactory();
+ var tokens1 = factory.GetTokenWrapperListFromFile(path1);
+ var tokens2 = factory.GetTokenWrapperListFromFile(path2);
+
cLogger.DebugFormat("TokenStream Length: {0} -- {1}", tokens1.Count(), tokens2.Count());
var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(
@@ -61,8 +63,8 @@ public ComparisonModel(string name, Int64 evalRunID, string sourcePath1, string
algo.RunToCompletion();
Result = algo.Similarity;
- Source1 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath1), tokens1.GetJoinedTokenString());
- Source2 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath2), tokens2.GetJoinedTokenString());
+ Source1 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath1), factory.GetJoinedTokenString(tokens1));
+ Source2 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath2), factory.GetJoinedTokenString(tokens2));
SQLFacade.Instance.CreateComparison(name, Result, watch.ElapsedMilliseconds, evalRunID, Source1.ID, Source2.ID);
}
}
View
29 GSTLibrary/tile/HashingGSTAlgorithm.cs
@@ -89,7 +89,15 @@ internal HashingEntity(GSTTokenList<T> list, int startIndex, int mml)
arr[i] = l[i + startIndex];
}
- hash = arr.Select((t, i) => (t.GetHashCode() * (multiplicators[i % multiplicators.Length]))).Sum();
+ var thing = arr.Select((t, i) => (t.GetHashCode()*(multiplicators[i%multiplicators.Length])));
+ unchecked
+ {
+ foreach (var element in thing)
+ hash += element;
+ }
+
+
+ //hash = unchecked(thing.Sum());
Tokens = arr;
}
@@ -109,12 +117,12 @@ public override int GetHashCode()
public override string ToString()
{
var builder = new StringBuilder("HashingEntity:\"");
-
+ builder.Append(Tokens[0].GetType());
foreach(var element in Tokens)
{
builder.Append(element.ToString());
}
-
+
return builder.Append('"').ToString();
}
}
@@ -199,6 +207,9 @@ private void InitializeHashes()
{
InitializeA();
InitializeB();
+
+ // this could be a possible optimization, but so far
+ // hash calculation does not seem like a bottleneck
/*
var thread1 = new Thread(InitializeA);
var thread2 = new Thread(InitializeB);
@@ -209,6 +220,18 @@ private void InitializeHashes()
thread2.Join();
/* */
+ /*
+ foreach(var asdf in HashesA)
+ {
+ Console.WriteLine("hashA:" + asdf.Key + "|" + asdf.Value[0]);
+ }
+
+ foreach (var asdf in HashesB)
+ {
+ Console.WriteLine("hashB:" + asdf.Key + "|" + asdf.Value[0]);
+ }
+ /* */
+
MinimizeHashes();
initialized = true;
}
View
96 Tokenizer/TokenFactory.cs
@@ -0,0 +1,96 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+using Antlr.Runtime;
+using System.IO;
+
+namespace Tokenizer
+{
/// <summary>
/// base factory that creates tokens and token wrappers for a concrete lexer;
/// sub-classes supply the lexer type and the source-to-token-list conversion
/// </summary>
public abstract class TokenFactory
{
    /// <summary>
    /// the lexer type whose public static int fields define the token names
    /// </summary>
    protected abstract Type LexerType { get; }

    /// <summary>
    /// returns the IToken object for this tokenName
    /// </summary>
    /// <param name="tokenName">name of the token, e.g. "IDENTIFIER"</param>
    /// <returns>the token instance</returns>
    public abstract IToken GetToken(string tokenName);

    /// <summary>
    /// returns the TokenWrapper for this tokenName
    /// </summary>
    /// <param name="tokenName">name of the token</param>
    /// <returns>the token wrapped together with its name</returns>
    public TokenWrapper GetTokenWrapper(string tokenName)
    {
        return new TokenWrapper(GetToken(tokenName), tokenName);
    }

    /// <summary>
    /// reads the file and returns the TokenWrapper list representing this file
    /// </summary>
    /// <param name="path">path to the source file</param>
    /// <returns>the wrapped tokens of the file's content</returns>
    public List<TokenWrapper> GetTokenWrapperListFromFile(string path)
    {
        return GetTokenWrapperListFromSource(File.ReadAllText(path));
    }

    /// <summary>
    /// parses the source into a list of TokenWrapper objects
    /// </summary>
    /// <param name="source">the source code to tokenize</param>
    /// <returns>the wrapped tokens in source order</returns>
    public abstract List<TokenWrapper> GetTokenWrapperListFromSource(string source);

    /// <summary>
    /// turns the token names into TokenWrapper objects
    /// </summary>
    /// <param name="tokenNameEnumerable">the token names to wrap</param>
    /// <returns>one TokenWrapper per name, in order</returns>
    public IEnumerable<TokenWrapper> GetTokenWrapperEnumerable(IEnumerable<string> tokenNameEnumerable)
    {
        return tokenNameEnumerable.Select(tokenString => new TokenWrapper(GetToken(tokenString), tokenString));
    }

    /// <summary>
    /// returns the name for the passed in token by reflecting over the
    /// public static int fields declared directly on the lexer type
    /// </summary>
    /// <param name="tokenType">the token represented as an integer</param>
    /// <param name="lexerType">the lexer that is used to find the token name</param>
    /// <returns>the name of the matching token field</returns>
    /// <exception cref="InvalidOperationException">no field matches the token type</exception>
    public string GetTokenName(int tokenType, Type lexerType)
    {
        var name = lexerType
            .GetFields(BindingFlags.Public | BindingFlags.Static | BindingFlags.DeclaredOnly)
            .Where(field => field.FieldType == typeof(int))
            .Where(field => ((int)field.GetValue(null)) == tokenType)
            .Select(field => field.Name)
            .FirstOrDefault();

        // First() would throw a bare "Sequence contains no elements" here;
        // keep the exception type but say which token type failed to resolve
        if (null == name)
            throw new InvalidOperationException(
                string.Format("no token with type {0} declared on lexer '{1}'", tokenType, lexerType.Name));

        return name;
    }

    /// <summary>
    /// joins the token names of the wrappers with single spaces
    /// </summary>
    /// <param name="tokens">the wrapped tokens whose names are joined</param>
    /// <returns>the space-separated token name string (no trailing space)</returns>
    public string GetJoinedTokenString(IEnumerable<TokenWrapper> tokens)
    {
        return string.Join(" ", tokens.Select(token => GetTokenName(token.Type.Type, LexerType)));
    }
}
+}
View
13 Tokenizer/TokenWrapper.cs
@@ -1,5 +1,6 @@
using Antlr.Runtime;
+using System;
namespace Tokenizer
{
@@ -17,9 +18,12 @@ public class TokenWrapper
/// <summary>
/// the saved token object from ANTLR lexers
/// </summary>
- private IToken Token { get; set; }
+ public IToken Token { get; private set; }
- private string Text { get; set; }
+ /// <summary>
+ /// returns the text of the underlying token
+ /// </summary>
+ public string Text { get { return Token.Text; } }
/// <summary>
/// a human readable representation of the token type
@@ -36,7 +40,6 @@ public TokenWrapper(IToken token, string tokenName)
{
TokenName = tokenName;
Token = token;
- Text = token.Text;
Type = new TokenType {Type = token.Type};
tokenHashCode = Type.GetHashCode();
}
@@ -69,12 +72,12 @@ public override bool Equals(object obj)
public override int GetHashCode()
{
- return null == Token ? GetType().GetHashCode() : Token.GetHashCode();
+ return null == Token ? Type.GetHashCode() : Token.GetHashCode();
}
public override string ToString()
{
- return Token.Text;
+ return Token.ToString();
}
}
}
View
1 Tokenizer/Tokenizer.csproj
@@ -47,6 +47,7 @@
</ItemGroup>
<ItemGroup>
<Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TokenFactory.cs" />
<Compile Include="TokenType.cs" />
<Compile Include="TokenWrapper.cs" />
</ItemGroup>

0 comments on commit e47c623

Please sign in to comment.
Something went wrong with that request. Please try again.