diff --git a/src/main/kotlin/app/Main.kt b/src/main/kotlin/app/Main.kt index a1ff15b4..31bdd971 100644 --- a/src/main/kotlin/app/Main.kt +++ b/src/main/kotlin/app/Main.kt @@ -79,6 +79,8 @@ class Main(argv: Array<String>) { } Logger.info(Logger.Events.EXIT) { "App finished" } + + Measurements.showAllMeasurements() } private fun startUi() { diff --git a/src/main/kotlin/app/Measurements.kt b/src/main/kotlin/app/Measurements.kt new file mode 100644 index 00000000..d87bd4b1 --- /dev/null +++ b/src/main/kotlin/app/Measurements.kt @@ -0,0 +1,20 @@ +package app + +object Measurements { + private val measureMap: HashMap<String, Long> = hashMapOf() + + fun addMeasurement(key: String, value: Long) { + if (!measureMap.containsKey(key)) { + measureMap[key] = 0 + } + measureMap[key] = measureMap[key]!! + value + } + + fun showAllMeasurements() { + Logger.info { "Measurements:" } + Logger.info { measureMap.keys.joinToString(" ") } + measureMap.toList().sortedBy { it.second }.forEach { + Logger.info { "${it.first} -> ${it.second}" } + } + } +} diff --git a/src/main/kotlin/app/RegexMeasured.kt b/src/main/kotlin/app/RegexMeasured.kt new file mode 100644 index 00000000..5e85e1a2 --- /dev/null +++ b/src/main/kotlin/app/RegexMeasured.kt @@ -0,0 +1,75 @@ +package app + +import kotlin.system.measureNanoTime + +operator fun CharSequence.contains(regex: RegexMeasured): Boolean = regex.containsMatchIn(this) +fun CharSequence.split(regex: RegexMeasured, limit: Int = 0): List<String> = regex.split(this, limit) + +class RegexMeasured (val tag: String, val regex: String, + val options: Set<RegexOption>) { + companion object { + const val T_INIT = "+init" + const val T_REPLACE = "+replace" + const val T_FIND = "+find" + const val T_CONTAINS = "+containsMatchIn" + const val T_SPLIT = "+split" + } + + constructor(tag: String, regex: String, option: RegexOption) : + this(tag, regex, setOf(option)) + constructor(tag: String, regex: String) : this(tag, regex, setOf()) + + var regexObj: Regex? 
= null + + init { + val time = measureNanoTime { + regexObj = Regex(regex, options) + } + Measurements.addMeasurement(tag + T_INIT, time) + } + + fun replace(input: CharSequence, replacement: String): String { + var res:String? = null + val time = measureNanoTime { + res = regexObj!!.replace(input, replacement) + } + Measurements.addMeasurement(tag + T_REPLACE, time) + return res!! + } + + fun find(input: CharSequence, startIndex: Int = 0): MatchResult? { + var res: MatchResult? = null + val time = measureNanoTime { + res = regexObj!!.find(input, startIndex) + } + Measurements.addMeasurement(tag + T_FIND, time) + return res /* stays null when nothing matches; `!!` here would throw on every miss */ + } + + fun findAll(input: CharSequence, startIndex: Int = 0): Sequence<MatchResult> { + var res: Sequence<MatchResult>? = null + val time = measureNanoTime { + res = regexObj!!.findAll(input, startIndex) /* NOTE(review): findAll returns a lazy Sequence, so this likely times only its creation, not the matching - confirm */ + } + Measurements.addMeasurement(tag + T_FIND, time) + return res!! + } + + fun split(input: CharSequence, limit: Int = 0): List<String> { + var res: List<String>? = null + val time = measureNanoTime { + res = regexObj!!.split(input, limit) + } + Measurements.addMeasurement(tag + T_SPLIT, time) + return res!! + } + + fun containsMatchIn(input: CharSequence): Boolean { + var res: Boolean? = null + val time = measureNanoTime { + res = regexObj!!.containsMatchIn(input) + } + Measurements.addMeasurement(tag + T_CONTAINS, time) + return res!! 
+ } +} diff --git a/src/main/kotlin/app/api/ServerApi.kt b/src/main/kotlin/app/api/ServerApi.kt index 891a9909..3e08ece5 100644 --- a/src/main/kotlin/app/api/ServerApi.kt +++ b/src/main/kotlin/app/api/ServerApi.kt @@ -5,6 +5,7 @@ package app.api import app.BuildConfig import app.Logger +import app.Measurements import app.config.Configurator import app.model.* import com.github.kittinunf.fuel.core.FuelManager @@ -13,9 +14,11 @@ import com.github.kittinunf.fuel.core.Request import com.github.kittinunf.fuel.core.Response import com.google.protobuf.InvalidProtocolBufferException import java.security.InvalidParameterException +import kotlin.system.measureNanoTime class ServerApi (private val configurator: Configurator) : Api { companion object { + private val CLASS_TAG = "ServerApi-" private val HEADER_VERSION_CODE = "app-version-code" private val HEADER_CONTENT_TYPE = "Content-Type" private val HEADER_CONTENT_TYPE_PROTO = "application/octet-stream" @@ -128,21 +131,24 @@ class ServerApi (private val configurator: Configurator) : Api { var error: ApiError? = null var data: T? 
= null - try { - Logger.debug { "Request $requestName initialized" } - val (_, res, result) = request.responseString() - val (_, e) = result - if (e == null) { - Logger.debug { "Request $requestName success" } - data = parser(res.data) - } else { + val time = measureNanoTime { + try { + Logger.debug { "Request $requestName initialized" } + val (_, res, result) = request.responseString() + val (_, e) = result + if (e == null) { + Logger.debug { "Request $requestName success" } + data = parser(res.data) + } else { + error = ApiError(e) + } + } catch (e: InvalidProtocolBufferException) { + error = ApiError(e) + } catch (e: InvalidParameterException) { error = ApiError(e) } - } catch (e: InvalidProtocolBufferException) { - error = ApiError(e) - } catch (e: InvalidParameterException) { - error = ApiError(e) } + Measurements.addMeasurement(CLASS_TAG + "MakeRequest", time) return Result(data, error) } diff --git a/src/main/kotlin/app/extractors/CExtractor.kt b/src/main/kotlin/app/extractors/CExtractor.kt index c1059c03..d942708b 100644 --- a/src/main/kotlin/app/extractors/CExtractor.kt +++ b/src/main/kotlin/app/extractors/CExtractor.kt @@ -4,13 +4,24 @@ package app.extractors +import app.RegexMeasured + class CExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "CExtractor-" const val LANGUAGE_NAME = Lang.C - val importRegex = Regex("""^([^\n]*#include)\s[^\n]*""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val extractImportRegex = - Regex("""#include\s+["<](\w+)[/\w+]*\.\w+[">]""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^([^\n]*#include)\s[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImport", + """#include\s+["<](\w+)[/\w+]*\.\w+[">]""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/CSharpExtractor.kt 
b/src/main/kotlin/app/extractors/CSharpExtractor.kt index d3af3aae..5544cbd2 100644 --- a/src/main/kotlin/app/extractors/CSharpExtractor.kt +++ b/src/main/kotlin/app/extractors/CSharpExtractor.kt @@ -4,12 +4,18 @@ package app.extractors +import app.RegexMeasured + class CSharpExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "CSharpExtractor-" const val LANGUAGE_NAME = Lang.CSHARP - val importRegex = Regex("""^.*using\s+(\w+[.\w+]*)""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val extractImportRegex = Regex("""using\s+(\w+[.\w+]*)""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex","""^.*using\s+(\w+[.\w+]*)""") + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex","""^([^\n]*//)[^\n]*""") + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex","""using\s+(\w+[.\w+]*)""") } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/CppExtractor.kt b/src/main/kotlin/app/extractors/CppExtractor.kt index e4f02775..1258c96b 100644 --- a/src/main/kotlin/app/extractors/CppExtractor.kt +++ b/src/main/kotlin/app/extractors/CppExtractor.kt @@ -4,17 +4,32 @@ package app.extractors +import app.RegexMeasured +import app.contains import app.model.CommitStats import app.model.DiffFile class CppExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "CppExtractor-" const val LANGUAGE_NAME = Lang.CPP const val TEMPLATE = "template" - val importRegex = Regex("""^([^\n]*#include)\s[^\n]*""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val extractImportRegex = Regex("""#include\s+["<](\w+[/\w+]*(\.\w+)?)[">]""") - val templateRegex = Regex("""template\s*<""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^([^\n]*#include)\s[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + 
"extractImportRegex", + """#include\s+["<](\w+[/\w+]*(\.\w+)?)[">]""" + ) + val templateRegex = RegexMeasured( + CLASS_TAG + "templateRegex", + """template\s*<""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/CrystalExtractor.kt b/src/main/kotlin/app/extractors/CrystalExtractor.kt index 0fd4430a..e3bd635e 100644 --- a/src/main/kotlin/app/extractors/CrystalExtractor.kt +++ b/src/main/kotlin/app/extractors/CrystalExtractor.kt @@ -3,13 +3,28 @@ package app.extractors +import app.RegexMeasured + class CrystalExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "CrystalExtractor-" const val LANGUAGE_NAME = Lang.CRYSTAL - val importRegex = Regex("""require\s+\"(\w+)\"""") - val commentRegex = Regex("""^([^\n]*#)[^\n]*""") - val extractImportRegex = Regex("""require\s+\"(.+)\"""") - val includeRegex = Regex("""include\s+(\w+)::.+""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """require\s+\"(\w+)\"""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*#)[^\n]*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex", + """require\s+\"(.+)\"""" + ) + val includeRegex = RegexMeasured( + CLASS_TAG + "includeRegex", + """include\s+(\w+)::.+""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/DartExtractor.kt b/src/main/kotlin/app/extractors/DartExtractor.kt index d11adbc1..d271f873 100644 --- a/src/main/kotlin/app/extractors/DartExtractor.kt +++ b/src/main/kotlin/app/extractors/DartExtractor.kt @@ -3,7 +3,16 @@ package app.extractors +import app.RegexMeasured + object DartExtractor : ExtractorBase( language = Lang.DART, - importRegex = Regex("""^import ['"](.+)['"];$"""), - commentRegex = Regex("(//.+$)|(/[*].*?[*]/)")) + importRegex = RegexMeasured( + "DartExtractor-importRegex", + """^import ['"](.+)['"];$""" + ), + 
commentRegex = RegexMeasured( + "DartExtractor-commentRegex", + "(//.+$)|(/[*].*?[*]/)" + ) +) diff --git a/src/main/kotlin/app/extractors/ElixirExtractor.kt b/src/main/kotlin/app/extractors/ElixirExtractor.kt index a8f4078b..83549024 100644 --- a/src/main/kotlin/app/extractors/ElixirExtractor.kt +++ b/src/main/kotlin/app/extractors/ElixirExtractor.kt @@ -3,7 +3,16 @@ package app.extractors +import app.RegexMeasured + object ElixirExtractor : ExtractorBase( language = Lang.ELIXIR, - importRegex = Regex("""^\s+(?:use|import|require) ([a-zA-Z_][a-zA-Z0-9_]*)"""), - commentRegex = Regex("""#.*$""")) + importRegex = RegexMeasured( + "ElixirExtractor-importRegex", + """^\s+(?:use|import|require) ([a-zA-Z_][a-zA-Z0-9_]*)""" + ), + commentRegex = RegexMeasured( + "ElixirExtractor-commentRegex", + """#.*$""" + ) +) diff --git a/src/main/kotlin/app/extractors/ExtractorBase.kt b/src/main/kotlin/app/extractors/ExtractorBase.kt index eed6cea9..e0580029 100644 --- a/src/main/kotlin/app/extractors/ExtractorBase.kt +++ b/src/main/kotlin/app/extractors/ExtractorBase.kt @@ -3,9 +3,11 @@ package app.extractors +import app.RegexMeasured + open class ExtractorBase(private val language: String, - private val importRegex: Regex, - private val commentRegex: Regex, + private val importRegex: RegexMeasured, + private val commentRegex: RegexMeasured, private val importStartsWith: Boolean = false) : ExtractorInterface { override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/ExtractorInterface.kt b/src/main/kotlin/app/extractors/ExtractorInterface.kt index 13496d4f..bf4a19e3 100644 --- a/src/main/kotlin/app/extractors/ExtractorInterface.kt +++ b/src/main/kotlin/app/extractors/ExtractorInterface.kt @@ -4,10 +4,12 @@ package app.extractors +import app.RegexMeasured import app.model.* interface ExtractorInterface { companion object { + const val CLASS_TAG = "ExtractorInterface-" const val TYPE_LANGUAGE = 1 const val TYPE_LIBRARY = 2 const val 
TYPE_KEYWORD = 3 @@ -15,17 +17,26 @@ interface ExtractorInterface { const val SEPARATOR = ">" private val classifierManager = ClassifierManager() - - val stringRegex = Regex("""(".+?"|'.+?')""") - val splitRegex = Regex("""\s|,|;|\*|\n|\(|\)|\[|]|\{|}|\+|=|&|\$|""" + - """!=|\.|>|<|#|@|:|\?|!""") + private val tokenizer = CachingTokenizer() + + val stringRegex = RegexMeasured( + CLASS_TAG + "stringRegex", + """(".+?"|'.+?')""" + ) + val splitRegex = RegexMeasured( + CLASS_TAG + "splitRegex", + """\s|,|;|\*|\n|\(|\)|\[|]|\{|}|\+|=|&|\$|""" + + """!=|\.|>|<|#|@|:|\?|!""" + ) } // Identify libs used in a line with classifiers. fun determineLibs(line: String, importedLibs: List<String>): List<String> { val lang = getLanguageName() if (lang != null) { - return classifierManager.estimate(tokenize(line), importedLibs) +// val tokenizedLine = tokenizer.tokenize(line, this) + val tokenizedLine = tokenize(line) + return classifierManager.estimate(tokenizedLine, importedLibs) } return listOf() } @@ -106,10 +117,15 @@ interface ExtractorInterface { fun tokenize(line: String): List<String> { // TODO(lyaronskaya): Multiline comment regex. 
val newLine = stringRegex.replace(line, "") - val tokens = splitRegex.split(newLine).filter { - it.isNotBlank() && !it.contains('"') && !it.contains('\'') && + /* '\t' and '\r' keep tab / carriage-return splitting that the previous \s-based splitRegex provided */ + val tokens = newLine.split(' ', '\t', ',', ';', '*', '\n', + ')', '(', '[', ']', '}', '{', '+', '-', '=', '&', '$', '!', + '.', '>', '<', '#', '@', ':', '?', '\r') + .filter { + it.isNotBlank() && !it.contains('"') && !it.contains('\'') && it != "-" && it != "@" - } + } + return tokens } diff --git a/src/main/kotlin/app/extractors/FSharpExtractor.kt b/src/main/kotlin/app/extractors/FSharpExtractor.kt index beacfc8b..4923487c 100644 --- a/src/main/kotlin/app/extractors/FSharpExtractor.kt +++ b/src/main/kotlin/app/extractors/FSharpExtractor.kt @@ -5,12 +5,24 @@ package app.extractors +import app.RegexMeasured + class FSharpExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "FSharpExtractor-" const val LANGUAGE_NAME = Lang.FSHARP - val importRegex = Regex("""^.*open\s+(\w+[.\w+]*)""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val extractImportRegex = Regex("""open\s+(\w+[.\w+]*)""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^.*open\s+(\w+[.\w+]*)""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex", + """open\s+(\w+[.\w+]*)""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/GoExtractor.kt b/src/main/kotlin/app/extractors/GoExtractor.kt index 5703d31f..5a58568e 100644 --- a/src/main/kotlin/app/extractors/GoExtractor.kt +++ b/src/main/kotlin/app/extractors/GoExtractor.kt @@ -4,15 +4,34 @@ package app.extractors +import app.RegexMeasured +import app.split + class GoExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "GoExtractor-" const val LANGUAGE_NAME = Lang.GO - val importRegex = Regex("""^(.*import)\s[^\n]*""") - val 
commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val singleImportRegex = Regex("""import\s+"(.+?)"""") - val multipleImportRegex = Regex("""import[\s\t\n]+\((.+?)\)""", - RegexOption.DOT_MATCHES_ALL) - val separatorsRegex = Regex("""(\t+|\n+|\s+|")""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^(.*import)\s[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) + val singleImportRegex = RegexMeasured( + CLASS_TAG + "singleImportRegex", + """import\s+"(.+?)"""" + ) + val multipleImportRegex = RegexMeasured( + CLASS_TAG + "multipleImportRegex", + """import[\s\t\n]+\((.+?)\)""", + RegexOption.DOT_MATCHES_ALL + ) + val separatorsRegex = RegexMeasured( + CLASS_TAG + "separatorsRegex", + """(\t+|\n+|\s+|")""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/Heuristics.kt b/src/main/kotlin/app/extractors/Heuristics.kt index 10b2eb06..0b63767f 100644 --- a/src/main/kotlin/app/extractors/Heuristics.kt +++ b/src/main/kotlin/app/extractors/Heuristics.kt @@ -3,228 +3,297 @@ package app.extractors +import app.RegexMeasured import app.model.DiffFile import app.model.CommitStats -val ActionscriptRegex = Regex( +const val CLASS_TAG = "Heuristics-" + +val ActionscriptRegex = RegexMeasured( + CLASS_TAG + "ActionscriptRegex", "^\\s*(package\\s+[a-z0-9_\\.]+|import\\s+[a-zA-Z0-9_\\.]+;|class\\s+[A-Za-z0-9_]+\\s+extends\\s+[A-Za-z0-9_]+)", RegexOption.MULTILINE ) -val CoqRegex = Regex( +val CoqRegex = RegexMeasured( + CLASS_TAG + "CoqRegex", """^Require\s""", RegexOption.MULTILINE ) -val CommonLispRegex = Regex( +val CommonLispRegex = RegexMeasured( + CLASS_TAG + "CommonLispRegex", "^\\s*\\((defun|in-package|defpackage) ", setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE) ) -val CppRegex = Regex( +val CppRegex = RegexMeasured( + CLASS_TAG + "CppRegex", "(template |class |namespace |#include <c?std[^.]+>)", RegexOption.MULTILINE ) 
-val DRegex = Regex( +val DRegex = RegexMeasured( + CLASS_TAG + "DRegex", "^module\\s+[\\w.]*\\s*;|import\\s+[\\w\\s,.:]*;|\\w+\\s+\\w+\\s*\\(.*\\)(?:\\(.*\\))?\\s*\\{[^}]*\\}|unittest\\s*(?:\\(.*\\))?\\s*\\{[^}]*\\}", RegexOption.MULTILINE ) -val DtraceRegex = Regex( +val DtraceRegex = RegexMeasured( + CLASS_TAG + "DtraceRegex", "^(\\w+:\\w*:\\w*:\\w*|BEGIN|END|provider\\s+|(tick|profile)-\\w+\\s+\\{[^}]*\\}|#pragma\\s+D\\s+(option|attributes|depends_on)\\s|#pragma\\s+ident\\s)", RegexOption.MULTILINE ) -val FilterscriptRegex = Regex( +val FilterscriptRegex = RegexMeasured( + CLASS_TAG + "FilterscriptRegex", "#include|#pragma\\s+(rs|version)|__attribute__" ) -val FSharpRegex = Regex( +val FSharpRegex = RegexMeasured( + CLASS_TAG + "FSharpRegex", "^\\s*(#light|import|let|module|namespace|open|type)", RegexOption.MULTILINE ) -val ForthRegex = Regex( +val ForthRegex = RegexMeasured( + CLASS_TAG + "ForthRegex", "^: " ) -val ForthFsRegex = Regex( +val ForthFsRegex = RegexMeasured( + CLASS_TAG + "ForthFsRegex", "^(: |new-device)" ) -val FortranRegex = Regex( +val FortranRegex = RegexMeasured( + CLASS_TAG + "FortranRegex", "^([c*][^abd-z]| (subroutine|program|end|data)\\s|\\s*!)", RegexOption.IGNORE_CASE ) -val GlslRegex = Regex( +val GlslRegex = RegexMeasured( + CLASS_TAG + "GlslRegex", "^\\s*(#version|precision|uniform|varying|vec[234])", RegexOption.IGNORE_CASE ) -val IdlRegex = Regex( +val IdlRegex = RegexMeasured( + CLASS_TAG + "IdlRegex", "^\\s*function[ \\w,]+$", RegexOption.MULTILINE ) -val IniPropsRegex = Regex( +val IniPropsRegex = RegexMeasured( + CLASS_TAG + "IniPropsRegex", "\\w+\\s*=\\s*", RegexOption.IGNORE_CASE ) -val LexRegex = Regex( +val LexRegex = RegexMeasured( + CLASS_TAG + "LexRegex", "^(%[%{}]xs|<.*>)", RegexOption.MULTILINE ) -val LimboRegex = Regex( +val LimboRegex = RegexMeasured( + CLASS_TAG + "LimboRegex", "^\\w+\\s*:\\s*module\\s*\\{", RegexOption.MULTILINE ) -val MathematicaRegex = Regex( +val MathematicaRegex = RegexMeasured( + CLASS_TAG + 
"MathematicaRegex", "\\*\\)$", RegexOption.MULTILINE ) -val MatlabRegex = Regex( +val MatlabRegex = RegexMeasured( + CLASS_TAG + "MatlabRegex", """(^\s*%)|(^end$)""", RegexOption.MULTILINE ) val MRegexs = setOf( - Regex( + RegexMeasured( + CLASS_TAG + "MRegex1", "^\\s*;", RegexOption.MULTILINE ), - Regex( + RegexMeasured( + CLASS_TAG + "MRegex2", "^\\w+\\s;", RegexOption.MULTILINE ) ) -val MakefileRegex = Regex( +val MakefileRegex = RegexMeasured( + CLASS_TAG + "MakefileRegex", "([\\/\\\\].*:\\s+.*\\s\\\\$|: \\\\$|^ : |^[\\w\\s\\/\\\\.]+\\w+\\.\\w+\\s*:\\s+[\\w\\s\\/\\\\.]+\\w+\\.\\w+)" ) -val MufRegex =Regex( +val MufRegex = RegexMeasured( + CLASS_TAG + "MufRegex", "^: ", RegexOption.MULTILINE ) -val NewLispRegex = Regex( +val NewLispRegex = RegexMeasured( + CLASS_TAG + "NewLispRegex", "^\\s*\\(define ", RegexOption.MULTILINE ) -val NotSqlRegex = Regex( +val NotSqlRegex = RegexMeasured( + CLASS_TAG + "NotSqlRegex", "begin|boolean|package|exception", RegexOption.IGNORE_CASE ) -val ObjectiveCRegex = Regex( +val ObjectiveCRegex = RegexMeasured( + CLASS_TAG + "ObjectiveCRegex", "^\\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\\b|#import\\s+.+\\.h[\">])", RegexOption.MULTILINE ) -val OcamlRegex = Regex( +val OcamlRegex = RegexMeasured( + CLASS_TAG + "OcamlRegex", "(^\\s*module)|let rec |match\\s+(\\S+\\s)+with", RegexOption.MULTILINE ) -val PascalRegex = Regex( +val PascalRegex = RegexMeasured( + CLASS_TAG + "PascalRegex", "(^\\s*uses)|(function)|(program)", setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE) ) -val Perl5Regex = Regex( +val Perl5Regex = RegexMeasured( + CLASS_TAG + "Perl5Regex", "\\buse\\s+(?:strict\\b|v?5\\.)" ) -val Perl6Regex = Regex( +val Perl6Regex = RegexMeasured( + CLASS_TAG + "Perl6Regex", "^\\s*(?:use\\s+v6\\b|\\bmodule\\b|\\b(?:my\\s+)?class\\b)", RegexOption.MULTILINE ) -val PhpRegex = Regex( +val PhpRegex = RegexMeasured( + CLASS_TAG + "PhpRegex", "^<\\?(?:php)?" 
) -val PicoLispRegex = Regex( +val PicoLispRegex = RegexMeasured( + CLASS_TAG + "PicoLispRegex", "^\\((de|class|rel|code|data|must)\\s", RegexOption.MULTILINE ) val PlpgsqlRegexs = setOf( - Regex( + RegexMeasured( + CLASS_TAG + "PlpgsqlRegex1", "^\\\\i\\b|AS \\$\\$|LANGUAGE '?plpgsql'?", setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE) ), - Regex( + RegexMeasured( + CLASS_TAG + "PlpgsqlRegex2", "SECURITY (DEFINER|INVOKER)", RegexOption.IGNORE_CASE ), - Regex( + RegexMeasured( + CLASS_TAG + "PlpgsqlRegex3", "BEGIN( WORK| TRANSACTION)?;", RegexOption.IGNORE_CASE ) ) val PlsqlRegexs = setOf( - Regex( + RegexMeasured( + CLASS_TAG + "PlsqlRegex1", "\\\$\\\$PLSQL_|XMLTYPE|sysdate|systimestamp|\\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)", RegexOption.IGNORE_CASE ), - Regex( + RegexMeasured( + CLASS_TAG + "PlsqlRegex2", "constructor\\W+function", RegexOption.IGNORE_CASE ) ) -val PovRaySdlRegex = Regex( +val PovRaySdlRegex = RegexMeasured( + CLASS_TAG + "PovRaySdlRegex", "^\\s*#(declare|local|macro|while)\\s", RegexOption.MULTILINE ) -val PrologRegex = Regex( +val PrologRegex = RegexMeasured( + CLASS_TAG + "PrologRegex", "^[^#]*:-", RegexOption.MULTILINE ) -val PythonRegex = Regex( +val PythonRegex = RegexMeasured( + CLASS_TAG + "PythonRegex", "(^(import|from|class|def)\\s)", RegexOption.MULTILINE ) -val RRegex = Regex( +val RRegex = RegexMeasured( + CLASS_TAG + "RRegex", "<-|^\\s*#" ) -val RebolRegex = Regex( +val RebolRegex = RegexMeasured( + CLASS_TAG + "RebolRegex", "\\bRebol\\b", RegexOption.IGNORE_CASE ) -val RoffRegex = Regex( +val RoffRegex = RegexMeasured( + CLASS_TAG + "RoffRegex", "^\\.[a-z][a-z](\\s|$)", setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE) ) -val RustRegex = Regex( +val RustRegex = RegexMeasured( + CLASS_TAG + "RustRegex", "^(use |fn |mod |pub |macro_rules|impl|#!?\\[)", RegexOption.MULTILINE ) -val RenderscriptRegex = Regex( +val RenderscriptRegex = RegexMeasured( + CLASS_TAG + "RenderscriptRegex", 
"#include|#pragma\\s+(rs|version)|__attribute__" ) -val ScalaRegex = Regex( +val ScalaRegex = RegexMeasured( + CLASS_TAG + "ScalaRegex", "^\\s*import (scala|java)\\./.match(data) || /^\\s*val\\s+\\w+\\s*=/.match(data) || /^\\s*class\\b", RegexOption.MULTILINE ) -val SmalltalkRegex = Regex( +val SmalltalkRegex = RegexMeasured( + CLASS_TAG + "SmalltalkRegex", "![\\w\\s]+methodsFor: " ) val SqlplRegexs = setOf( - Regex( + RegexMeasured( + CLASS_TAG + "SqlplRegex1", "(alter module)|(language sql)|(begin( NOT)+ atomic)", RegexOption.IGNORE_CASE ), - Regex( + RegexMeasured( + CLASS_TAG + "SqlplRegex2", "signal SQLSTATE '[0-9]+'", RegexOption.IGNORE_CASE ) ) -val StandardMlRegex = Regex( +val StandardMlRegex = RegexMeasured( + CLASS_TAG + "StandardMlRegex", "=> |case\\s+(\\S+\\s)+of" ) val SupercolliderRegexs = setOf( - Regex("\\^(this|super)\\."), - Regex("^\\s*(\\+|\\*)\\s*\\w+\\s*\\{", RegexOption.MULTILINE), - Regex("^\\s*~\\w+\\s*=\\.", RegexOption.MULTILINE) + RegexMeasured( + CLASS_TAG + "SupercolliderRegex1", + "\\^(this|super)\\." + ), + RegexMeasured( + CLASS_TAG + "SupercolliderRegex2", + "^\\s*(\\+|\\*)\\s*\\w+\\s*\\{", + RegexOption.MULTILINE + ), + RegexMeasured( + CLASS_TAG + "SupercolliderRegex3", + "^\\s*~\\w+\\s*=\\.", + RegexOption.MULTILINE + ) ) -val TexRegex = Regex( +val TexRegex = RegexMeasured( + CLASS_TAG + "TexRegex", "\\\\\\w+\\{" ) -val TypescriptRegex = Regex( +val TypescriptRegex = RegexMeasured( + CLASS_TAG + "TypescriptRegex", "^\\s*(import.+(from\\s+|require\\()['\"]react|\\/\\/\\/\\s*<reference\\s)", RegexOption.MULTILINE ) -val XmlPropsRegex = Regex( +val XmlPropsRegex = RegexMeasured( + CLASS_TAG + "XmlPropsRegex", "^(\\s*)(<Project|<Import|<Property|<?xml|xmlns)", setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE) ) -val XmltsRegex = Regex( +val XmltsRegex = RegexMeasured( + CLASS_TAG + "XmltsRegex", "<TS\\b" ) // Mystical \uFEFF 'ZERO WIDTH NO-BREAK SPACE' unicode character may appear // in beginning of files. 
-val XmlRegex = Regex( +val XmlRegex = RegexMeasured( + CLASS_TAG + "XmlRegex", "^\\uFEFF?\\s*<\\?xml\\s+version", setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE) ) -val XpmRegex = Regex( +val XpmRegex = RegexMeasured( + CLASS_TAG + "XpmRegex", "^\\s*\\/\\* XPM \\*\\/", RegexOption.MULTILINE ) diff --git a/src/main/kotlin/app/extractors/JavaExtractor.kt b/src/main/kotlin/app/extractors/JavaExtractor.kt index f1f3df3a..a5a6a33f 100644 --- a/src/main/kotlin/app/extractors/JavaExtractor.kt +++ b/src/main/kotlin/app/extractors/JavaExtractor.kt @@ -4,11 +4,13 @@ package app.extractors +import app.RegexMeasured import app.model.CommitStats import app.model.DiffFile class JavaExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "JavaExtractor-" const val LANGUAGE_NAME = Lang.JAVA val KEYWORDS = listOf("abstract", "continue", "for", "new", "switch", "assert", "default", "goto", "package", "synchronized", "boolean", @@ -18,10 +20,22 @@ class JavaExtractor : ExtractorInterface { "extends", "int", "short", "try", "char", "final", "interface", "static", "void", "class", "finally", "long", "strictfp", "volatile", "const", "float", "native", "super", "while") - val importRegex = Regex("""^(.*import)\s[^\n]*""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val packageRegex = Regex("""^(.*package)\s[^\n]*""") - val extractImportRegex = Regex("""import\s+(\w+[.\w+]*)""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^(.*import)\s[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) + val packageRegex = RegexMeasured( + CLASS_TAG + "packageRegex", + """^(.*package)\s[^\n]*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex", + """import\s+(\w+[.\w+]*)""" + ) } override fun extract(files: List<DiffFile>): List<CommitStats> { diff --git a/src/main/kotlin/app/extractors/JavascriptExtractor.kt 
b/src/main/kotlin/app/extractors/JavascriptExtractor.kt index 304358e2..1738751f 100644 --- a/src/main/kotlin/app/extractors/JavascriptExtractor.kt +++ b/src/main/kotlin/app/extractors/JavascriptExtractor.kt @@ -4,16 +4,31 @@ package app.extractors +import app.RegexMeasured import app.model.CommitStats import app.model.DiffFile +import app.split class JavascriptExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "JavascriptExtractor-" const val LANGUAGE_NAME = Lang.JAVASCRIPT - val splitRegex = Regex("""\s+|,|;|:|\*|\n|\(|\)|\[|]|\{|}|\+|=|\.|>|<|#|@|\$""") - val multilineCommentRegex = Regex("""/\*.+?\*/""") - val twoOrMoreWordsRegex = Regex("""(".+?\s.+?"|'.+?\s.+?')""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") + val splitRegex = RegexMeasured( + CLASS_TAG + "splitRegex", + """\s+|,|;|:|\*|\n|\(|\)|\[|]|\{|}|\+|=|\.|>|<|#|@|\$""" + ) + val multilineCommentRegex = RegexMeasured( + CLASS_TAG + "multilineCommentRegex", + """/\*.+?\*/""" + ) + val twoOrMoreWordsRegex = RegexMeasured( + CLASS_TAG + "twoOrMoreWordsRegex", + """(".+?\s.+?"|'.+?\s.+?')""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/KotlinExtractor.kt b/src/main/kotlin/app/extractors/KotlinExtractor.kt index 7a621e77..f9627a8c 100644 --- a/src/main/kotlin/app/extractors/KotlinExtractor.kt +++ b/src/main/kotlin/app/extractors/KotlinExtractor.kt @@ -4,13 +4,27 @@ package app.extractors +import app.RegexMeasured + class KotlinExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "KotlinExtractor-" const val LANGUAGE_NAME = Lang.KOTLIN - val importRegex = Regex("""^(.*import)\s[^\n]*""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val packageRegex = Regex("""^(.*package)\s[^\n]*""") - val extractImportRegex = Regex("""import\s+(\w+[.\w+]*)""") + val importRegex = 
RegexMeasured( + CLASS_TAG + "importRegex", + """^(.*import)\s[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) + val packageRegex = RegexMeasured( + CLASS_TAG + "packageRegex", + """^(.*package)\s[^\n]*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex", + """import\s+(\w+[.\w+]*)""") } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/LineCache.kt b/src/main/kotlin/app/extractors/LineCache.kt new file mode 100644 index 00000000..73c86f99 --- /dev/null +++ b/src/main/kotlin/app/extractors/LineCache.kt @@ -0,0 +1,11 @@ +package app.extractors + +class CachingTokenizer { + val cache = hashMapOf<String, List<String>>() + + fun tokenize(line: String, extractor: ExtractorInterface): + List<String> { + /* NOTE(review): the key is the line text only, so entries are shared by every extractor passed in - confirm that is intended */ + return cache.getOrPut(line) { extractor.tokenize(line) } + } +} \ No newline at end of file diff --git a/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt b/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt index 953cc804..78620853 100644 --- a/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt +++ b/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt @@ -4,14 +4,28 @@ package app.extractors +import app.RegexMeasured + class ObjectiveCExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "ObjectiveCExtractor-" const val LANGUAGE_NAME = Lang.OBJECTIVEC - val importRegex = Regex("""^([^\n]*[#@](import|include))\s[^\n]*""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val sharpImportIncludeRegex = - Regex("""[#@](import|include)\s+["<](\w+)[/\w+]*\.\w+[">]""") - val atImportRegex = Regex("""@import\s+(\w+)""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^([^\n]*[#@](import|include))\s[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" 
+ ) + val sharpImportIncludeRegex = RegexMeasured( + CLASS_TAG + "sharpImportIncludeRegex", + """[#@](import|include)\s+["<](\w+)[/\w+]*\.\w+[">]""" + ) + val atImportRegex = RegexMeasured( + CLASS_TAG + "atImportRegex", + """@import\s+(\w+)""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/PhpExtractor.kt b/src/main/kotlin/app/extractors/PhpExtractor.kt index b37cc199..af719d21 100644 --- a/src/main/kotlin/app/extractors/PhpExtractor.kt +++ b/src/main/kotlin/app/extractors/PhpExtractor.kt @@ -4,15 +4,30 @@ package app.extractors +import app.RegexMeasured + class PhpExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "PhpExtractor-" const val LANGUAGE_NAME = Lang.PHP - val importRegex = Regex("""^(.*require|require_once|include|""" + - """include_once|use)\s[^\n]*""") - val commentRegex = Regex("""^([^\n]*//)[^\n]*""") - val useRegex = Regex("""use\s+(\w+)[\\\w+]*""") - val requireIncludeRegex = Regex("""(require|require_once|include|""" + - """"include_once)\s*[(]?'(\w+)[.\w+]*'[)]?""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^(.*require|require_once|include|""" + + """include_once|use)\s[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*//)[^\n]*""" + ) + val useRegex = RegexMeasured( + CLASS_TAG + "useRegex", + """use\s+(\w+)[\\\w+]*""" + ) + val requireIncludeRegex = RegexMeasured( + CLASS_TAG + "requireIncludeRegex", + """(require|require_once|include|""" + + """"include_once)\s*[(]?'(\w+)[.\w+]*'[)]?""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/PlpgsqlExtractor.kt b/src/main/kotlin/app/extractors/PlpgsqlExtractor.kt index 14ebddce..ebdd394d 100644 --- a/src/main/kotlin/app/extractors/PlpgsqlExtractor.kt +++ b/src/main/kotlin/app/extractors/PlpgsqlExtractor.kt @@ -3,7 +3,16 @@ package app.extractors +import 
app.RegexMeasured + object PlpgsqlExtractor : ExtractorBase( language = Lang.PLPGSQL, - importRegex = Regex(""".+CREATE (?:EXTENSION|LANGUAGE) ([a-zA-Z_][a-zA-Z0-9_]*)"""), - commentRegex = Regex("""(--.*$)|(/[*].*?[*]/)""")) + importRegex = RegexMeasured( + "PlpgsqlExtractor-importRegex", + """.+CREATE (?:EXTENSION|LANGUAGE) ([a-zA-Z_][a-zA-Z0-9_]*)""" + ), + commentRegex = RegexMeasured( + "PlpgsqlExtractor-commentRegex", + """(--.*$)|(/[*].*?[*]/)""" + ) +) diff --git a/src/main/kotlin/app/extractors/PythonExtractor.kt b/src/main/kotlin/app/extractors/PythonExtractor.kt index d382a249..e8da60cd 100644 --- a/src/main/kotlin/app/extractors/PythonExtractor.kt +++ b/src/main/kotlin/app/extractors/PythonExtractor.kt @@ -4,22 +4,43 @@ package app.extractors +import app.RegexMeasured import app.model.CommitStats import app.model.DiffFile +import app.split class PythonExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "PythonExtractor-" const val LANGUAGE_NAME = Lang.PYTHON const val COMPREHENSION_MAP = "map" const val COMPREHENSION_LIST = "list" - val docImportRegex = Regex("""^([^\n]*#|\s*\"\"\"|\s*import|""" + - """\s*from)[^\n]*""") - val commentRegex = Regex("""^(.*#).*""") - val extractImportRegex = Regex("""(from\s+(\w+)[.\w+]*\s+import|""" + - """import\s+(\w+(,\s*\w+)*))(as\s+)*""") - val mapRegex = Regex("""(map\([^,]+?,)""") - val listRegex = Regex("""\[.+? for .+? 
in .+?]""") - val lineEndRegex = Regex(""",\s*""") + val docImportRegex = RegexMeasured( + CLASS_TAG + "docImportRegex", + """^([^\n]*#|\s*\"\"\"|\s*import|""" + + """\s*from)[^\n]*""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^(.*#).*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex", + """(from\s+(\w+)[.\w+]*\s+import|""" + + """import\s+(\w+(,\s*\w+)*))(as\s+)*""" + ) + val mapRegex = RegexMeasured( + CLASS_TAG + "mapRegex", + """(map\([^,]+?,)""" + ) + val listRegex = RegexMeasured( + CLASS_TAG + "listRegex", + """\[.+? for .+? in .+?]""" + ) + val lineEndRegex = RegexMeasured( + CLASS_TAG + "lineEndRegex", + """,\s*""" + ) } override fun extract(files: List<DiffFile>): List<CommitStats> { diff --git a/src/main/kotlin/app/extractors/RubyExtractor.kt b/src/main/kotlin/app/extractors/RubyExtractor.kt index 08cc7b6d..dccbb307 100644 --- a/src/main/kotlin/app/extractors/RubyExtractor.kt +++ b/src/main/kotlin/app/extractors/RubyExtractor.kt @@ -4,14 +4,28 @@ package app.extractors +import app.RegexMeasured + class RubyExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "RubyExtractor-" const val LANGUAGE_NAME = Lang.RUBY - val importRegex = Regex("""(require\s+'(\w+)'|load\s+'(\w+)\.\w+')""") - val commentRegex = Regex("""^([^\n]*#)[^\n]*""") - val extractImportRegex = - Regex("""(require\s+'(.+)'|load\s+'(\w+)\.\w+')""") - val includeRegex = Regex("""include\s+(\w+)::.+""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """(require\s+'(\w+)'|load\s+'(\w+)\.\w+')""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + """^([^\n]*#)[^\n]*""" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex", + """(require\s+'(.+)'|load\s+'(\w+)\.\w+')""" + ) + val includeRegex = RegexMeasured( + CLASS_TAG + "includeRegex", + """include\s+(\w+)::.+""" + ) } override fun extractImports(fileContent: List<String>): List<String> { 
diff --git a/src/main/kotlin/app/extractors/RustExtractor.kt b/src/main/kotlin/app/extractors/RustExtractor.kt index 589f1405..bfb601c0 100644 --- a/src/main/kotlin/app/extractors/RustExtractor.kt +++ b/src/main/kotlin/app/extractors/RustExtractor.kt @@ -3,12 +3,24 @@ package app.extractors +import app.RegexMeasured + class RustExtractor : ExtractorInterface { companion object { + const val CLASS_TAG = "RustExtractor-" const val LANGUAGE_NAME = Lang.RUST - val importRegex = Regex("""^extern crate \w+;$""") - val commentRegex = Regex("(//.+$)|(/[*].*?[*]/)") - val extractImportRegex = Regex("""^extern crate (\w+);$""") + val importRegex = RegexMeasured( + CLASS_TAG + "importRegex", + """^extern crate \w+;$""" + ) + val commentRegex = RegexMeasured( + CLASS_TAG + "commentRegex", + "(//.+$)|(/[*].*?[*]/)" + ) + val extractImportRegex = RegexMeasured( + CLASS_TAG + "extractImportRegex", + """^extern crate (\w+);$""" + ) } override fun extractImports(fileContent: List<String>): List<String> { diff --git a/src/main/kotlin/app/extractors/ScalaExtractor.kt b/src/main/kotlin/app/extractors/ScalaExtractor.kt index 1679ead8..67910bbc 100644 --- a/src/main/kotlin/app/extractors/ScalaExtractor.kt +++ b/src/main/kotlin/app/extractors/ScalaExtractor.kt @@ -3,8 +3,16 @@ package app.extractors +import app.RegexMeasured + object ScalaExtractor : ExtractorBase( language = Lang.SCALA, - importRegex = Regex("""^import (?:_root_\.)?((?:\.?[a-z]+)+\.)"""), - commentRegex = Regex("(//.+$)|(/[*].*?[*]/)"), + importRegex = RegexMeasured( + "ScalaExtractor-importRegex", + """^import (?:_root_\.)?((?:\.?[a-z]+)+\.)""" + ), + commentRegex = RegexMeasured( + "ScalaExtractor-commentRegex", + "(//.+$)|(/[*].*?[*]/)" + ), importStartsWith = true) diff --git a/src/main/kotlin/app/extractors/SwiftExtractor.kt b/src/main/kotlin/app/extractors/SwiftExtractor.kt index c2dc4782..442d7576 100644 --- a/src/main/kotlin/app/extractors/SwiftExtractor.kt +++ b/src/main/kotlin/app/extractors/SwiftExtractor.kt 
@@ -4,7 +4,16 @@ package app.extractors +import app.RegexMeasured + object SwiftExtractor : ExtractorBase( language = Lang.SWIFT, - importRegex = Regex("""import\s+(\w+)"""), - commentRegex = Regex("""^([^\n]*//)[^\n]*""")) + importRegex = RegexMeasured( + "SwiftExtractor-importRegex", + """import\s+(\w+)""" + ), + commentRegex = RegexMeasured( + "SwiftExtractor-commentRegex", + """^([^\n]*//)[^\n]*""" + ) +) diff --git a/src/main/kotlin/app/hashers/CommitCrawler.kt b/src/main/kotlin/app/hashers/CommitCrawler.kt index 084c5a34..c7758721 100644 --- a/src/main/kotlin/app/hashers/CommitCrawler.kt +++ b/src/main/kotlin/app/hashers/CommitCrawler.kt @@ -5,6 +5,8 @@ package app.hashers import app.Logger +import app.Measurements +import app.RegexMeasured import app.model.Author import app.model.Commit import app.model.DiffContent @@ -29,6 +31,7 @@ import org.eclipse.jgit.treewalk.filter.PathFilter import org.eclipse.jgit.treewalk.TreeWalk import org.eclipse.jgit.util.io.DisabledOutputStream import java.util.LinkedList +import kotlin.system.measureNanoTime data class JgitData(var commit: RevCommit? = null, var list: List<JgitDiff>? = null, @@ -43,6 +46,8 @@ data class JgitDiff(val diffEntry: DiffEntry, val editList: EditList) * Iterates over the diffs between commits in the repo's history. 
*/ object CommitCrawler { + const val CLASS_TAG = "CommitCrawler-" + private const val REMOTE_HEAD = "refs/remotes/origin/HEAD" private const val REMOTE_MASTER_BRANCH = "refs/remotes/origin/master" private const val LOCAL_MASTER_BRANCH = "refs/heads/master" @@ -51,7 +56,10 @@ object CommitCrawler { LOCAL_MASTER_BRANCH, LOCAL_HEAD) private val CONF_FILE_PATH = ".sourcerer-conf" private val MAX_DIFF_SIZE = 600000 - private val coauthoredRegex = Regex("""Co-authored-by: (.+) <(.+)>""") + private val coauthoredRegex = RegexMeasured( + CLASS_TAG + "CoauthoredRegex", + """Co-authored-by: (.+) <(.+)>""" + ) fun getDefaultBranchHead(git: Git): ObjectId { for (ref in REFS) { @@ -145,25 +153,34 @@ object CommitCrawler { var commit: RevCommit? = revWalk.next() // Move the walker to the head. while (commit != null && commit != tail) { commitCount++ - val parentCommit: RevCommit? = revWalk.next() - - // Smart casts are not yet supported for a mutable variable captured - // in an inline lambda, see - // https://youtrack.jetbrains.com/issue/KT-7186. - if (Logger.isTrace) { - val commitName = commit.name - val commitMsg = commit.shortMessage - Logger.trace { "commit: $commitName; '$commitMsg'" } - if (parentCommit != null) { - val parentCommitName = parentCommit.name - val parentCommitMsg = parentCommit.shortMessage - Logger.trace { "parent commit: $parentCommitName; " + - "'$parentCommitMsg'" } - } - else { - Logger.trace { "parent commit: null" } + var parentCommit: RevCommit? = null + + val timeWalkNext = measureNanoTime { + parentCommit = revWalk.next() + } + Measurements.addMeasurement(CLASS_TAG + "WalkNext", timeWalkNext) + + val timeTraceLogs = measureNanoTime { + // Smart casts are not yet supported for a mutable variable captured + // in an inline lambda, see + // https://youtrack.jetbrains.com/issue/KT-7186. 
+ if (Logger.isTrace) { + val commitName = commit!!.name + val commitMsg = commit!!.shortMessage + Logger.trace { "commit: $commitName; '$commitMsg'" } + if (parentCommit != null) { + val parentCommitName = parentCommit!!.name + val parentCommitMsg = parentCommit!!.shortMessage + Logger.trace { + "parent commit: $parentCommitName; " + + "'$parentCommitMsg'" + } + } else { + Logger.trace { "parent commit: null" } + } } } + Measurements.addMeasurement(CLASS_TAG + "TraceLogs", timeTraceLogs) val perc = if (totalCommitCount != 0) { (commitCount.toDouble() / totalCommitCount) * 100 @@ -177,59 +194,73 @@ object CommitCrawler { } val paths = mutableListOf<String>() - val diffEntries = df.scan(parentCommit, commit) - .filter { diff -> + var diffEntriesAll:List<DiffEntry>? = null + val timeScan = measureNanoTime { + diffEntriesAll = df.scan(parentCommit, commit) + } + Measurements.addMeasurement(CLASS_TAG + "DiffScan", timeScan) + + val diffEntries = diffEntriesAll!!.filter { diff -> diff.changeType != DiffEntry.ChangeType.COPY } .filter { diff -> - val path = diff.newPath - for (cnv in VendorConventions) { - if (cnv.containsMatchIn(path) || - cnv.containsMatchIn(diff.oldPath)) { - return@filter false + var res = false + val time = measureNanoTime { + val path = diff.newPath + for (cnv in VendorConventions) { + if (cnv.containsMatchIn(path) || + cnv.containsMatchIn(diff.oldPath)) { + return@filter false + } } - } - val fileId = - if (path != DiffEntry.DEV_NULL) { - diff.newId.toObjectId() - } else { - diff.oldId.toObjectId() + val fileId = + if (path != DiffEntry.DEV_NULL) { + diff.newId.toObjectId() + } else { + diff.oldId.toObjectId() + } + val stream = try { + repo.open(fileId).openStream() + } catch (e: Exception) { + null } - val stream = try { - repo.open(fileId).openStream() - } catch (e: Exception) { - null + res = stream != null && !RawText.isBinary(stream) } - stream != null && !RawText.isBinary(stream) + Measurements.addMeasurement(CLASS_TAG + "IsBinaryFilter", 
time) + res } .filter { diff -> - val filePath = - if (diff.getNewPath() != DiffEntry.DEV_NULL) { - diff.getNewPath() - } else { - diff.getOldPath() + var res = false + val time = measureNanoTime { + val filePath = + if (diff.getNewPath() != DiffEntry.DEV_NULL) { + diff.getNewPath() + } else { + diff.getOldPath() + } + + // Update ignored paths list. The config file has retroactive + // force, i.e. if it was added at this commit, then we presume + // it is applied to all commits, preceding this commit. + if (diff.getOldPath() == CONF_FILE_PATH) { + ignoredPaths = + getIgnoredPaths(repo, diff.getNewId().toObjectId()) } - // Update ignored paths list. The config file has retroactive - // force, i.e. if it was added at this commit, then we presume - // it is applied to all commits, preceding this commit. - if (diff.getOldPath() == CONF_FILE_PATH) { - ignoredPaths = - getIgnoredPaths(repo, diff.getNewId().toObjectId()) + res = if (!ignoredPaths.any { path -> + if (path.endsWith("/")) { + filePath.startsWith(path) + } else { + path == filePath + } + }) { + paths.add(filePath) + true + } else false } - - if (!ignoredPaths.any { path -> - if (path.endsWith("/")) { - filePath.startsWith(path) - } - else { - path == filePath - } - }) { - paths.add(filePath) - true - } else false + Measurements.addMeasurement(CLASS_TAG + "IgnorePathFilter", time) + res } val jgitData = JgitData() @@ -237,16 +268,19 @@ object CommitCrawler { jgitData.commit = commit } if (extractDiffs) { - val diffEdits = diffEntries - .map { diff -> - JgitDiff(diff, df.toFileHeader(diff).toEditList()) - } - .filter { diff -> - diff.editList.fold(0) { acc, edit -> - acc + edit.lengthA + edit.lengthB - } < MAX_DIFF_SIZE + val time = measureNanoTime { + val diffEdits = diffEntries + .map { diff -> + JgitDiff(diff, df.toFileHeader(diff).toEditList()) + } + .filter { diff -> + diff.editList.fold(0) { acc, edit -> + acc + edit.lengthA + edit.lengthB + } < MAX_DIFF_SIZE + } + jgitData.list = diffEdits } - 
jgitData.list = diffEdits + Measurements.addMeasurement(CLASS_TAG + "ExtractDiffs", time) } if (extractPaths) { jgitData.paths = paths @@ -279,17 +313,24 @@ object CommitCrawler { return jgitObservable.map( { jgitData -> // Mapping and stats extraction. val commit = Commit(jgitData.commit!!, jgitData.coauthors) - commit.diffs = getDiffFiles(git.repository, jgitData.list!!) + val timeGetDiffFiles = measureNanoTime { + commit.diffs = getDiffFiles(git.repository, jgitData.list!!) + } + Measurements.addMeasurement(CLASS_TAG + "GetDiffFiles", timeGetDiffFiles) // Count lines on all non-binary files. This is additional // statistics to CommitStats because not all file extensions // may be supported. - commit.numLinesAdded = commit.diffs.fold(0) { total, file -> - total + file.getAllAdded().size - } - commit.numLinesDeleted = commit.diffs.fold(0) { total, file -> - total + file.getAllDeleted().size + val timeFoldDiffs = measureNanoTime { + commit.numLinesAdded = commit.diffs.fold(0) { total, file -> + total + file.getAllAdded().size + } + commit.numLinesDeleted = commit.diffs.fold(0) { total, file -> + total + file.getAllDeleted().size + } } + Measurements.addMeasurement(CLASS_TAG + "FoldDiffs", timeFoldDiffs) + commit.repo = repo commit diff --git a/src/main/kotlin/app/hashers/CommitHasher.kt b/src/main/kotlin/app/hashers/CommitHasher.kt index ccc4b9af..0149fc36 100644 --- a/src/main/kotlin/app/hashers/CommitHasher.kt +++ b/src/main/kotlin/app/hashers/CommitHasher.kt @@ -4,12 +4,14 @@ package app.hashers import app.Logger +import app.Measurements import app.api.Api import app.extractors.Extractor import app.model.Commit import app.model.Repo import io.reactivex.Observable import java.util.concurrent.TimeUnit +import kotlin.system.measureNanoTime /** * CommitHasher hashes repository and uploads stats to server. 
@@ -19,6 +21,8 @@ class CommitHasher(private val serverRepo: Repo = Repo(), private val rehashes: List<String>, private val emails: HashSet<String>) { + val CLASS_TAG = "CommitHasher-" + init { // Delete locally missing commits from server. If found at least one // common commit then preceding commits are not deleted because hash of @@ -40,16 +44,19 @@ class CommitHasher(private val serverRepo: Repo = Repo(), // Hash only commits made by authors with specified emails. .filter { commit -> emails.contains(commit.author.email) } .map { commit -> - Logger.printCommitDetail("Extracting stats") + val time = measureNanoTime { + Logger.printCommitDetail("Extracting stats") - // Mapping and stats extraction. - commit.stats = Extractor().extract(commit.diffs) - val statsNumStr = if (commit.stats.isNotEmpty()) { - commit.stats.size.toString() - } else "No" + // Mapping and stats extraction. + commit.stats = Extractor().extract(commit.diffs) + val statsNumStr = if (commit.stats.isNotEmpty()) { + commit.stats.size.toString() + } else "No" - Logger.printCommitDetail("$statsNumStr technology stats found") - Logger.debug { commit.stats.toString() } + Logger.printCommitDetail("$statsNumStr technology stats found") + Logger.debug { commit.stats.toString() } + } + Measurements.addMeasurement(CLASS_TAG + "MapCommit", time) commit } diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 8a3d5d1a..6ffb2709 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -6,7 +6,9 @@ package app.hashers import app.FactCodes import app.Logger +import app.RegexMeasured import app.api.Api +import app.contains import app.extractors.Extractor import app.model.Author import app.model.Commit @@ -23,6 +25,8 @@ class FactHasher(private val serverRepo: Repo = Repo(), private val api: Api, private val rehashes: List<String>, private val emails: HashSet<String>) { + private val CLASS_TAG = "FactHasher-" + 
private val fsDayWeek = hashMapOf<String, Array<Int>>() private val fsDayTime = hashMapOf<String, Array<Int>>() private val fsRepoDateStart = hashMapOf<String, Long>() @@ -114,7 +118,10 @@ class FactHasher(private val serverRepo: Repo = Repo(), val tokens = Extractor().tokenize(line) val underscores = tokens.count { it.contains('_') } val camelCases = tokens.count { - !it.contains('_') && it.contains(Regex("[a-z][A-Z]")) + !it.contains('_') && it.contains(RegexMeasured( + CLASS_TAG + "VariableNaming", + "[a-z][A-Z]" + )) } val others = tokens.size - underscores - camelCases fsVariableNaming[email]!![FactCodes.VARIABLE_NAMING_SNAKE_CASE] += diff --git a/src/main/kotlin/app/hashers/RepoHasher.kt b/src/main/kotlin/app/hashers/RepoHasher.kt index 11dc91c7..139d31dd 100644 --- a/src/main/kotlin/app/hashers/RepoHasher.kt +++ b/src/main/kotlin/app/hashers/RepoHasher.kt @@ -5,6 +5,7 @@ package app.hashers import app.BuildConfig import app.Logger +import app.Measurements import app.api.Api import app.config.Configurator import app.model.Author @@ -19,11 +20,17 @@ import app.utils.batch import org.eclipse.jgit.api.Git import java.io.File import java.io.IOException +import java.time.Duration +import java.time.LocalDateTime import kotlin.collections.HashSet +import kotlin.system.measureNanoTime class RepoHasher(private val api: Api, private val configurator: Configurator) { + val CLASS_TAG = "RepoHasher-" + fun update(localRepo: LocalRepo) { + val startTime = LocalDateTime.now() Logger.debug { "RepoHasher.update call: $localRepo" } val processEntryId = localRepo.processEntryId @@ -79,8 +86,11 @@ class RepoHasher(private val api: Api, // Hash by all plugins. 
if (BuildConfig.COMMIT_HASHER_ENABLED) { - CommitHasher(serverRepo, api, rehashes, filteredEmails) - .updateFromObservable(observable, onError) + val time = measureNanoTime { + CommitHasher(serverRepo, api, rehashes, filteredEmails) + .updateFromObservable(observable, onError) + } + Measurements.addMeasurement(CLASS_TAG + "CommitHasher", time) } if (BuildConfig.FACT_HASHER_ENABLED) { FactHasher(serverRepo, api, rehashes, filteredEmails) @@ -116,6 +126,10 @@ class RepoHasher(private val api: Api, } Logger.info(Logger.Events.HASHING_REPO_SUCCESS) { "Hashing repo completed" } + val endTime = LocalDateTime.now() + val elapsed = Duration.between(startTime, endTime).toMillis() + Logger.info {"Hashing took $elapsed milliseconds"} + updateProcess(processEntryId, Api.PROCESS_STATUS_COMPLETE) } catch (e: EmptyRepoException) { updateProcess(processEntryId, Api.PROCESS_STATUS_FAIL, @@ -127,6 +141,7 @@ class RepoHasher(private val api: Api, } finally { closeGit(git) } + } private fun loadGit(path: String): Git { diff --git a/src/main/kotlin/app/hashers/Vendors.kt b/src/main/kotlin/app/hashers/Vendors.kt index 108a11d4..6a18e58f 100644 --- a/src/main/kotlin/app/hashers/Vendors.kt +++ b/src/main/kotlin/app/hashers/Vendors.kt @@ -5,366 +5,370 @@ package app.hashers +import app.RegexMeasured + +const val CLASS_TAG = "Vendors-" + /** * List of regexps that are matched against file pathname. Used to filter out * vendor specific files from the repo statistics. 
*/ val VendorConventions = listOf( // Caches - Regex("""(^|/)cache/"""), + RegexMeasured(CLASS_TAG, """(^|/)cache/"""), // Dependencies - Regex("""^[Dd]ependencies/"""), + RegexMeasured(CLASS_TAG, """^[Dd]ependencies/"""), // Distributions - Regex("""(^|/)dist/"""), + RegexMeasured(CLASS_TAG, """(^|/)dist/"""), // C deps - Regex("""^deps/"""), - Regex("""(^|/)configure$"""), - Regex("""(^|/)config.guess$"""), - Regex("""(^|/)config.sub$"""), + RegexMeasured(CLASS_TAG, """^deps/"""), + RegexMeasured(CLASS_TAG, """(^|/)configure$"""), + RegexMeasured(CLASS_TAG, """(^|/)config.guess$"""), + RegexMeasured(CLASS_TAG, """(^|/)config.sub$"""), // stuff autogenerated by autoconf - still C deps - Regex("""(^|/)aclocal.m4"""), - Regex("""(^|/)libtool.m4"""), - Regex("""(^|/)ltoptions.m4"""), - Regex("""(^|/)ltsugar.m4"""), - Regex("""(^|/)ltversion.m4"""), - Regex("""(^|/)lt~obsolete.m4"""), + RegexMeasured(CLASS_TAG, """(^|/)aclocal.m4"""), + RegexMeasured(CLASS_TAG, """(^|/)libtool.m4"""), + RegexMeasured(CLASS_TAG, """(^|/)ltoptions.m4"""), + RegexMeasured(CLASS_TAG, """(^|/)ltsugar.m4"""), + RegexMeasured(CLASS_TAG, """(^|/)ltversion.m4"""), + RegexMeasured(CLASS_TAG, """(^|/)lt~obsolete.m4"""), // Linters - Regex("""cpplint.py"""), + RegexMeasured(CLASS_TAG, """cpplint.py"""), // Node dependencies - Regex("""node_modules/"""), + RegexMeasured(CLASS_TAG, """node_modules/"""), // Bower Components - Regex("""bower_components/"""), + RegexMeasured(CLASS_TAG, """bower_components/"""), // Erlang bundles - Regex("""^rebar$"""), - Regex("""erlang.mk"""), + RegexMeasured(CLASS_TAG, """^rebar$"""), + RegexMeasured(CLASS_TAG, """erlang.mk"""), // Go dependencies - Regex("""Godeps/_workspace/"""), + RegexMeasured(CLASS_TAG, """Godeps/_workspace/"""), // GNU indent profiles - Regex(""".indent.pro"""), + RegexMeasured(CLASS_TAG, """.indent.pro"""), // Minified JavaScript and CSS - Regex("""(\.|-)min\.(js|css)$"""), + RegexMeasured(CLASS_TAG, """(\.|-)min\.(js|css)$"""), // 
Stylesheets imported from packages - Regex("""([^\s]*)import\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """([^\s]*)import\.(css|less|scss|styl)$"""), // Bootstrap css and js - Regex("""(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$"""), - Regex("""(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$"""), // Font Awesome - Regex("""(^|/)font-awesome\.(css|less|scss|styl)$"""), - Regex("""(^|/)font-awesome/.*\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)font-awesome\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)font-awesome/.*\.(css|less|scss|styl)$"""), // Foundation css - Regex("""(^|/)foundation\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)foundation\.(css|less|scss|styl)$"""), // Normalize.css - Regex("""(^|/)normalize\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)normalize\.(css|less|scss|styl)$"""), // Skeleton.css - Regex("""(^|/)skeleton\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)skeleton\.(css|less|scss|styl)$"""), // Bourbon css - Regex("""(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$"""), // Animate.css - Regex("""(^|/)animate\.(css|less|scss|styl)$"""), + RegexMeasured(CLASS_TAG, """(^|/)animate\.(css|less|scss|styl)$"""), // Materialize.css - Regex("""(^|/)materialize\.(css|less|scss|styl|js)$"""), + RegexMeasured(CLASS_TAG, """(^|/)materialize\.(css|less|scss|styl|js)$"""), // Select2 - Regex("""(^|/)select2/.*\.(css|scss|js)$"""), + RegexMeasured(CLASS_TAG, """(^|/)select2/.*\.(css|scss|js)$"""), // Vendored dependencies - Regex("""third[-_]?party/"""), - Regex("""3rd[-_]?party/"""), - Regex("""vendors?/"""), - Regex("""extern(al)?/"""), - Regex("""(^|/)[Vv]+endor/"""), + RegexMeasured(CLASS_TAG, 
"""third[-_]?party/"""), + RegexMeasured(CLASS_TAG, """3rd[-_]?party/"""), + RegexMeasured(CLASS_TAG, """vendors?/"""), + RegexMeasured(CLASS_TAG, """extern(al)?/"""), + RegexMeasured(CLASS_TAG, """(^|/)[Vv]+endor/"""), // Debian packaging - Regex("""^debian/"""), + RegexMeasured(CLASS_TAG, """^debian/"""), // Haxelib projects often contain a neko bytecode file named run.n - Regex("""run.n$"""), + RegexMeasured(CLASS_TAG, """run.n$"""), // Bootstrap Datepicker - Regex("""bootstrap-datepicker/"""), + RegexMeasured(CLASS_TAG, """bootstrap-datepicker/"""), // Commonly Bundled JavaScript frameworks // jQuery - Regex("""(^|/)jquery([^.]*)\.js$"""), - Regex("""(^|/)jquery\-\d\.\d+(\.\d+)?\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)jquery([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)jquery\-\d\.\d+(\.\d+)?\.js$"""), // jQuery UI - Regex("""(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$"""), - Regex("""(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$"""), + RegexMeasured(CLASS_TAG, """(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$"""), + RegexMeasured(CLASS_TAG, """(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$"""), // jQuery Gantt - Regex("""jquery.fn.gantt.js"""), + RegexMeasured(CLASS_TAG, """jquery.fn.gantt.js"""), // jQuery fancyBox - Regex("""jquery.fancybox.(js|css)"""), + RegexMeasured(CLASS_TAG, """jquery.fancybox.(js|css)"""), // Fuel UX - Regex("""fuelux.js"""), + RegexMeasured(CLASS_TAG, """fuelux.js"""), // jQuery File Upload - Regex("""(^|/)jquery\.fileupload(-\w+)?\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)jquery\.fileupload(-\w+)?\.js$"""), // jQuery dataTables - Regex("""jquery.dataTables.js"""), + RegexMeasured(CLASS_TAG, """jquery.dataTables.js"""), // bootboxjs - Regex("""bootbox.js"""), + RegexMeasured(CLASS_TAG, """bootbox.js"""), // pdf-worker - Regex("""pdf.worker.js"""), + RegexMeasured(CLASS_TAG, """pdf.worker.js"""), // Slick - Regex("""(^|/)slick\.\w+.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)slick\.\w+.js$"""), // 
Leaflet plugins - Regex("""(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$"""), - Regex("""leaflet.draw-src.js"""), - Regex("""leaflet.draw.css"""), - Regex("""Control.FullScreen.css"""), - Regex("""Control.FullScreen.js"""), - Regex("""leaflet.spin.js"""), - Regex("""wicket-leaflet.js"""), + RegexMeasured(CLASS_TAG, """(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$"""), + RegexMeasured(CLASS_TAG, """leaflet.draw-src.js"""), + RegexMeasured(CLASS_TAG, """leaflet.draw.css"""), + RegexMeasured(CLASS_TAG, """Control.FullScreen.css"""), + RegexMeasured(CLASS_TAG, """Control.FullScreen.js"""), + RegexMeasured(CLASS_TAG, """leaflet.spin.js"""), + RegexMeasured(CLASS_TAG, """wicket-leaflet.js"""), // Sublime Text workspace files - Regex(""".sublime-project"""), - Regex(""".sublime-workspace"""), + RegexMeasured(CLASS_TAG, """.sublime-project"""), + RegexMeasured(CLASS_TAG, """.sublime-workspace"""), // VS Code workspace files - Regex(""".vscode"""), + RegexMeasured(CLASS_TAG, """.vscode"""), // Prototype - Regex("""(^|/)prototype(.*)\.js$"""), - Regex("""(^|/)effects\.js$"""), - Regex("""(^|/)controls\.js$"""), - Regex("""(^|/)dragdrop\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)prototype(.*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)effects\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)controls\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)dragdrop\.js$"""), // Typescript definition files - Regex("""(.*?)\.d\.ts$"""), + RegexMeasured(CLASS_TAG, """(.*?)\.d\.ts$"""), // MooTools - Regex("""(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$"""), // Dojo - Regex("""(^|/)dojo\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)dojo\.js$"""), // MochiKit - Regex("""(^|/)MochiKit\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)MochiKit\.js$"""), // YUI - Regex("""(^|/)yahoo-([^.]*)\.js$"""), - Regex("""(^|/)yui([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)yahoo-([^.]*)\.js$"""), + 
RegexMeasured(CLASS_TAG, """(^|/)yui([^.]*)\.js$"""), // WYS editors - Regex("""(^|/)ckeditor\.js$"""), - Regex("""(^|/)tiny_mce([^.]*)\.js$"""), - Regex("""(^|/)tiny_mce/(langs|plugins|themes|utils)"""), + RegexMeasured(CLASS_TAG, """(^|/)ckeditor\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)tiny_mce([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)tiny_mce/(langs|plugins|themes|utils)"""), // Ace Editor - Regex("""(^|/)ace-builds/"""), + RegexMeasured(CLASS_TAG, """(^|/)ace-builds/"""), // Fontello CSS files - Regex("""(^|/)fontello(.*?)\.css$"""), + RegexMeasured(CLASS_TAG, """(^|/)fontello(.*?)\.css$"""), // MathJax - Regex("""(^|/)MathJax/"""), + RegexMeasured(CLASS_TAG, """(^|/)MathJax/"""), // Chart.js - Regex("""(^|/)Chart\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)Chart\.js$"""), // CodeMirror - Regex("""(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)"""), + RegexMeasured(CLASS_TAG, """(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)"""), // SyntaxHighlighter - http://alexgorbatchev.com/ - Regex("""(^|/)shBrush([^.]*)\.js$"""), - Regex("""(^|/)shCore\.js$"""), - Regex("""(^|/)shLegacy\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)shBrush([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)shCore\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)shLegacy\.js$"""), // AngularJS - Regex("""(^|/)angular([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)angular([^.]*)\.js$"""), // D3.js - Regex("""(^|\/)d3(\.v\d+)?([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|\/)d3(\.v\d+)?([^.]*)\.js$"""), // React - Regex("""(^|/)react(-[^.]*)?\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)react(-[^.]*)?\.js$"""), // flow-typed - Regex("""(^|/)flow-typed/.*\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)flow-typed/.*\.js$"""), // Modernizr - Regex("""(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$"""), - Regex("""(^|/)modernizr\.custom\.\d+\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$"""), + 
RegexMeasured(CLASS_TAG, """(^|/)modernizr\.custom\.\d+\.js$"""), // Knockout - Regex("""(^|/)knockout-(\d+\.){3}(debug\.)?js$"""), + RegexMeasured(CLASS_TAG, """(^|/)knockout-(\d+\.){3}(debug\.)?js$"""), // Python // Sphinx - Regex("""(^|/)docs?/_?(build|themes?|templates?|static)/"""), + RegexMeasured(CLASS_TAG, """(^|/)docs?/_?(build|themes?|templates?|static)/"""), // django - Regex("""(^|/)admin_media/"""), - Regex("""(^|/)env/"""), + RegexMeasured(CLASS_TAG, """(^|/)admin_media/"""), + RegexMeasured(CLASS_TAG, """(^|/)env/"""), // Fabric - Regex("""^fabfile\.py$"""), + RegexMeasured(CLASS_TAG, """^fabfile\.py$"""), // WAF - Regex("""^waf$"""), + RegexMeasured(CLASS_TAG, """^waf$"""), // .osx - Regex("""^.osx$"""), + RegexMeasured(CLASS_TAG, """^.osx$"""), // Obj-C // Xcode - Regex("""\.xctemplate/"""), - Regex("""\.imageset/"""), - Regex("""\.xc.*/"""), - Regex("""(^|/)Info\.plist$"""), - Regex("""\.storyboard$"""), + RegexMeasured(CLASS_TAG, """\.xctemplate/"""), + RegexMeasured(CLASS_TAG, """\.imageset/"""), + RegexMeasured(CLASS_TAG, """\.xc.*/"""), + RegexMeasured(CLASS_TAG, """(^|/)Info\.plist$"""), + RegexMeasured(CLASS_TAG, """\.storyboard$"""), // Carthage - Regex("""(^|/)Carthage/"""), + RegexMeasured(CLASS_TAG, """(^|/)Carthage/"""), // Sparkle - Regex("""(^|/)Sparkle/"""), + RegexMeasured(CLASS_TAG, """(^|/)Sparkle/"""), // Crashlytics - Regex("""Crashlytics.framework/"""), + RegexMeasured(CLASS_TAG, """Crashlytics.framework/"""), // Fabric - Regex("""Fabric.framework/"""), + RegexMeasured(CLASS_TAG, """Fabric.framework/"""), // BuddyBuild - Regex("""BuddyBuildSDK.framework/"""), + RegexMeasured(CLASS_TAG, """BuddyBuildSDK.framework/"""), // Realm - Regex("""Realm.framework"""), + RegexMeasured(CLASS_TAG, """Realm.framework"""), // RealmSwift - Regex("""RealmSwift.framework"""), + RegexMeasured(CLASS_TAG, """RealmSwift.framework"""), // git config files - Regex("""gitattributes$"""), - Regex("""gitignore$"""), - Regex("""gitmodules$"""), + 
RegexMeasured(CLASS_TAG, """gitattributes$"""), + RegexMeasured(CLASS_TAG, """gitignore$"""), + RegexMeasured(CLASS_TAG, """gitmodules$"""), // Groovy // Gradle - Regex("""(^|/)gradlew$"""), - Regex("""(^|/)gradlew\.bat$"""), - Regex("""(^|/)gradle/wrapper/"""), + RegexMeasured(CLASS_TAG, """(^|/)gradlew$"""), + RegexMeasured(CLASS_TAG, """(^|/)gradlew\.bat$"""), + RegexMeasured(CLASS_TAG, """(^|/)gradle/wrapper/"""), // Java // Maven - Regex("""(^|/)mvnw$"""), - Regex("""(^|/)mvnw\.cmd$"""), - Regex("""(^|/)\.mvn/wrapper/"""), + RegexMeasured(CLASS_TAG, """(^|/)mvnw$"""), + RegexMeasured(CLASS_TAG, """(^|/)mvnw\.cmd$"""), + RegexMeasured(CLASS_TAG, """(^|/)\.mvn/wrapper/"""), // .NET // Visual Studio IntelliSense - Regex("""-vsdoc\.js$"""), - Regex("""\.intellisense\.js$"""), + RegexMeasured(CLASS_TAG, """-vsdoc\.js$"""), + RegexMeasured(CLASS_TAG, """\.intellisense\.js$"""), // jQuery validation plugin (MS bundles this with asp.net mvc), - Regex("""(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$"""), - Regex("""(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$"""), // Microsoft Ajax - Regex("""(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$"""), // NuGet - Regex("""^[Pp]ackages\/.+\.\d+\/"""), + RegexMeasured(CLASS_TAG, """^[Pp]ackages\/.+\.\d+\/"""), // ExtJS - Regex("""(^|/)extjs/.*?\.js$"""), - Regex("""(^|/)extjs/.*?\.xml$"""), - Regex("""(^|/)extjs/.*?\.txt$"""), - Regex("""(^|/)extjs/.*?\.html$"""), - Regex("""(^|/)extjs/.*?\.properties$"""), - Regex("""(^|/)extjs/.sencha/"""), - Regex("""(^|/)extjs/docs/"""), - Regex("""(^|/)extjs/builds/"""), - Regex("""(^|/)extjs/cmd/"""), - Regex("""(^|/)extjs/examples/"""), - Regex("""(^|/)extjs/locale/"""), - Regex("""(^|/)extjs/packages/"""), - 
Regex("""(^|/)extjs/plugins/"""), - Regex("""(^|/)extjs/resources/"""), - Regex("""(^|/)extjs/src/"""), - Regex("""(^|/)extjs/welcome/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/.*?\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/.*?\.xml$"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/.*?\.txt$"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/.*?\.html$"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/.*?\.properties$"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/.sencha/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/docs/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/builds/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/cmd/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/examples/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/locale/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/packages/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/plugins/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/resources/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/src/"""), + RegexMeasured(CLASS_TAG, """(^|/)extjs/welcome/"""), // Html5shiv - Regex("""(^|/)html5shiv\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)html5shiv\.js$"""), // Test fixtures - Regex("""^[Tt]ests?/fixtures/"""), - Regex("""^[Ss]pecs?/fixtures/"""), + RegexMeasured(CLASS_TAG, """^[Tt]ests?/fixtures/"""), + RegexMeasured(CLASS_TAG, """^[Ss]pecs?/fixtures/"""), // PhoneGap/Cordova - Regex("""(^|/)cordova([^.]*)\.js$"""), - Regex("""(^|/)cordova\-\d\.\d(\.\d)?\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)cordova([^.]*)\.js$"""), + RegexMeasured(CLASS_TAG, """(^|/)cordova\-\d\.\d(\.\d)?\.js$"""), // Foundation js - Regex("""foundation(\..*)?\.js$"""), + RegexMeasured(CLASS_TAG, """foundation(\..*)?\.js$"""), // Vagrant - Regex("""^Vagrantfile$"""), + RegexMeasured(CLASS_TAG, """^Vagrantfile$"""), // .DS_Stores - Regex(""".[Dd][Ss]_[Ss]tore$"""), + RegexMeasured(CLASS_TAG, """.[Dd][Ss]_[Ss]tore$"""), // R packages - Regex("""^vignettes/"""), - Regex("""^inst/extdata/"""), + RegexMeasured(CLASS_TAG, """^vignettes/"""), + 
RegexMeasured(CLASS_TAG, """^inst/extdata/"""), // Octicons - Regex("""octicons.css"""), - Regex("""sprockets-octicons.scss"""), + RegexMeasured(CLASS_TAG, """octicons.css"""), + RegexMeasured(CLASS_TAG, """sprockets-octicons.scss"""), // Typesafe Activator - Regex("""(^|/)activator$"""), - Regex("""(^|/)activator\.bat$"""), + RegexMeasured(CLASS_TAG, """(^|/)activator$"""), + RegexMeasured(CLASS_TAG, """(^|/)activator\.bat$"""), // ProGuard - Regex("""proguard.pro"""), - Regex("""proguard-rules.pro"""), + RegexMeasured(CLASS_TAG, """proguard.pro"""), + RegexMeasured(CLASS_TAG, """proguard-rules.pro"""), // PuPHPet - Regex("""^puphpet/"""), + RegexMeasured(CLASS_TAG, """^puphpet/"""), // Android Google APIs - Regex("""(^|/)\.google_apis/"""), + RegexMeasured(CLASS_TAG, """(^|/)\.google_apis/"""), // Jenkins Pipeline - Regex("""^Jenkinsfile$"""), + RegexMeasured(CLASS_TAG, """^Jenkinsfile$"""), // generated by BUCKLESCRIPT - Regex("""\.bs\.js$""") + RegexMeasured(CLASS_TAG, """\.bs\.js$""") )