From 1d4f2e4f216d43f808d2681d21cbc3d29c152669 Mon Sep 17 00:00:00 2001 From: Liubov Yaronskaya <lyaronskaya@sourcerer.io> Date: Tue, 23 Oct 2018 14:12:00 +0300 Subject: [PATCH 1/5] feat: colleagues(fast). --- build.gradle | 1 + src/main/kotlin/app/api/Api.kt | 10 +- src/main/kotlin/app/api/MockApi.kt | 17 +-- src/main/kotlin/app/api/ServerApi.kt | 24 ++-- src/main/kotlin/app/hashers/Colleagues.kt | 63 ++++++++++ src/main/kotlin/app/hashers/CommitCrawler.kt | 112 ++++++++++++++++++ src/main/kotlin/app/hashers/RepoHasher.kt | 8 +- src/main/kotlin/app/model/AuthorDistance.kt | 38 ++++++ .../kotlin/app/model/AuthorDistanceGroup.kt | 33 ++++++ src/main/proto/sourcerer.proto | 11 ++ .../tests/hashers/AuthorDistanceHasherTest.kt | 86 ++++++++++++++ 11 files changed, 377 insertions(+), 26 deletions(-) create mode 100644 src/main/kotlin/app/hashers/Colleagues.kt create mode 100644 src/main/kotlin/app/model/AuthorDistance.kt create mode 100644 src/main/kotlin/app/model/AuthorDistanceGroup.kt create mode 100644 src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt diff --git a/build.gradle b/build.gradle index dfe250a8..7fc9d45a 100644 --- a/build.gradle +++ b/build.gradle @@ -61,6 +61,7 @@ buildConfig { buildConfigField 'boolean', 'LONGEVITY_ENABLED', project.hasProperty('longevity-enabled') ? project.property('longevity-enabled').toString() : 'false' buildConfigField 'long', 'HEARTBEAT_RATE', project.hasProperty('heartbeat-rate') ? project.property('heartbeat-rate').toString() : '60000' buildConfigField 'boolean', 'META_HASHER_ENABLED', project.hasProperty('meta-hasher-enabled') ? project.property('meta-hasher-enabled').toString() : 'true' + buildConfigField 'boolean', 'DISTANCES_ENABLED', project.hasProperty('distances-enabled') ? project.property('distances-enabled').toString() : 'true' buildConfig } diff --git a/src/main/kotlin/app/api/Api.kt b/src/main/kotlin/app/api/Api.kt index 85161b3f..140945ee 100644 --- a/src/main/kotlin/app/api/Api.kt +++ b/src/main/kotlin/app/api/Api.kt @@ -3,13 +3,7 @@ package app.api -import app.model.Author -import app.model.Commit -import app.model.Fact -import app.model.Process -import app.model.ProcessEntry -import app.model.Repo -import app.model.User +import app.model.* interface Api { companion object { @@ -36,4 +30,6 @@ interface Api { fun postAuthors(authorsList: List<Author>): Result<Unit> fun postProcessCreate(requestNumEntries: Int): Result<Process> fun postProcess(processEntries: List<ProcessEntry>): Result<Unit> + fun postAuthorDistances(authorDistanceList: List<AuthorDistance>): + Result<Unit> } diff --git a/src/main/kotlin/app/api/MockApi.kt b/src/main/kotlin/app/api/MockApi.kt index 0a65a9b0..13d86a8a 100644 --- a/src/main/kotlin/app/api/MockApi.kt +++ b/src/main/kotlin/app/api/MockApi.kt @@ -4,13 +4,7 @@ package app.api import app.Logger -import app.model.Author -import app.model.Commit -import app.model.Repo -import app.model.Fact -import app.model.Process -import app.model.ProcessEntry -import app.model.User +import app.model.* class MockApi( // GET requests. var mockUser: User = User(), @@ -25,6 +19,7 @@ class MockApi( // GET requests. var receivedUsers: MutableList<User> = mutableListOf() var receivedProcessCreate: MutableList<Process> = mutableListOf() var receivedProcess: MutableList<Process> = mutableListOf() + var receivedDistances: MutableList<AuthorDistance> = mutableListOf() // DELETE requests. var receivedDeletedCommits: MutableList<Commit> = mutableListOf() @@ -93,4 +88,12 @@ class MockApi( // GET requests. receivedProcess.add(Process(entries = processEntries)) return Result() } + + override fun postAuthorDistances(authorDistanceList: + List<AuthorDistance>): Result<Unit> { + Logger.debug { "MockApi: postAuthorDistances request (${authorDistanceList + .size} distances)" } + receivedDistances.addAll(authorDistanceList) + return Result() + } } diff --git a/src/main/kotlin/app/api/ServerApi.kt b/src/main/kotlin/app/api/ServerApi.kt index ae4cbb40..f1c72cb3 100644 --- a/src/main/kotlin/app/api/ServerApi.kt +++ b/src/main/kotlin/app/api/ServerApi.kt @@ -6,16 +6,7 @@ package app.api import app.BuildConfig import app.Logger import app.config.Configurator -import app.model.Author -import app.model.AuthorGroup -import app.model.Commit -import app.model.CommitGroup -import app.model.Fact -import app.model.FactGroup -import app.model.Process -import app.model.ProcessEntry -import app.model.Repo -import app.model.User +import app.model.* import com.github.kittinunf.fuel.core.FuelManager import com.github.kittinunf.fuel.core.Method import com.github.kittinunf.fuel.core.Request @@ -125,6 +116,13 @@ class ServerApi (private val configurator: Configurator) : Api { .body(process.serialize()) } + private fun createRequestPostAuthorDistances(distances: + AuthorDistanceGroup): + Request { + return post("/distances").header(getContentTypeHeader()) + .body(distances.serialize()) + } + private fun <T> makeRequest(request: Request, requestName: String, parser: (ByteArray) -> T): Result<T> { @@ -214,4 +212,10 @@ class ServerApi (private val configurator: Configurator) : Api { return makeRequest(createRequestPostProcess(process), "postProcess", {}) } + override fun postAuthorDistances(authorDistanceList: List<AuthorDistance>): + Result<Unit> { + val distances = AuthorDistanceGroup(authorDistanceList) + return makeRequest(createRequestPostAuthorDistances(distances), + "postDistances", {}) + } } diff --git a/src/main/kotlin/app/hashers/Colleagues.kt b/src/main/kotlin/app/hashers/Colleagues.kt new file mode 100644 index 00000000..cf94420b --- /dev/null +++ b/src/main/kotlin/app/hashers/Colleagues.kt @@ -0,0 +1,63 @@ +// Copyright 2018 Sourcerer Inc. All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) + +package app.hashers + +import app.FactCodes +import app.api.Api +import app.model.Author +import app.model.Fact +import app.model.Repo +import io.reactivex.Observable +import java.util.concurrent.TimeUnit + +class AuthorDistanceHasher( + private val serverRepo: Repo, + private val api: Api, + private val emails: HashSet<String>, + private val userEmails: HashSet<String>) { + fun updateFromObservable(observable: Observable<Triple<String, + List<String>, Long>>, onError: (Throwable) -> Unit) { + val authorScores = hashMapOf<String, Double>() + emails.forEach { authorScores[it] = 0.0 } + + // Store the time of the earliest commit for a path by user. + val authorPathLastContribution = hashMapOf<String, Long>() + + observable.subscribe({ (email, paths, time) -> + if (email in userEmails) { + paths.forEach { path -> + authorPathLastContribution[path] = time + } + } + else { + val score = paths + .filter { path -> path in authorPathLastContribution } + .filter { path -> + val authorTime = authorPathLastContribution[path]!! + val timeDelta = TimeUnit.DAYS.convert( + authorTime - time, TimeUnit.SECONDS) + timeDelta < 365 + }.size + authorScores[email] = authorScores[email]!! + score + } + }, onError, { + val stats = mutableListOf<Fact>() + val author = Author(email = userEmails.toList()[0]) + authorScores.forEach { email, value -> + if (email !in userEmails) { + stats.add(Fact(serverRepo, FactCodes.COLLEAGUES, value = + email, value2 = value.toString(), author = author)) + } + } + + postDistancesToServer(stats) + }) + } + + private fun postDistancesToServer(stats: List<Fact>) { + if (stats.isNotEmpty()) { + api.postFacts(stats).onErrorThrow() + } + } +} diff --git a/src/main/kotlin/app/hashers/CommitCrawler.kt b/src/main/kotlin/app/hashers/CommitCrawler.kt index f784e904..3d1e7b1f 100644 --- a/src/main/kotlin/app/hashers/CommitCrawler.kt +++ b/src/main/kotlin/app/hashers/CommitCrawler.kt @@ -251,6 +251,118 @@ object CommitCrawler { }) } + fun getJGitPathsObservable(git: Git, + tail : RevCommit? = null) : + Observable<Triple<String, List<String>, Long>> = Observable.create { + subscriber -> + val repo: Repository = git.repository + val revWalk = RevWalk(repo) + val head: RevCommit = + try { revWalk.parseCommit(getDefaultBranchHead(git)) } + catch(e: Exception) { throw Exception("No head was found!") } + + val df = DiffFormatter(DisabledOutputStream.INSTANCE) + df.setRepository(repo) + df.isDetectRenames = true + + val confTreeWalk = TreeWalk(repo) + confTreeWalk.addTree(head.tree) + confTreeWalk.filter = PathFilter.create(CONF_FILE_PATH) + + var ignoredPaths = + if (confTreeWalk.next()) { + getIgnoredPaths(repo, confTreeWalk.getObjectId(0)) + } + else { + listOf() + } + + var commitCount = 0 + revWalk.markStart(head) + var commit: RevCommit? = revWalk.next() // Move the walker to the head. + while (commit != null && commit != tail) { + commitCount++ + val parentCommit: RevCommit? = revWalk.next() + + // Smart casts are not yet supported for a mutable variable captured + // in an inline lambda, see + // https://youtrack.jetbrains.com/issue/KT-7186. + if (Logger.isTrace) { + val commitName = commit.name + val commitMsg = commit.shortMessage + Logger.trace { "commit: $commitName; '$commitMsg'" } + if (parentCommit != null) { + val parentCommitName = parentCommit.name + val parentCommitMsg = parentCommit.shortMessage + Logger.trace { "parent commit: $parentCommitName; " + + "'$parentCommitMsg'" } + } + else { + Logger.trace { "parent commit: null" } + } + } + + val email = commit.authorIdent.emailAddress.toLowerCase() + + val diffEntries = df.scan(parentCommit, commit) + val paths = diffEntries + .filter { diff -> + diff.changeType != DiffEntry.ChangeType.COPY + } + .filter { diff -> + val path = diff.newPath + for (cnv in VendorConventions) { + if (cnv.containsMatchIn(path)) { + return@filter false + } + } + + val fileId = + if (path != DiffEntry.DEV_NULL) { + diff.newId.toObjectId() + } else { + diff.oldId.toObjectId() + } + val stream = try { + repo.open(fileId).openStream() + } catch (e: Exception) { + null + } + stream != null && !RawText.isBinary(stream) + } + .mapNotNull { diff -> + val filePath = + if (diff.getNewPath() != DiffEntry.DEV_NULL) { + diff.getNewPath() + } else { + diff.getOldPath() + } + + // Update ignored paths list. The config file has retroactive + // force, i.e. if it was added at this commit, then we presume + // it is applied to all commits, preceding this commit. + if (diff.oldPath == CONF_FILE_PATH) { + ignoredPaths = + getIgnoredPaths(repo, diff.newId.toObjectId()) + } + + if (!ignoredPaths.any { path -> + if (path.endsWith("/")) { + filePath.startsWith(path) + } + else { + path == filePath + } + }) {filePath} else null + } + val date = commit.authorIdent.getWhen().time / 1000 + subscriber.onNext(Triple(email, paths, date)) + commit = parentCommit + } + + subscriber.onComplete() + } + private fun getDiffFiles(jgitRepo: Repository, jgitDiffs: List<JgitDiff>) : List<DiffFile> { return jgitDiffs diff --git a/src/main/kotlin/app/hashers/RepoHasher.kt b/src/main/kotlin/app/hashers/RepoHasher.kt index da0ae0c4..a428e7e5 100644 --- a/src/main/kotlin/app/hashers/RepoHasher.kt +++ b/src/main/kotlin/app/hashers/RepoHasher.kt @@ -7,8 +7,6 @@ import app.BuildConfig import app.Logger import app.api.Api import app.config.Configurator -import app.extractors.Extractor -import app.extractors.Heuristics import app.model.Author import app.model.LocalRepo import app.model.ProcessEntry @@ -98,6 +96,12 @@ class RepoHasher(private val api: Api, commitsCount = commitsCount, userEmails = userEmail) } + if (BuildConfig.DISTANCES_ENABLED) { + val userEmails = configurator.getUser().emails.map { it.email }.toHashSet() + val pathsObservable = CommitCrawler.getJGitPathsObservable(git) + AuthorDistanceHasher(serverRepo, api, emails, userEmails) + .updateFromObservable(pathsObservable, onError) + } // Start and synchronously wait until all subscribers complete. Logger.print("Stats computation. May take a while...") diff --git a/src/main/kotlin/app/model/AuthorDistance.kt b/src/main/kotlin/app/model/AuthorDistance.kt new file mode 100644 index 00000000..9fb959a8 --- /dev/null +++ b/src/main/kotlin/app/model/AuthorDistance.kt @@ -0,0 +1,38 @@ +// Copyright 2018 Sourcerer Inc. All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) + +package app.model + +import app.Protos +import com.google.protobuf.InvalidProtocolBufferException +import java.security.InvalidParameterException + +data class AuthorDistance( + var repo: Repo = Repo(), + var email: String = "", + var score: Double = 0.0 +) { + @Throws(InvalidParameterException::class) + constructor(proto: Protos.AuthorDistance) : this() { + repo = Repo(rehash = proto.repoRehash) + email = proto.email + score = proto.score + } + + @Throws(InvalidProtocolBufferException::class) + constructor(bytes: ByteArray) : this(Protos.AuthorDistance.parseFrom(bytes)) + + constructor(serialized: String) : this(serialized.toByteArray()) + + fun getProto(): Protos.AuthorDistance { + return Protos.AuthorDistance.newBuilder() + .setRepoRehash(repo.rehash) + .setEmail(email) + .setScore(score) + .build() + } + + fun serialize(): ByteArray { + return getProto().toByteArray() + } +} diff --git a/src/main/kotlin/app/model/AuthorDistanceGroup.kt b/src/main/kotlin/app/model/AuthorDistanceGroup.kt new file mode 100644 index 00000000..b6aee1f7 --- /dev/null +++ b/src/main/kotlin/app/model/AuthorDistanceGroup.kt @@ -0,0 +1,33 @@ +// Copyright 2018 Sourcerer Inc. All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) + +package app.model + +import app.Protos +import com.google.protobuf.InvalidProtocolBufferException +import java.security.InvalidParameterException + +data class AuthorDistanceGroup( + var stats: List<AuthorDistance> = listOf() +) { + @Throws(InvalidParameterException::class) + constructor(proto: Protos.AuthorDistanceGroup) : this() { + stats = proto.authorDistancesList.map { AuthorDistance(it) } + } + + @Throws(InvalidProtocolBufferException::class) + constructor(bytes: ByteArray) : this(Protos.AuthorDistanceGroup.parseFrom + (bytes)) + + constructor(serialized: String) : this(serialized.toByteArray()) + + fun getProto(): Protos.AuthorDistanceGroup { + return Protos.AuthorDistanceGroup.newBuilder() + .addAllAuthorDistances(stats.map { it.getProto() }) + .build() + } + + fun serialize(): ByteArray { + return getProto().toByteArray() + } +} diff --git a/src/main/proto/sourcerer.proto b/src/main/proto/sourcerer.proto index c63d7f65..3aaa2e65 100644 --- a/src/main/proto/sourcerer.proto +++ b/src/main/proto/sourcerer.proto @@ -1,5 +1,6 @@ // Copyright 2017 Sourcerer, Inc. All Rights Reserved. // Author: Anatoly Kislov (anatoly@sourcerer.io) +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) syntax = "proto3"; @@ -155,3 +156,13 @@ message ProcessEntry { uint32 status = 2; uint32 error_code = 3; } + +message AuthorDistance { + string email = 1; + double score = 2; + string repo_rehash = 3; +} + +message AuthorDistanceGroup { + repeated AuthorDistance author_distances = 1; +} diff --git a/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt new file mode 100644 index 00000000..f06bf9c1 --- /dev/null +++ b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt @@ -0,0 +1,86 @@ +// Copyright 2018 Sourcerer Inc. All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) + +package test.tests.hashers + +import app.FactCodes +import app.api.MockApi +import app.hashers.AuthorDistanceHasher +import app.hashers.CommitCrawler +import app.model.Author +import app.model.Fact +import app.model.Repo +import org.eclipse.jgit.api.Git +import org.jetbrains.spek.api.Spek +import org.jetbrains.spek.api.dsl.given +import org.jetbrains.spek.api.dsl.it +import test.utils.TestRepo +import java.io.File +import java.util.* +import kotlin.test.assertTrue +import kotlin.test.fail + +class AuthorDistanceHasherTest : Spek({ + given("repo with a file") { + val testRepoPath = "../author_distance_hasher" + val testRepo = TestRepo(testRepoPath) + val serverRepo = Repo(rehash = "test_repo_rehash") + val api = MockApi(mockRepo = serverRepo) + val fileName = "test1.txt" + val author1 = Author("First Author", "first.author@gmail.com") + val author2 = Author("Second Author", "second.author@gmail.com") + val author3 = Author("Third Author", "third.author@gmail.com") + val emails = hashSetOf(author1.email, author2.email, author3.email) + + testRepo.createFile(fileName, listOf("line1", "line2")) + testRepo.commit(message = "initial commit", + author = author1, + date = Calendar.Builder().setDate(2017, 1, 1).setTimeOfDay + (0, 0, 0).build().time) + + testRepo.deleteLines(fileName, 1, 1) + testRepo.commit(message = "delete second line", + author = author2, + date = Calendar.Builder().setDate(2017, 1, 1).setTimeOfDay + (0, 1, 0).build().time) + + testRepo.deleteLines(fileName, 0, 0) + testRepo.commit(message = "delete first line", + author = author1, + date = Calendar.Builder().setDate(2018, 1, 1).setTimeOfDay + (0, 1, 0).build().time) + testRepo.insertLines(fileName, 0, listOf("line1")) + testRepo.commit(message = "add first line", + author = author3, + date = Calendar.Builder().setDate(2019, 1, 1).setTimeOfDay + (0, 1, 0).build().time) + + val gitHasher = Git.open(File(testRepoPath)) + it("extracts colleagues") { + val observable = CommitCrawler.getJGitPathsObservable(gitHasher) + AuthorDistanceHasher(serverRepo, api, emails, + hashSetOf(author2.email)).updateFromObservable(observable, + onError = { _ -> fail("exception") }) + + assertTrue(api.receivedFacts.contains( + Fact(repo = serverRepo, + code = FactCodes.COLLEAGUES, + author = author2, + value = author1.email, + value2 = (1.0).toString()) + )) + + assertTrue(api.receivedFacts.contains( + Fact(repo = serverRepo, + code = FactCodes.COLLEAGUES, + author = author2, + value = author3.email, + value2 = (0.0).toString()) + )) + } + + afterGroup { + testRepo.destroy() + } + } +}) From 5c23dbd23fdb6f7b6013e84c1ab7ef2eccb51acc Mon Sep 17 00:00:00 2001 From: Liubov Yaronskaya <lyaronskaya@sourcerer.io> Date: Tue, 23 Oct 2018 14:24:43 +0300 Subject: [PATCH 2/5] wip: post distances. --- src/main/kotlin/app/hashers/Colleagues.kt | 11 ++++----- .../tests/hashers/AuthorDistanceHasherTest.kt | 23 ++++++++----------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/main/kotlin/app/hashers/Colleagues.kt b/src/main/kotlin/app/hashers/Colleagues.kt index cf94420b..7e7ff513 100644 --- a/src/main/kotlin/app/hashers/Colleagues.kt +++ b/src/main/kotlin/app/hashers/Colleagues.kt @@ -6,6 +6,7 @@ package app.hashers import app.FactCodes import app.api.Api import app.model.Author +import app.model.AuthorDistance import app.model.Fact import app.model.Repo import io.reactivex.Observable @@ -42,12 +43,10 @@ class AuthorDistanceHasher( authorScores[email] = authorScores[email]!! + score } }, onError, { - val stats = mutableListOf<Fact>() - val author = Author(email = userEmails.toList()[0]) + val stats = mutableListOf<AuthorDistance>() authorScores.forEach { email, value -> if (email !in userEmails) { - stats.add(Fact(serverRepo, FactCodes.COLLEAGUES, value = - email, value2 = value.toString(), author = author)) + stats.add(AuthorDistance(serverRepo, email, value)) } } @@ -55,9 +54,9 @@ class AuthorDistanceHasher( }) } - private fun postDistancesToServer(stats: List<Fact>) { + private fun postDistancesToServer(stats: List<AuthorDistance>) { if (stats.isNotEmpty()) { - api.postFacts(stats).onErrorThrow() + api.postAuthorDistances(stats).onErrorThrow() } } } diff --git a/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt index f06bf9c1..e0be1b2e 100644 --- a/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt @@ -8,6 +8,7 @@ import app.api.MockApi import app.hashers.AuthorDistanceHasher import app.hashers.CommitCrawler import app.model.Author +import app.model.AuthorDistance import app.model.Fact import app.model.Repo import org.eclipse.jgit.api.Git @@ -62,21 +63,15 @@ class AuthorDistanceHasherTest : Spek({ hashSetOf(author2.email)).updateFromObservable(observable, onError = { _ -> fail("exception") }) - assertTrue(api.receivedFacts.contains( - Fact(repo = serverRepo, - code = FactCodes.COLLEAGUES, - author = author2, - value = author1.email, - value2 = (1.0).toString()) - )) + assertTrue(api.receivedDistances.contains( + AuthorDistance(repo = serverRepo, + email = author1.email, + score = 1.0))) - assertTrue(api.receivedFacts.contains( - Fact(repo = serverRepo, - code = FactCodes.COLLEAGUES, - author = author2, - value = author3.email, - value2 = (0.0).toString()) - )) + assertTrue(api.receivedDistances.contains( + AuthorDistance(repo = serverRepo, + email = author3.email, + score = 0.0))) } afterGroup { From cfca26921ecb62ffa67d7c2f0208aa33496c89bc Mon Sep 17 00:00:00 2001 From: Liubov Yaronskaya <lyaronskaya@sourcerer.io> Date: Thu, 25 Oct 2018 16:53:28 +0300 Subject: [PATCH 3/5] wip: crawler to extract different data. --- src/main/kotlin/app/hashers/CodeLongevity.kt | 12 +- src/main/kotlin/app/hashers/Colleagues.kt | 13 +- src/main/kotlin/app/hashers/CommitCrawler.kt | 175 +++++------------- src/main/kotlin/app/hashers/RepoHasher.kt | 7 +- .../tests/hashers/AuthorDistanceHasherTest.kt | 5 +- 5 files changed, 69 insertions(+), 143 deletions(-) diff --git a/src/main/kotlin/app/hashers/CodeLongevity.kt b/src/main/kotlin/app/hashers/CodeLongevity.kt index 950a2214..ceda54b1 100644 --- a/src/main/kotlin/app/hashers/CodeLongevity.kt +++ b/src/main/kotlin/app/hashers/CodeLongevity.kt @@ -320,7 +320,7 @@ class CodeLongevity( /** * Scans the repo to extract code line ages. */ - fun updateFromObservable(diffObservable: Observable<JgitPair> = + fun updateFromObservable(diffObservable: Observable<JgitData> = CommitCrawler.getJGitObservable(git), onError: (Throwable) -> Unit = {}, api: Api, @@ -396,7 +396,7 @@ class CodeLongevity( * the revisions of the repo. */ fun getLinesList(tail : RevCommit? = null, - diffObservable: Observable<JgitPair> = + diffObservable: Observable<JgitData> = CommitCrawler.getJGitObservable(git), onError: (Throwable) -> Unit = {}) : List<CodeLine> { val codeLines: MutableList<CodeLine> = mutableListOf() @@ -411,7 +411,7 @@ class CodeLongevity( * the revisions of the repo. */ fun getLinesObservable(tail : RevCommit? = null, - diffObservable: Observable<JgitPair>, + diffObservable: Observable<JgitData>, onError: (Throwable) -> Unit) : Observable<CodeLine> = Observable.create { subscriber -> @@ -448,7 +448,7 @@ class CodeLongevity( // to the diff. Traverse the diffs backwards to handle double // renames properly. // TODO(alex): cover file renames by tests (see APP-132 issue). - for ((diff, editList) in diffs.asReversed()) { + for ((diff, editList) in diffs!!.asReversed()) { val oldPath = diff.oldPath val oldId = diff.oldId.toObjectId() val newPath = diff.newPath @@ -483,7 +483,7 @@ class CodeLongevity( Logger.trace { "ins ($insStart, $insEnd)" } for (idx in insStart until insEnd) { - val from = RevCommitLine(commit, newId, + val from = RevCommitLine(commit!!, newId, newPath, idx, false) try { val to = lines[idx] @@ -514,7 +514,7 @@ class CodeLongevity( val tmpLines = ArrayList<RevCommitLine>(delCount) for (idx in delStart until delEnd) { - tmpLines.add(RevCommitLine(commit, oldId, + tmpLines.add(RevCommitLine(commit!!, oldId, oldPath, idx, true)) } lines.addAll(delStart, tmpLines) diff --git a/src/main/kotlin/app/hashers/Colleagues.kt b/src/main/kotlin/app/hashers/Colleagues.kt index 7e7ff513..644d1bde 100644 --- a/src/main/kotlin/app/hashers/Colleagues.kt +++ b/src/main/kotlin/app/hashers/Colleagues.kt @@ -3,11 +3,8 @@ package app.hashers -import app.FactCodes import app.api.Api -import app.model.Author import app.model.AuthorDistance -import app.model.Fact import app.model.Repo import io.reactivex.Observable import java.util.concurrent.TimeUnit @@ -17,15 +14,18 @@ class AuthorDistanceHasher( private val api: Api, private val emails: HashSet<String>, private val userEmails: HashSet<String>) { - fun updateFromObservable(observable: Observable<Triple<String, - List<String>, Long>>, onError: (Throwable) -> Unit) { + fun updateFromObservable(observable: Observable<JgitData>, onError: (Throwable) + -> Unit) { val authorScores = hashMapOf<String, Double>() emails.forEach { authorScores[it] = 0.0 } // Store the time of the earliest commit for a path by user. val authorPathLastContribution = hashMapOf<String, Long>() - observable.subscribe({ (email, paths, time) -> + observable.subscribe({ + val email = it.email!! + val paths = it.paths!! + val time = it.date!! if (email in userEmails) { paths.forEach { path -> authorPathLastContribution[path] = time @@ -49,7 +49,6 @@ class AuthorDistanceHasher( stats.add(AuthorDistance(serverRepo, email, value)) } } - postDistancesToServer(stats) }) } diff --git a/src/main/kotlin/app/hashers/CommitCrawler.kt b/src/main/kotlin/app/hashers/CommitCrawler.kt index 3d1e7b1f..0a5be12f 100644 --- a/src/main/kotlin/app/hashers/CommitCrawler.kt +++ b/src/main/kotlin/app/hashers/CommitCrawler.kt @@ -1,5 +1,6 @@ // Copyright 2017 Sourcerer Inc. All Rights Reserved. // Author: Anatoly Kislov (anatoly@sourcerer.io) +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) package app.hashers @@ -29,7 +30,12 @@ import org.eclipse.jgit.treewalk.TreeWalk import org.eclipse.jgit.util.io.DisabledOutputStream import java.util.LinkedList -data class JgitPair(val commit: RevCommit, val list: List<JgitDiff>) +data class JgitData(var commit: RevCommit? = null, + var list: List<JgitDiff>? = null, + var paths: List<String>? = null, + var date: Long? = null, + var email: String? = null) + data class JgitDiff(val diffEntry: DiffEntry, val editList: EditList) /** @@ -96,9 +102,14 @@ object CommitCrawler { fun getJGitObservable(git: Git, totalCommitCount: Int = 0, + extractCommit: Boolean = true, + extractDiffs: Boolean = true, + extractPaths: Boolean = false, + extractDate: Boolean = false, + extractEmail: Boolean = false, filteredEmails: HashSet<String>? = null, tail : RevCommit? = null) : - Observable<JgitPair> = Observable.create { subscriber -> + Observable<JgitData> = Observable.create { subscriber -> val repo: Repository = git.repository val revWalk = RevWalk(repo) val head: RevCommit = @@ -156,9 +167,9 @@ object CommitCrawler { commit = parentCommit continue } + val paths = mutableListOf<String>() val diffEntries = df.scan(parentCommit, commit) - val diffEdits = diffEntries .filter { diff -> diff.changeType != DiffEntry.ChangeType.COPY } @@ -199,24 +210,46 @@ object CommitCrawler { getIgnoredPaths(repo, diff.getNewId().toObjectId()) } - !ignoredPaths.any { path -> + if (!ignoredPaths.any { path -> if (path.endsWith("/")) { filePath.startsWith(path) } else { path == filePath } + }) { + paths.add(filePath) + true + } else false + } + + val jgitData = JgitData() + if (extractCommit) { + jgitData.commit = commit + } + if (extractDiffs) { + val diffEdits = diffEntries + .map { diff -> + JgitDiff(diff, df.toFileHeader(diff).toEditList()) } + .filter { diff -> + diff.editList.fold(0) { acc, edit -> + acc + edit.lengthA + edit.lengthB + } < MAX_DIFF_SIZE + } + jgitData.list = diffEdits } - .map { diff -> - JgitDiff(diff, df.toFileHeader(diff).toEditList()) + if (extractPaths) { + jgitData.paths = paths } - .filter { diff -> - diff.editList.fold(0) { acc, edit -> - acc + edit.lengthA + edit.lengthB - } < MAX_DIFF_SIZE + if (extractDate) { + jgitData.date = commit.authorIdent.getWhen().time / 1000 } - subscriber.onNext(JgitPair(commit, diffEdits)) + if (extractEmail) { + jgitData.email = email + } + + subscriber.onNext(jgitData) commit = parentCommit } @@ -229,12 +262,12 @@ object CommitCrawler { } fun getObservable(git: Git, - jgitObservable: Observable<JgitPair>, + jgitObservable: Observable<JgitData>, repo: Repo): Observable<Commit> { - return jgitObservable.map( { (jgitCommit, jgitDiffs) -> + return jgitObservable.map( { (jgitCommit, jgitDiffs, _) -> // Mapping and stats extraction. - val commit = Commit(jgitCommit) - commit.diffs = getDiffFiles(git.repository, jgitDiffs) + val commit = Commit(jgitCommit!!) + commit.diffs = getDiffFiles(git.repository, jgitDiffs!!) // Count lines on all non-binary files. This is additional // statistics to CommitStats because not all file extensions @@ -251,118 +284,6 @@ object CommitCrawler { }) } - fun getJGitPathsObservable(git: Git, - tail : RevCommit? = null) : - Observable<Triple<String, List<String>, Long>> = Observable.create { - subscriber -> - val repo: Repository = git.repository - val revWalk = RevWalk(repo) - val head: RevCommit = - try { revWalk.parseCommit(getDefaultBranchHead(git)) } - catch(e: Exception) { throw Exception("No head was found!") } - - val df = DiffFormatter(DisabledOutputStream.INSTANCE) - df.setRepository(repo) - df.isDetectRenames = true - - val confTreeWalk = TreeWalk(repo) - confTreeWalk.addTree(head.tree) - confTreeWalk.filter = PathFilter.create(CONF_FILE_PATH) - - var ignoredPaths = - if (confTreeWalk.next()) { - getIgnoredPaths(repo, confTreeWalk.getObjectId(0)) - } - else { - listOf() - } - - var commitCount = 0 - revWalk.markStart(head) - var commit: RevCommit? = revWalk.next() // Move the walker to the head. - while (commit != null && commit != tail) { - commitCount++ - val parentCommit: RevCommit? = revWalk.next() - - // Smart casts are not yet supported for a mutable variable captured - // in an inline lambda, see - // https://youtrack.jetbrains.com/issue/KT-7186. - if (Logger.isTrace) { - val commitName = commit.name - val commitMsg = commit.shortMessage - Logger.trace { "commit: $commitName; '$commitMsg'" } - if (parentCommit != null) { - val parentCommitName = parentCommit.name - val parentCommitMsg = parentCommit.shortMessage - Logger.trace { "parent commit: $parentCommitName; " + - "'$parentCommitMsg'" } - } - else { - Logger.trace { "parent commit: null" } - } - } - - val email = commit.authorIdent.emailAddress.toLowerCase() - - val diffEntries = df.scan(parentCommit, commit) - val paths = diffEntries - .filter { diff -> - diff.changeType != DiffEntry.ChangeType.COPY - } - .filter { diff -> - val path = diff.newPath - for (cnv in VendorConventions) { - if (cnv.containsMatchIn(path)) { - return@filter false - } - } - - val fileId = - if (path != DiffEntry.DEV_NULL) { - diff.newId.toObjectId() - } else { - diff.oldId.toObjectId() - } - val stream = try { - repo.open(fileId).openStream() - } catch (e: Exception) { - null - } - stream != null && !RawText.isBinary(stream) - } - .mapNotNull { diff -> - val filePath = - if (diff.getNewPath() != DiffEntry.DEV_NULL) { - diff.getNewPath() - } else { - diff.getOldPath() - } - - // Update ignored paths list. The config file has retroactive - // force, i.e. if it was added at this commit, then we presume - // it is applied to all commits, preceding this commit. - if (diff.oldPath == CONF_FILE_PATH) { - ignoredPaths = - getIgnoredPaths(repo, diff.newId.toObjectId()) - } - - if (!ignoredPaths.any { path -> - if (path.endsWith("/")) { - filePath.startsWith(path) - } - else { - path == filePath - } - }) {filePath} else null - } - val date = commit.authorIdent.getWhen().time / 1000 - subscriber.onNext(Triple(email, paths, date)) - commit = parentCommit - } - - subscriber.onComplete() - } - private fun getDiffFiles(jgitRepo: Repository, jgitDiffs: List<JgitDiff>) : List<DiffFile> { return jgitDiffs diff --git a/src/main/kotlin/app/hashers/RepoHasher.kt b/src/main/kotlin/app/hashers/RepoHasher.kt index a428e7e5..f1ba7ba6 100644 --- a/src/main/kotlin/app/hashers/RepoHasher.kt +++ b/src/main/kotlin/app/hashers/RepoHasher.kt @@ -71,7 +71,7 @@ class RepoHasher(private val api: Api, filteredEmails } else null val jgitObservable = CommitCrawler.getJGitObservable(git, - rehashes.size, crawlerEmails + rehashes.size, filteredEmails = crawlerEmails ).publish() val observable = CommitCrawler.getObservable(git, jgitObservable, serverRepo) @@ -98,7 +98,10 @@ class RepoHasher(private val api: Api, } if (BuildConfig.DISTANCES_ENABLED) { val userEmails = configurator.getUser().emails.map { it.email }.toHashSet() - val pathsObservable = CommitCrawler.getJGitPathsObservable(git) + val pathsObservable = CommitCrawler.getJGitObservable(git, + extractCommit = false, extractDate = true, + extractDiffs = false, extractEmail = true, + extractPaths = true) AuthorDistanceHasher(serverRepo, api, emails, userEmails) .updateFromObservable(pathsObservable, onError) } diff --git a/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt index e0be1b2e..7b1d0cd0 100644 --- a/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt @@ -58,7 +58,10 @@ class AuthorDistanceHasherTest : Spek({ val gitHasher = Git.open(File(testRepoPath)) it("extracts colleagues") { - val observable = CommitCrawler.getJGitPathsObservable(gitHasher) + val observable = CommitCrawler.getJGitObservable(gitHasher, + extractCommit = false, extractDate = true, + extractDiffs = false, extractEmail = true, + extractPaths = true) AuthorDistanceHasher(serverRepo, api, emails, hashSetOf(author2.email)).updateFromObservable(observable, onError = { _ -> fail("exception") }) From 4729d06af0010e88e086eeca92df57ffa4e1edd8 Mon Sep 17 00:00:00 2001 From: Anatoly Stansler <anatoly@sourcerer.io> Date: Wed, 31 Oct 2018 13:47:49 +0300 Subject: [PATCH 4/5] chore: style --- src/main/kotlin/app/api/ServerApi.kt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/kotlin/app/api/ServerApi.kt b/src/main/kotlin/app/api/ServerApi.kt index f1c72cb3..891a9909 100644 --- a/src/main/kotlin/app/api/ServerApi.kt +++ b/src/main/kotlin/app/api/ServerApi.kt @@ -117,8 +117,7 @@ class ServerApi (private val configurator: Configurator) : Api { } private fun createRequestPostAuthorDistances(distances: - AuthorDistanceGroup): - Request { + AuthorDistanceGroup): Request { return post("/distances").header(getContentTypeHeader()) .body(distances.serialize()) } From 483bd54daf2d14760377b876cbe46f306af5ae2a Mon Sep 17 00:00:00 2001 From: Anatoly Stansler <anatoly@sourcerer.io> Date: Mon, 12 Nov 2018 18:15:06 +0300 Subject: [PATCH 5/5] chore: style changes --- .../app/hashers/{Colleagues.kt => AuthorDistanceHasher.kt} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/main/kotlin/app/hashers/{Colleagues.kt => AuthorDistanceHasher.kt} (94%) diff --git a/src/main/kotlin/app/hashers/Colleagues.kt b/src/main/kotlin/app/hashers/AuthorDistanceHasher.kt similarity index 94% rename from src/main/kotlin/app/hashers/Colleagues.kt rename to src/main/kotlin/app/hashers/AuthorDistanceHasher.kt index 644d1bde..db213de4 100644 --- a/src/main/kotlin/app/hashers/Colleagues.kt +++ b/src/main/kotlin/app/hashers/AuthorDistanceHasher.kt @@ -14,8 +14,8 @@ class AuthorDistanceHasher( private val api: Api, private val emails: HashSet<String>, private val userEmails: HashSet<String>) { - fun updateFromObservable(observable: Observable<JgitData>, onError: (Throwable) - -> Unit) { + fun updateFromObservable(observable: Observable<JgitData>, + onError: (Throwable) -> Unit) { val authorScores = hashMapOf<String, Double>() emails.forEach { authorScores[it] = 0.0 }