Skip to content

Commit

Permalink
feat: try to use similar chunk
Browse files Browse the repository at this point in the history
  • Loading branch information
phodal committed Jul 18, 2023
1 parent c75b0bf commit d3734e3
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 31 deletions.
Expand Up @@ -2,13 +2,17 @@ package cc.unitmesh.ide.pycharm.context

import cc.unitmesh.devti.context.ClassContext
import cc.unitmesh.devti.context.builder.ClassContextBuilder
import com.intellij.openapi.diagnostic.Logger
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiNameIdentifierOwner
import com.jetbrains.python.psi.PyClass
import com.jetbrains.python.psi.PyFunction
import com.jetbrains.python.psi.types.TypeEvalContext

class PythonClassContextBuilder : ClassContextBuilder {
companion object {
val logger = Logger.getInstance(PythonClassContextBuilder::class.java)
}
override fun getClassContext(psiElement: PsiElement, gatherUsages: Boolean): ClassContext? {
if (psiElement !is PyClass) {
return null
Expand Down
@@ -1,6 +1,7 @@
package cc.unitmesh.ide.pycharm.provider

import cc.unitmesh.devti.context.ClassContextProvider
import cc.unitmesh.devti.context.FileContextProvider
import cc.unitmesh.devti.context.chunks.SimilarChunksWithPaths
import cc.unitmesh.devti.gui.chat.ChatBotActionType
import cc.unitmesh.devti.provider.ContextPrompter
import com.intellij.openapi.diagnostic.Logger
Expand All @@ -12,8 +13,8 @@ class PythonContextPrompter : ContextPrompter() {
private var selectedText: String = ""
private var file: PsiFile? = null
private var project: Project? = null
private val classProvider = ClassContextProvider(false)
private var lang: String = ""
private val fileContextProvider = FileContextProvider()

override fun initContext(actionType: ChatBotActionType, prefixText: String, file: PsiFile?, project: Project) {
this.action = actionType
Expand All @@ -24,8 +25,10 @@ class PythonContextPrompter : ContextPrompter() {
}

override fun getUIPrompt(): String {
val classInfo = classProvider.from(file!!).toQuery()
val classInfo = fileContextProvider.from(file!!).toQuery()
logger.warn("classInfo: $classInfo")
val chunkContext = SimilarChunksWithPaths().similarChunksWithPaths(file!!).toQuery()
logger.warn("chunkContext: $chunkContext")

return """$action for the code:
$classInfo
Expand All @@ -36,7 +39,7 @@ class PythonContextPrompter : ContextPrompter() {
}

override fun getRequestPrompt(): String {
val classInfo = classProvider.from(file!!).toQuery()
val classInfo = fileContextProvider.from(file!!).toQuery()
logger.warn("classInfo: $classInfo")

return """$action for the code:
Expand Down
Expand Up @@ -3,6 +3,7 @@ package cc.unitmesh.devti.context
import cc.unitmesh.devti.context.builder.ClassContextBuilder
import com.intellij.lang.Language
import com.intellij.lang.LanguageExtension
import com.intellij.openapi.diagnostic.Logger
import com.intellij.psi.PsiElement

class ClassContextProvider(private val gatherUsages: Boolean) {
Expand All @@ -14,6 +15,10 @@ class ClassContextProvider(private val gatherUsages: Boolean) {
providers = registeredLanguages.mapNotNull(languageExtension::forLanguage)
}

companion object {
val logger = Logger.getInstance(ClassContextProvider::class.java)
}

fun from(psiElement: PsiElement): ClassContext {
for (provider in providers) {
val classContext = provider.getClassContext(psiElement, gatherUsages)
Expand Down
@@ -0,0 +1,42 @@
package cc.unitmesh.devti.context.chunks

import cc.unitmesh.devti.context.base.LLMQueryContext
import com.google.gson.Gson
import com.intellij.lang.Commenter
import com.intellij.lang.Language
import com.intellij.lang.LanguageCommenters

/**
 * Code snippets taken from other files, rendered as commented-out text for use in an LLM query.
 *
 * @property language language whose line-comment prefix is used to comment out each snippet
 * @property paths paths of the files the snippets came from; paired by position with [chunks]
 * @property chunks the snippets themselves; paired by position with [paths]
 */
class SimilarChunkContext(val language: Language, val paths: List<String>?, val chunks: List<String>?) : LLMQueryContext {
    /**
     * Renders each (path, chunk) pair as a header line naming the path, followed by the chunk with
     * every line commented out.
     *
     * Returns an empty string when [paths] or [chunks] is null. When the two lists differ in
     * length, the surplus entries of the longer one are ignored (this is how `zip` pairs them).
     */
    override fun toQuery(): String {
        if (paths == null || chunks == null) return ""

        val commenter: Commenter? = LanguageCommenters.INSTANCE.forLanguage(language)
        val commentPrefix: String? = commenter?.lineCommentPrefix
        // Without a line-comment prefix the header is emitted bare instead of starting with "null".
        val headerPrefix = if (commentPrefix == null) "" else "$commentPrefix "

        return buildString {
            for ((path, chunk) in paths.zip(chunks)) {
                append(headerPrefix).append("Compare this snippet from ").append(path).append(":\n")
                append(commentCode(chunk, commentPrefix))
                append("\n")
            }
        }.trim()
    }

    /** Serializes [paths] and [chunks] as a JSON object with the keys `paths` and `chunks`. */
    override fun toJson(): String = Gson().toJson(
        mapOf(
            "paths" to paths,
            "chunks" to chunks
        )
    )

    /**
     * Prefixes every line of [code] with [commentSymbol] and a space.
     * Returns [code] unchanged when no comment symbol is available.
     */
    private fun commentCode(code: String, commentSymbol: String?): String {
        if (commentSymbol == null) return code
        return code.split("\n").joinToString("\n") { line -> "$commentSymbol $line" }
    }
}
@@ -0,0 +1,88 @@
package cc.unitmesh.devti.context.chunks

import com.intellij.openapi.fileEditor.impl.EditorHistoryManager
import com.intellij.openapi.fileTypes.FileType
import com.intellij.openapi.roots.ProjectFileIndex
import com.intellij.openapi.roots.ProjectRootManager
import com.intellij.openapi.vfs.VfsUtilCore
import com.intellij.openapi.vfs.VirtualFile
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiManager
import java.io.File

/**
 * Collects, from recently opened files of the same file type as the current file, the chunk of
 * each file that is most similar to the current file by token-level Jaccard similarity.
 */
class SimilarChunksWithPaths() {
    companion object {
        // Declared before INSTANCE so it is initialized before any instance exists.
        private val TOKEN_SEPARATOR = Regex("[^a-zA-Z0-9]")
        val INSTANCE: SimilarChunksWithPaths = SimilarChunksWithPaths()
        private const val CHUNK_SIZE = 60
        private const val MAX_RELEVANT_FILES = 20
    }

    /**
     * Builds a [SimilarChunkContext] holding, for every usable recent file, its relative path and
     * its most similar chunk. Path and chunk are added together, so both lists stay aligned.
     *
     * A recent file is skipped when it has no relative path (it is outside every content/class
     * root) or when no PSI file can be found for it. When [element] has no containing file the
     * returned context is empty.
     */
    fun similarChunksWithPaths(element: PsiElement): SimilarChunkContext {
        val currentFile = element.containingFile
            ?: return SimilarChunkContext(element.language, emptyList(), emptyList())
        val currentFileTokens: Set<String> = tokenize(currentFile.text.orEmpty()).toSet()
        val psiManager: PsiManager = PsiManager.getInstance(element.project)

        val paths: MutableList<String> = ArrayList()
        val chunksList: MutableList<String> = ArrayList()
        for (file in getMostRecentFiles(element)) {
            val path = relativePathTo(file, element) ?: continue
            // maxByOrNull keeps the first chunk on ties and yields null when there are no chunks.
            val bestChunk = extractChunks(psiManager, file)
                .maxByOrNull { chunk -> jaccardSimilarity(currentFileTokens, tokenize(chunk).toSet()) }
                ?: continue
            paths.add(path)
            chunksList.add(bestChunk)
        }

        return SimilarChunkContext(element.language, paths, chunksList)
    }

    /**
     * Returns the path of [relativeFile] relative to its content root, falling back to its class
     * root, or null when the file belongs to neither.
     */
    private fun relativePathTo(relativeFile: VirtualFile, element: PsiElement): String? {
        val fileIndex: ProjectFileIndex = ProjectRootManager.getInstance(element.project).fileIndex
        val root: VirtualFile = fileIndex.getContentRootForFile(relativeFile)
            ?: fileIndex.getClassRootForFile(relativeFile)
            ?: return null

        return VfsUtilCore.getRelativePath(relativeFile, root, File.separatorChar)
    }

    /** Splits [chunk] on every non-alphanumeric ASCII character and drops blank tokens. */
    private fun tokenize(chunk: String): List<String> =
        chunk.split(TOKEN_SEPARATOR).filter { it.isNotBlank() }

    /**
     * Jaccard similarity |A ∩ B| / |A ∪ B| of the two token sets.
     * Two empty sets are given a similarity of 0.0 rather than the NaN that 0/0 would produce.
     */
    private fun jaccardSimilarity(set1: Set<String>, set2: Set<String>): Double {
        val unionSize: Int = set1.union(set2).size
        if (unionSize == 0) return 0.0
        return set1.intersect(set2).size.toDouble() / unionSize.toDouble()
    }

    /**
     * Splits the text of [file] into consecutive chunks of at most [CHUNK_SIZE] lines each.
     * Returns an empty list when no PSI file (or no text) is available for [file].
     */
    private fun extractChunks(psiManager: PsiManager, file: VirtualFile): List<String> {
        val text = psiManager.findFile(file)?.text ?: return emptyList()
        return text.split("\n")
            .chunked(CHUNK_SIZE)
            .map { lines -> lines.joinToString("\n") }
    }

    /**
     * Returns a trailing window of the editor history, restricted to valid files whose file type
     * matches the file containing [element]. Empty when [element] has no containing file.
     */
    private fun getMostRecentFiles(element: PsiElement): List<VirtualFile> {
        val fileType: FileType = element.containingFile?.fileType ?: return emptyList()
        val recentFiles: List<VirtualFile> = EditorHistoryManager.getInstance(element.project).fileList.filter { file ->
            file.isValid && file.fileType == fileType
        }
        val start = (recentFiles.size - MAX_RELEVANT_FILES + 1).coerceAtLeast(0)
        // NOTE(review): subList's end index is exclusive, so the last history entry is left out
        // and at most MAX_RELEVANT_FILES - 2 files are returned. Presumably the last entry is the
        // current file; confirm this is intended before changing the window.
        val end = (recentFiles.size - 1).coerceAtLeast(0)
        return recentFiles.subList(start, end)
    }
}
@@ -1,27 +1 @@
<html>
<body>
Write your description here.
Start the description with a verb in 3rd person singular, like reports, detects, highlights.
In the first sentence, briefly explain what exactly the inspection helps you detect.
Make sure the sentence is not very long and complicated.
<p>
The first sentence must be in a dedicated paragraph separated from the rest of the text. This will make the
description easier to read.
Make sure the description doesn’t just repeat the inspection title.
</p>
<p>
See https://jetbrains.design/intellij/text/inspections/#descriptions for more information.
</p>
<p>
Embed code snippets:
</p>
<pre><code>
// automatically highlighted according to inspection registration 'language' attribute
</code></pre>
<!-- tooltip end -->
<p>Text after this comment will only be shown in the settings of the inspection.</p>

<p>To open related settings directly from the description, add a link with `settings://$` optionally followed by `?$` to
pre-select a UI element.</p>
</body>
</html>
Invoke AutoDev Action in here.

0 comments on commit d3734e3

Please sign in to comment.