diff --git a/extensions/ql-vscode/src/log-insights/log-summary.ts b/extensions/ql-vscode/src/log-insights/log-summary.ts index 5fa4bda58b8..6919210f98c 100644 --- a/extensions/ql-vscode/src/log-insights/log-summary.ts +++ b/extensions/ql-vscode/src/log-insights/log-summary.ts @@ -16,7 +16,8 @@ type EvaluationStrategy = | "EXTENSIONAL" | "SENTINEL_EMPTY" | "CACHACA" - | "CACHE_HIT"; + | "CACHE_HIT" + | "NAMED_LOCAL"; interface SummaryEventBase { evaluationStrategy: EvaluationStrategy; @@ -28,6 +29,8 @@ interface SummaryEventBase { interface ResultEventBase extends SummaryEventBase { resultSize: number; + dependencies?: { [key: string]: string }; + mainHash?: string; } export interface ComputeSimple extends ResultEventBase { @@ -60,6 +63,15 @@ export interface InLayer extends ResultEventBase { predicateIterationMillis: number[]; } +interface NamedLocal extends ResultEventBase { + evaluationStrategy: "NAMED_LOCAL"; + deltaSizes: number[]; + ra: Ra; + pipelineRuns: PipelineRun[]; + queryCausingWork?: string; + predicateIterationMillis: number[]; +} + interface ComputedExtensional extends ResultEventBase { evaluationStrategy: "COMPUTED_EXTENSIONAL"; queryCausingWork?: string; @@ -92,4 +104,5 @@ export type SummaryEvent = | Extensional | SentinelEmpty | Cachaca - | CacheHit; + | CacheHit + | NamedLocal; diff --git a/extensions/ql-vscode/src/log-insights/performance-comparison.ts b/extensions/ql-vscode/src/log-insights/performance-comparison.ts index 023d4fa536f..7dfb516b9c3 100644 --- a/extensions/ql-vscode/src/log-insights/performance-comparison.ts +++ b/extensions/ql-vscode/src/log-insights/performance-comparison.ts @@ -1,9 +1,11 @@ +import { createHash } from "crypto"; import type { SummaryEvent } from "./log-summary"; export interface PipelineSummary { steps: string[]; /** Total counts for each step in the RA array, across all iterations */ counts: number[]; + hash: string; } /** @@ -26,6 +28,9 @@ export interface PerformanceComparisonDataFromLog { */ names: string[]; + /** RA hash of the `i`th predicate event */ + raHashes: string[]; + /** Number of milliseconds spent evaluating the `i`th predicate from the `names` array. */ timeCosts: number[]; @@ -52,12 +57,15 @@ export interface PerformanceComparisonDataFromLog { * All the pipeline runs seen for the `i`th predicate from the `names` array. */ pipelineSummaryList: Array<Record<string, PipelineSummary>>; + + /** All dependencies of the `i`th predicate from the `names` array, encoded as a list of indices in `names`. */ + dependencyLists: number[][]; } export class PerformanceOverviewScanner { - private readonly nameToIndex = new Map<string, number>(); private readonly data: PerformanceComparisonDataFromLog = { names: [], + raHashes: [], timeCosts: [], tupleCosts: [], cacheHitIndices: [], @@ -65,28 +73,35 @@ export class PerformanceOverviewScanner { pipelineSummaryList: [], evaluationCounts: [], iterationCounts: [], + dependencyLists: [], }; + private readonly raToIndex = new Map<string, number>(); + private readonly mainHashToRepr = new Map<string, number>(); + private readonly nameToIndex = new Map<string, number>(); - private getPredicateIndex(name: string): number { - const { nameToIndex } = this; - let index = nameToIndex.get(name); + private getPredicateIndex(name: string, ra: string): number { + let index = this.raToIndex.get(ra); if (index === undefined) { - index = nameToIndex.size; - nameToIndex.set(name, index); + index = this.raToIndex.size; + this.raToIndex.set(ra, index); const { names, + raHashes, timeCosts, tupleCosts, iterationCounts, evaluationCounts, pipelineSummaryList, + dependencyLists, } = this.data; names.push(name); + raHashes.push(ra); timeCosts.push(0); tupleCosts.push(0); iterationCounts.push(0); evaluationCounts.push(0); pipelineSummaryList.push({}); + dependencyLists.push([]); } return index; } @@ -96,46 +111,63 @@ export class PerformanceOverviewScanner { } onEvent(event: SummaryEvent): void { - if ( - event.completionType !== undefined && - event.completionType !== "SUCCESS" - ) { + const { completionType, evaluationStrategy, predicateName, raHash } = event; + if (completionType !== undefined && completionType !== "SUCCESS") { return; // Skip any evaluation that wasn't successful } - switch (event.evaluationStrategy) { - case "EXTENSIONAL": + switch (evaluationStrategy) { + case "EXTENSIONAL": { + break; + } case "COMPUTED_EXTENSIONAL": { + if (predicateName.startsWith("cached_")) { + // Add a dependency from a cached COMPUTED_EXTENSIONAL to the predicate with the actual contents. + // The raHash of the this event may appear in a CACHE_HIT event in the other event log. The dependency + // we're adding here is needed in order to associate the original predicate with such a cache hit. + const originalName = predicateName.substring("cached_".length); + const originalIndex = this.nameToIndex.get(originalName); + if (originalIndex != null) { + const index = this.getPredicateIndex(predicateName, raHash); + this.data.dependencyLists[index].push(originalIndex); + } + } break; } case "CACHE_HIT": case "CACHACA": { // Record a cache hit, but only if the predicate has not been seen before. // We're mainly interested in the reuse of caches from an earlier query run as they can distort comparisons. - if (!this.nameToIndex.has(event.predicateName)) { + if (!this.raToIndex.has(raHash)) { this.data.cacheHitIndices.push( - this.getPredicateIndex(event.predicateName), + this.getPredicateIndex(predicateName, raHash), ); } break; } case "SENTINEL_EMPTY": { - this.data.sentinelEmptyIndices.push( - this.getPredicateIndex(event.predicateName), - ); + const index = this.getPredicateIndex(predicateName, raHash); + this.data.sentinelEmptyIndices.push(index); + const sentinelIndex = this.raToIndex.get(event.sentinelRaHash); + if (sentinelIndex != null) { + this.data.dependencyLists[index].push(sentinelIndex); // needed for matching up cache hits + } break; } case "COMPUTE_RECURSIVE": case "COMPUTE_SIMPLE": + case "NAMED_LOCAL": case "IN_LAYER": { - const index = this.getPredicateIndex(event.predicateName); + const index = this.getPredicateIndex(predicateName, raHash); + this.nameToIndex.set(predicateName, index); let totalTime = 0; let totalTuples = 0; - if (event.evaluationStrategy !== "IN_LAYER") { + if (evaluationStrategy === "COMPUTE_SIMPLE") { totalTime += event.millis; } else { - // IN_LAYER events do no record of their total time. - // Make a best-effort estimate by adding up the positive iteration times (they can be negative). + // Make a best-effort estimate of the total time by adding up the positive iteration times (they can be negative). + // Note that for COMPUTE_RECURSIVE the "millis" field contain the total time of the SCC, not just that predicate, + // but we don't have a good way to show that in the UI, so we rely on the accumulated iteration times. for (const millis of event.predicateIterationMillis ?? []) { if (millis > 0) { totalTime += millis; @@ -148,13 +180,16 @@ export class PerformanceOverviewScanner { iterationCounts, evaluationCounts, pipelineSummaryList, + dependencyLists, } = this.data; const pipelineSummaries = pipelineSummaryList[index]; + const dependencyList = dependencyLists[index]; for (const { counts, raReference } of event.pipelineRuns ?? []) { // Get or create the pipeline summary for this RA const pipelineSummary = (pipelineSummaries[raReference] ??= { steps: event.ra[raReference], counts: counts.map(() => 0), + hash: getPipelineHash(event.ra[raReference]), }); const { counts: totalTuplesPerStep } = pipelineSummary; for (let i = 0, length = counts.length; i < length; ++i) { @@ -169,6 +204,25 @@ export class PerformanceOverviewScanner { totalTuplesPerStep[i] += count; } } + for (const dependencyHash of Object.values(event.dependencies ?? {})) { + const dependencyIndex = this.raToIndex.get(dependencyHash); + if (dependencyIndex != null) { + dependencyList.push(dependencyIndex); + } + } + // For predicates in the same SCC, add two-way dependencies with an arbitrary SCC member + const sccHash = + event.mainHash ?? + (evaluationStrategy === "COMPUTE_RECURSIVE" ? raHash : null); + if (sccHash != null) { + const mainIndex = this.mainHashToRepr.get(sccHash); + if (mainIndex == null) { + this.mainHashToRepr.set(sccHash, index); + } else { + dependencyLists[index].push(mainIndex); + dependencyLists[mainIndex].push(index); + } + } timeCosts[index] += totalTime; tupleCosts[index] += totalTuples; iterationCounts[index] += event.pipelineRuns?.length ?? 0; @@ -178,3 +232,11 @@ export class PerformanceOverviewScanner { } } } + +function getPipelineHash(steps: string[]) { + const md5 = createHash("md5"); + for (const step of steps) { + md5.write(step); + } + return md5.digest("base64"); +} diff --git a/extensions/ql-vscode/src/view/compare-performance/ComparePerformance.tsx b/extensions/ql-vscode/src/view/compare-performance/ComparePerformance.tsx index 5708b65faf0..c9fac0a09dc 100644 --- a/extensions/ql-vscode/src/view/compare-performance/ComparePerformance.tsx +++ b/extensions/ql-vscode/src/view/compare-performance/ComparePerformance.tsx @@ -18,7 +18,7 @@ import type { } from "../../log-insights/performance-comparison"; import { formatDecimal } from "../../common/number"; import { styled } from "styled-components"; -import { Codicon, ViewTitle, WarningBox } from "../common"; +import { Codicon, ViewTitle } from "../common"; import { abbreviateRANames, abbreviateRASteps } from "./RAPrettyPrinter"; import { Renaming, RenamingInput } from "./RenamingInput"; @@ -35,6 +35,8 @@ function isPresent<T>(x: Optional<T>): x is T { } interface PredicateInfo { + name: string; + raHash: string; tuples: number; evaluationCount: number; iterationCount: number; @@ -43,23 +45,37 @@ interface PredicateInfo { } class ComparisonDataset { + /** + * Predicates indexed by a key consisting of the name and its pipeline hash. + * Unlike the RA hash, the pipeline hash only depends on the predicate's own pipeline. + */ + public keyToIndex = new Map<string, number>(); + public raToIndex = new Map<string, number>(); public nameToIndex = new Map<string, number>(); public cacheHitIndices: Set<number>; public sentinelEmptyIndices: Set<number>; - constructor(public data: PerformanceComparisonDataFromLog) { - const { names } = data; - const { nameToIndex } = this; + constructor(private data: PerformanceComparisonDataFromLog) { + const { names, raHashes, pipelineSummaryList } = data; + const { keyToIndex, raToIndex, nameToIndex } = this; for (let i = 0; i < names.length; i++) { - nameToIndex.set(names[i], i); + const name = names[i]; + const pipelineHash = getPipelineSummaryHash(pipelineSummaryList[i]); + keyToIndex.set(`${name}@${pipelineHash}`, i); + nameToIndex.set(name, i); + raToIndex.set(raHashes[i], i); } this.cacheHitIndices = new Set(data.cacheHitIndices); this.sentinelEmptyIndices = new Set(data.sentinelEmptyIndices); } - getTupleCountInfo(name: string): Optional<PredicateInfo> { - const { data, nameToIndex, cacheHitIndices, sentinelEmptyIndices } = this; - const index = nameToIndex.get(name); + keys() { + return Array.from(this.keyToIndex.keys()); + } + + getTupleCountInfo(key: string): Optional<PredicateInfo> { + const { data, keyToIndex, cacheHitIndices, sentinelEmptyIndices } = this; + const index = keyToIndex.get(key); if (index == null) { return AbsentReason.NotSeen; } @@ -72,6 +88,8 @@ class ComparisonDataset { } } return { + name: data.names[index], + raHash: data.raHashes[index], evaluationCount: data.evaluationCounts[index], iterationCount: data.iterationCounts[index], timeCost: data.timeCosts[index], @@ -79,6 +97,75 @@ class ComparisonDataset { pipelines: data.pipelineSummaryList[index], }; } + + /** + * Returns the RA hashes of all predicates that were evaluated in this data set, but not seen in `other`, + * because in `other` the dependency upon these predicates was cut off by a cache hit. + * + * For example, suppose predicate `A` depends on `B`, which depends on `C`, and the + * predicates were evaluated in the first log but not the second: + * ``` + * first eval. log second eval. log + * predicate A seen evaluation seen cache hit + * | + * V + * predicate B seen evaluation not seen + * | + * V + * predicate C seen evaluation not seen + * ``` + * + * To ensure a meaningful comparison, we want to omit `predicate A` from the comparison view because of the cache hit. + * + * But predicates B and C did not have a recorded cache hit in the second log, because they were never scheduled for evaluation. + * Given the dependency graph, the most likely explanation is that they would have been evaluated if `A` had not been a cache hit. + * We therefore say that B and C are "shadowed" by the cache hit on A. + * + * The dependency graph is only visible in the first evaluation log, because `B` and `C` do not exist in the second log. + * So to compute this, we use the dependency graph from one log together with the set of cache hits in the other log. + */ + getPredicatesShadowedByCacheHit(other: ComparisonDataset) { + const { + data: { dependencyLists, raHashes, names }, + raToIndex, + } = this; + const cacheHits = new Set<string>(); + + function visit(index: number, raHash: string) { + if (cacheHits.has(raHash)) { + return; + } + cacheHits.add(raHash); + const dependencies = dependencyLists[index]; + for (const dep of dependencies) { + const name = names[dep]; + if (!other.nameToIndex.has(name)) { + visit(dep, raHashes[dep]); + } + } + } + + for (const otherCacheHit of other.cacheHitIndices) { + { + // Look up by RA hash + const raHash = other.data.raHashes[otherCacheHit]; + const ownIndex = raToIndex.get(raHash); + if (ownIndex != null) { + visit(ownIndex, raHash); + } + } + { + // Look up by name + const name = other.data.names[otherCacheHit]; + const ownIndex = this.nameToIndex.get(name); + if (ownIndex != null) { + visit(ownIndex, raHashes[ownIndex]); + } + } + } + + return cacheHits; + } } function renderOptionalValue(x: Optional<number>, unit: string | undefined) { @@ -336,6 +423,7 @@ function HighLevelStats(props: HighLevelStatsProps) { } interface Row { + key: string; name: string; before: Optional<PredicateInfo>; after: Optional<PredicateInfo>; @@ -472,7 +560,7 @@ function ComparePerformanceWithData(props: { const comparison = data?.comparison; - const [hideCacheHits, setHideCacheHits] = useState(false); + const [hideCacheHits, setHideCacheHits] = useState(true); const [sortOrder, setSortOrder] = useState<"delta" | "absDelta">("absDelta"); @@ -480,19 +568,27 @@ function ComparePerformanceWithData(props: { const [isPerEvaluation, setPerEvaluation] = useState(false); - const nameSet = useMemo( - () => union(from.data.names, to.data.names), - [from, to], - ); + const keySet = useMemo(() => union(from.keys(), to.keys()), [from, to]); const hasCacheHitMismatch = useRef(false); + const shadowedCacheHitsFrom = useMemo( + () => + hideCacheHits ? from.getPredicatesShadowedByCacheHit(to) : new Set(), + [from, to, hideCacheHits], + ); + const shadowedCacheHitsTo = useMemo( + () => + hideCacheHits ? to.getPredicatesShadowedByCacheHit(from) : new Set(), + [from, to, hideCacheHits], + ); + const rows: Row[] = useMemo(() => { hasCacheHitMismatch.current = false; - return Array.from(nameSet) - .map((name) => { - const before = from.getTupleCountInfo(name); - const after = to.getTupleCountInfo(name); + return Array.from(keySet) + .map((key) => { + const before = from.getTupleCountInfo(key); + const after = to.getTupleCountInfo(key); const beforeValue = metricGetOptional(metric, before, isPerEvaluation); const afterValue = metricGetOptional(metric, after, isPerEvaluation); if (beforeValue === afterValue) { @@ -507,14 +603,39 @@ function ComparePerformanceWithData(props: { return undefined!; } } + if ( + (isPresent(before) && + !isPresent(after) && + shadowedCacheHitsFrom.has(before.raHash)) || + (isPresent(after) && + !isPresent(before) && + shadowedCacheHitsTo.has(after.raHash)) + ) { + return undefined!; + } const diff = (isPresent(afterValue) ? afterValue : 0) - (isPresent(beforeValue) ? beforeValue : 0); - return { name, before, after, diff } satisfies Row; + const name = isPresent(before) + ? before.name + : isPresent(after) + ? after.name + : key; + return { key, name, before, after, diff } satisfies Row; }) .filter((x) => !!x) .sort(getSortOrder(sortOrder)); - }, [nameSet, from, to, metric, hideCacheHits, sortOrder, isPerEvaluation]); + }, [ + keySet, + from, + to, + metric, + hideCacheHits, + sortOrder, + isPerEvaluation, + shadowedCacheHitsFrom, + shadowedCacheHitsTo, + ]); const { totalBefore, totalAfter, totalDiff } = useMemo(() => { let totalBefore = 0; @@ -575,23 +696,14 @@ function ComparePerformanceWithData(props: { <> <ViewTitle>Performance comparison</ViewTitle> {comparison && hasCacheHitMismatch.current && ( - <WarningBox> - <strong>Inconsistent cache hits</strong> - <br /> - Some predicates had a cache hit on one side but not the other. For - more accurate results, try running the{" "} - <strong>CodeQL: Clear Cache</strong> command before each query. - <br /> - <br /> - <label> - <input - type="checkbox" - checked={hideCacheHits} - onChange={() => setHideCacheHits(!hideCacheHits)} - /> - Hide predicates with cache hits - </label> - </WarningBox> + <label> + <input + type="checkbox" + checked={hideCacheHits} + onChange={() => setHideCacheHits(!hideCacheHits)} + /> + Hide differences due to cache hits + </label> )} <RenamingInput renamings={renamings} setRenamings={setRenamings} /> Compare{" "} @@ -860,3 +972,14 @@ function collatePipelines( function samePipeline(a: string[], b: string[]) { return a.length === b.length && a.every((x, i) => x === b[i]); } + +function getPipelineSummaryHash(pipelines: Record<string, PipelineSummary>) { + // Note: we can't import "crypto" here because it is not available in the browser, + // so we just concatenate the hashes of the individual pipelines. + const keys = Object.keys(pipelines).sort(); + let result = ""; + for (const key of keys) { + result += `${pipelines[key].hash};`; + } + return result; +}