Make TiSpark's Explain clearer and easier to read (pingcap#2439)

qidi1 committed Jul 20, 2022
1 parent a314f90 commit 86bc504

Showing 15 changed files with 986 additions and 404 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/alter-primary-key-false-test.yml
@@ -0,0 +1,42 @@
name: alter-primary-key-false-test

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  test:
    runs-on: ubuntu-latest
    name: Java adopt sample
    steps:
      - name: checkout
        uses: actions/checkout@v2

      - name: set up JDK
        uses: actions/setup-java@v3
        with:
          java-version: '8'
          distribution: 'adopt'
          cache: maven

      - name: add host and copy properties
        run: |
          echo -e "127.0.0.1 pd0 \n127.0.0.1 tikv0" | sudo tee -a /etc/hosts
          sudo cp -r config /config
          sed -i 's/^alter-primary-key.*/alter-primary-key=false/g' ./config/tidb-4.0.toml
          echo "spark.sql.catalog.tidb_catalog=org.apache.spark.sql.catalyst.catalog.TiCatalog" > tidb_config.properties
          mv tidb_config.properties core/src/test/resources/tidb_config.properties

      - name: build docker
        run: docker-compose -f docker-compose-4.0.yaml up -d

      - name: build
        run: mvn clean package -Dmaven.test.skip=true -B

      - name: test
        run: mvn test -am -pl core -Dtest=moo -DwildcardSuites=org.apache.spark.sql.catalyst.plans.logical.LogicalPlanTestSuite -DfailIfNoTests=false
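
Note on the final step: -Dtest=moo deliberately matches no JUnit test (and -DfailIfNoTests=false keeps Surefire from failing on that), so only the ScalaTest suite named by -DwildcardSuites runs. A minimal sketch of such a plan-test suite, assuming the BasePlanTest helpers changed later in this commit (class, table, and index names are hypothetical):

    class ExamplePlanSuite extends BasePlanTest {
      test("filter on an indexed column becomes an index lookup") {
        val df = spark.sql("select * from t where a = 1")
        df.explain()                 // print the TiSpark plan under test
        checkIsIndexLookUp(df, "t")  // scan-type assertion from BasePlanTest
        checkIndex(df, "idx_a")      // index-name assertion from BasePlanTest
      }
    }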

@@ -139,7 +139,7 @@ case class ColumnarRegionTaskExec(
  override def simpleString(maxFields: Int): String = verboseString(maxFields)

  override def verboseString(maxFields: Int): String =
-   s"TiSpark $nodeName{downgradeThreshold=$downgradeThreshold,downgradeFilter=${dagRequest.getFilters}"
+   s"TiSpark $nodeName{downgradeThreshold=$downgradeThreshold,downgradeFilter=${dagRequest.getDowngradeFilters}"

  private def inputRDD(): RDD[InternalRow] = {
    val numOutputRows = longMetric("numOutputRows")
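
verboseString is what df.explain(true) prints for this node. The label said downgradeFilter, but the old code printed dagRequest.getFilters; it now prints the dedicated downgrade filters. A hedged sketch of the effect (the query and printed values are illustrative; only the template string comes from the code above):

    val df = spark.sql("select * from t where a = 1")
    df.explain(true)
    // Illustrative output fragment after this change (values are made up):
    //   TiSpark ColumnarRegionTaskExec{downgradeThreshold=1000000,downgradeFilter=[a EQUAL 1]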

@@ -51,7 +51,7 @@ class InvisibleIndexSuite extends BasePlanTest {
      "insert into t_invisible_index values(1, 1),(2, 2),(3, 3),(4, 4),(5, 5),(6, 6)")
    tidbStmt.execute("analyze table t_invisible_index")
    val df = spark.sql("select * from t_invisible_index where a = 1")
-   checkIsIndexScan(df, "t_invisible_index")
+   checkIsIndexLookUp(df, "t_invisible_index")
    checkIndex(df, "idx_a")
  }

@@ -65,7 +65,7 @@
    tidbStmt.execute("analyze table t_invisible_index")
    val df = spark.sql("select * from t_invisible_index where a = 1")
    intercept[TestFailedException] {
-     checkIsIndexScan(df, "t_invisible_index")
+     checkIsIndexLookUp(df, "t_invisible_index")
      checkIndex(df, "idx_a")
    }

@@ -16,7 +16,7 @@

package org.apache.spark.sql.catalyst.plans

-import com.pingcap.tikv.meta.TiDAGRequest.IndexScanType
+import com.pingcap.tikv.meta.TiDAGRequest.ScanType
import com.pingcap.tikv.meta.{TiDAGRequest, TiIndexInfo}
import org.apache.spark.sql.execution.{ColumnarCoprocessorRDD, ColumnarRegionTaskExec, SparkPlan}
import org.apache.spark.sql.{BaseTiSparkTest, Dataset}
@@ -32,24 +32,38 @@ class BasePlanTest extends BaseTiSparkTest {
    case plan: ColumnarCoprocessorRDD => plan
    case plan: ColumnarRegionTaskExec => plan
  }
- val extractDAGRequest: PartialFunction[SparkPlan, TiDAGRequest] = {
-   case plan: ColumnarRegionTaskExec => plan.dagRequest
-   case plan: ColumnarCoprocessorRDD => plan.dagRequest
+ val extractDAGRequest: PartialFunction[SparkPlan, Seq[TiDAGRequest]] = {
+   case plan: ColumnarRegionTaskExec => {
+     List(plan.dagRequest)
+   }
+   case plan: ColumnarCoprocessorRDD => {
+     plan.tiRDDs.map(x => {
+       x.dagRequest
+     })
+   }
  }

  def explain[T](df: Dataset[T]): Unit = df.explain

  def extractDAGRequests[T](df: Dataset[T]): Seq[TiDAGRequest] =
-   toPlan(df).collect { extractDAGRequest }
+   toPlan(df).collect {
+     extractDAGRequest
+   }.flatten

  def extractTiSparkPlans[T](df: Dataset[T]): Seq[SparkPlan] =
-   toPlan(df).collect { extractTiSparkPlan }
+   toPlan(df).collect {
+     extractTiSparkPlan
+   }

  def extractCoprocessorRDDs[T](df: Dataset[T]): Seq[ColumnarCoprocessorRDD] =
-   toPlan(df).collect { extractCoprocessorRDD }
+   toPlan(df).collect {
+     extractCoprocessorRDD
+   }

  def extractRegionTaskExecs[T](df: Dataset[T]): List[ColumnarRegionTaskExec] =
-   toPlan(df).collect { extractRegionTaskExec }.toList
+   toPlan(df).collect {
+     extractRegionTaskExec
+   }.toList

  def checkIndex[T](df: Dataset[T], index: String): Unit = {
    if (!extractCoprocessorRDDs(df).exists(checkIndexName(_, index))) {
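
The reason extractDAGRequest now returns Seq[TiDAGRequest]: a ColumnarCoprocessorRDD may carry several TiRDDs (plan.tiRDDs), each with its own DAG request, so every extraction site flattens. A minimal usage sketch, assuming a TiDB-backed DataFrame over a hypothetical table t:

    val df = spark.sql("select * from t where a = 1")
    val requests: Seq[TiDAGRequest] = extractDAGRequests(df) // one request per TiRDD, flattened
    requests.map(_.getScanType)                              // e.g. TABLE_READER, INDEX_READER, INDEX_LOOKUP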

@@ -64,41 +78,38 @@
  private def extractIndexInfo(coprocessorRDD: ColumnarCoprocessorRDD): TiIndexInfo =
    coprocessorRDD.dagRequest.getIndexInfo

- def checkIsTableScan[T](df: Dataset[T], tableName: String): Unit =
-   checkIndexScanType(df, tableName, IndexScanType.TABLE_SCAN)
+ def checkIsTableReader[T](df: Dataset[T], tableName: String): Unit =
+   checkScanType(df, tableName, ScanType.TABLE_READER)

- private def checkIndexScanType[T](
-     df: Dataset[T],
-     tableName: String,
-     indexScanType: IndexScanType): Unit = {
+ private def checkScanType[T](df: Dataset[T], tableName: String, scanType: ScanType): Unit = {
    val tiSparkPlans = extractTiSparkPlans(df)
    if (tiSparkPlans.isEmpty) {
      fail(df, "No TiSpark plans found in Dataset")
    }
-   val filteredRequests = tiSparkPlans.collect { extractDAGRequest }.filter {
+   val filteredRequests = tiSparkPlans.collect { extractDAGRequest }.flatten.filter {
      _.getTableInfo.getName.equalsIgnoreCase(tableName)
    }
    if (filteredRequests.isEmpty) {
      fail(df, s"No TiSpark plan contains desired table $tableName")
-   } else if (!tiSparkPlans.exists(checkIndexScanType(_, indexScanType))) {
+   } else if (!tiSparkPlans.exists(checkScanType(_, scanType))) {
      fail(
        df,
-       s"Index scan type not match: ${filteredRequests.head.getIndexScanType}, expected $indexScanType")
+       s"Index scan type not match: ${filteredRequests.head.getScanType}, expected $scanType")
    }
  }

- private def checkIndexScanType(plan: SparkPlan, indexScanType: IndexScanType): Boolean =
+ private def checkScanType(plan: SparkPlan, scanType: ScanType): Boolean =
    plan match {
-     case p: ColumnarCoprocessorRDD => getIndexScanType(p).equals(indexScanType)
+     case p: ColumnarCoprocessorRDD => getScanType(p).equals(scanType)
      case _ => false
    }

- private def getIndexScanType(coprocessorRDD: ColumnarCoprocessorRDD): IndexScanType = {
-   getIndexScanType(coprocessorRDD.dagRequest)
+ private def getScanType(coprocessorRDD: ColumnarCoprocessorRDD): ScanType = {
+   getScanType(coprocessorRDD.dagRequest)
  }

- private def getIndexScanType(dagRequest: TiDAGRequest): IndexScanType = {
-   dagRequest.getIndexScanType
+ private def getScanType(dagRequest: TiDAGRequest): ScanType = {
+   dagRequest.getScanType
  }

  /**
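
The IndexScanType-to-ScanType rename appears to align TiSpark's explain output with TiDB's EXPLAIN operator names (TableReader, IndexReader, IndexLookUp), which fits the commit title. A sketch of the mapping (the old-to-new pairs are taken from this diff; the glosses are informal):

    import com.pingcap.tikv.meta.TiDAGRequest.ScanType

    // Informal glosses; not exhaustive if ScanType gains more values.
    def describe(scanType: ScanType): String = scanType match {
      case ScanType.TABLE_READER => "reads rows directly from the table (was TABLE_SCAN)"
      case ScanType.INDEX_READER => "covering index only, no row lookup (was COVERING_INDEX_SCAN)"
      case ScanType.INDEX_LOOKUP => "index scan plus table-row lookup (was INDEX_SCAN)"
    }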

@@ -109,19 +120,19 @@
    fail(message)
  }

- def checkIsCoveringIndexScan[T](df: Dataset[T], tableName: String): Unit =
-   checkIndexScanType(df, tableName, IndexScanType.COVERING_INDEX_SCAN)
+ def checkIsIndexReader[T](df: Dataset[T], tableName: String): Unit =
+   checkScanType(df, tableName, ScanType.INDEX_READER)

- def checkIsIndexScan[T](df: Dataset[T], tableName: String): Unit =
-   checkIndexScanType(df, tableName, IndexScanType.INDEX_SCAN)
+ def checkIsIndexLookUp[T](df: Dataset[T], tableName: String): Unit =
+   checkScanType(df, tableName, ScanType.INDEX_LOOKUP)

  def checkEstimatedRowCount[T](df: Dataset[T], tableName: String, answer: Double): Unit = {
    val estimatedRowCount = getEstimatedRowCount(df, tableName)
    assert(estimatedRowCount === answer)
  }

  def getEstimatedRowCount[T](df: Dataset[T], tableName: String): Double =
-   extractTiSparkPlans(df).collect { extractDAGRequest }.head.getEstimatedCount
+   extractDAGRequests(df).head.getEstimatedCount

  def toPlan[T](df: Dataset[T]): SparkPlan = df.queryExecution.sparkPlan
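
With the renames in place, callers migrate mechanically; a short sketch modeled on the InvisibleIndexSuite change above (the row-count expectation is hypothetical):

    val df = spark.sql("select * from t_invisible_index where a = 1")
    checkIsIndexLookUp(df, "t_invisible_index")           // formerly checkIsIndexScan
    checkIndex(df, "idx_a")
    checkEstimatedRowCount(df, "t_invisible_index", 1.0)  // hypothetical expected estimate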
