Skip to content

Commit

Permalink
[SPARK-2225] Turn HAVING without GROUP BY into WHERE.
Browse files (browse the repository at this point in the history)
@willb

Author: Reynold Xin <rxin@apache.org>

Closes apache#1161 from rxin/having-filter and squashes the following commits:

fa8359a [Reynold Xin] [SPARK-2225] Turn HAVING without GROUP BY into WHERE.
  • Loading branch information
rxin committed Jun 20, 2014
1 parent 171ebb3 commit 0ac71d1
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 23 deletions.
15 changes: 4 additions & 11 deletions sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,6 @@ private[hive] object HiveQl {
withWhere)
}.getOrElse(withWhere)


// The projection of the query can either be a normal projection, an aggregation
// (if there is a group by) or a script transformation.
val withProject = transformation.getOrElse {
Expand All @@ -581,16 +580,10 @@ private[hive] object HiveQl {
val withDistinct =
if (selectDistinctClause.isDefined) Distinct(withProject) else withProject

val withHaving = havingClause.map { h =>

if (groupByClause == None) {
throw new SemanticException("HAVING specified without GROUP BY")
}

val havingExpr = h.getChildren.toSeq match {
case Seq(hexpr) => nodeToExpr(hexpr)
}

// Apply the HAVING clause, if one is present, as a Filter over the plan built so far;
// when there is no HAVING clause, withDistinct is passed through unchanged.
val withHaving = havingClause.map { h =>
// The HAVING node is matched as having exactly one child: the predicate expression.
val havingExpr = h.getChildren.toSeq match { case Seq(hexpr) => nodeToExpr(hexpr) }
// Note that we added a cast to boolean. If the expression itself is already boolean,
// the optimizer will get rid of the unnecessary cast.
Filter(Cast(havingExpr, BooleanType), withDistinct)
}.getOrElse(withDistinct)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,27 +227,22 @@ class HiveQuerySuite extends HiveComparisonTest {
// Positive case: a HAVING predicate applied to the result of a GROUP BY aggregate.
test("SPARK-2180: HAVING support in GROUP BY clauses (positive)") {
// Four (value, attr) pairs; the index added by zipWithIndex becomes each row's key.
val fixture = List(("foo", 2), ("bar", 1), ("foo", 4), ("bar", 3))
.zipWithIndex.map {case Pair(Pair(value, attr), key) => HavingRow(key, value, attr)}

// Register the fixture as the "having_test" table queried below.
TestHive.sparkContext.parallelize(fixture).registerAsTable("having_test")

val results =
hql("SELECT value, max(attr) AS attr FROM having_test GROUP BY value HAVING attr > 3")
.collect()
.map(x => Pair(x.getString(0), x.getInt(1)))

// max(attr) is 4 for "foo" and 3 for "bar", so only "foo" passes attr > 3.
assert(results === Array(Pair("foo", 4)))

// NOTE(review): presumably clears the registered table so it does not leak into
// other tests -- confirm against TestHive.reset().
TestHive.reset()
}

// Expects hql to throw when a HAVING clause is used on a query with no GROUP BY.
// NOTE(review): the commit title says HAVING without GROUP BY is now turned into a WHERE
// filter, so this appears to be the removed side of the diff -- confirm.
test("SPARK-2180: HAVING without GROUP BY raises exception") {
intercept[Exception] {
hql("SELECT value, attr FROM having_test HAVING attr > 3")
}
}

test("SPARK-2180: HAVING with non-boolean clause raises no exceptions") {
val results = hql("select key, count(*) c from src group by key having c").collect()
// Smoke test: a HAVING expression that is not boolean (the count alias `c`) must run
// to completion without throwing; the collected rows are not inspected.
test("SPARK-2180: HAVING with non-boolean clause raises no exceptions") {
hql("select key, count(*) c from src group by key having c").collect()
}

// A query with HAVING but no GROUP BY should run and act as a plain row filter.
test("SPARK-2225: turn HAVING without GROUP BY into a simple filter") {
// Only checks that fewer than 100 rows come back for key > 490; the rows themselves
// are not compared.
assert(hql("select key from src having key > 490").collect().size < 100)
}

test("Query Hive native command execution result") {
Expand Down

0 comments on commit 0ac71d1

Please sign in to comment.