Skip to content

Commit

Permalink
[SPARK-2470] PEP8 fixes to join.py
Browse files Browse the repository at this point in the history
  • Loading branch information
nchammas committed Jul 20, 2014
1 parent d14f2f1 commit c85e1e5
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion python/pyspark/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@

from pyspark.resultiterable import ResultIterable


def _do_python_join(rdd, other, numPartitions, dispatch):
vs = rdd.map(lambda (k, v): (k, (1, v)))
ws = other.map(lambda (k, v): (k, (2, v)))
return vs.union(ws).groupByKey(numPartitions).flatMapValues(lambda x : dispatch(x.__iter__()))
return vs.union(ws).groupByKey(numPartitions).flatMapValues(lambda x: dispatch(x.__iter__()))


def python_join(rdd, other, numPartitions):
Expand Down Expand Up @@ -85,6 +86,7 @@ def make_mapper(i):
vrdds = [rdd.map(make_mapper(i)) for i, rdd in enumerate(rdds)]
union_vrdds = reduce(lambda acc, other: acc.union(other), vrdds)
rdd_len = len(vrdds)

def dispatch(seq):
bufs = [[] for i in range(rdd_len)]
for (n, v) in seq:
Expand Down

0 comments on commit c85e1e5

Please sign in to comment.