This repository has been archived by the owner on Nov 22, 2022. It is now read-only.

Commit: Merge branch 'master' into patch-1
Johannes Huber committed Aug 20, 2019
2 parents eeb854b + 0cd94f5 commit d9dd735
Showing 6 changed files with 41 additions and 16 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
@@ -9,8 +9,10 @@ python:
- "3.6"
- "3.5"

before_install:
- pip install -r requirements-tests-py3.txt

install:
- pip install .
- pip install -r requirements-tests-py3.txt

script: ./tests/mypy_test.py
3 changes: 2 additions & 1 deletion requirements-tests-py3.txt
@@ -2,4 +2,5 @@ git+https://github.com/python/mypy.git@master
 typed-ast>=1.0.4
 flake8==3.5.0
 flake8-bugbear==18.2.0
-pyspark>=2.4.0,<3.0.0
+# pyspark>=3.0.0dev0,<3.1.0
+https://bitbucket.org/zero323/spark-builds/downloads/pyspark-3.0.0.dev0.tar.gz
4 changes: 2 additions & 2 deletions setup.py
@@ -24,7 +24,7 @@ def list_packages(src_path=src_path):
 setup(
     name='pyspark-stubs',
     package_dir={'': src_path},
-    version='2.4.0',
+    version='3.0.0dev0',
     description='A collection of the Apache Spark stub files',
     long_description=(open('README.rst').read() if os.path.exists('README.rst')
                       else ''),
@@ -33,5 +33,5 @@ def list_packages(src_path=src_path):
     package_data={
         '': ['*.pyi', 'py.typed']
     },
-    install_requires=['pyspark>=2.4.0,<3.0.0']
+    install_requires=['pyspark>=3.0.0dev0,<3.1.0']
 )
4 changes: 2 additions & 2 deletions third_party/3/pyspark/sql/_typing.pyi
@@ -1,4 +1,4 @@
-from typing import Any, List, Optional, TypeVar, Union, SupportsFloat
+from typing import Any, List, Optional, TypeVar, Union
 from typing_extensions import Protocol
 import datetime
 import decimal
@@ -7,10 +7,10 @@ import pyspark.sql.column
 import pyspark.sql.types

 ColumnOrName = Union[pyspark.sql.column.Column, str]
-ColumnNameOrFloat = Union[ColumnOrName, SupportsFloat]
 DecimalLiteral = decimal.Decimal
 DateTimeLiteral = Union[datetime.datetime, datetime.date]
 LiteralType = Union[bool, int, float, str]
+DataTypeOrString = Union[pyspark.sql.types.DataType, str]

 class SupportsOpen(Protocol):
     def open(self, partition_id: int, epoch_id: int) -> bool:
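
For orientation, a minimal sketch of what the ColumnOrName alias admits at a call site (illustrative only, not part of the commit; it assumes a local SparkSession and a toy DataFrame):

# Illustrative only: ColumnOrName means a stubbed function accepts either a
# Column object or a column name given as a plain string.
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "abc")], ["id", "label"])

df.select(F.ascii(F.col("label")))   # Column satisfies ColumnOrName
df.select(F.ascii("label"))          # plain str satisfies ColumnOrName
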
7 changes: 7 additions & 0 deletions third_party/3/pyspark/sql/dataframe.pyi
@@ -62,8 +62,15 @@ class DataFrame:
     def storageLevel(self) -> StorageLevel: ...
     def unpersist(self, blocking: bool = ...) -> DataFrame: ...
     def coalesce(self, numPartitions: int) -> DataFrame: ...
+    @overload
     def repartition(self, numPartitions: int, *cols: ColumnOrName) -> DataFrame: ...
+    @overload
+    def repartition(self, *cols: ColumnOrName) -> DataFrame: ...
+    @overload
+    def repartitionByRange(self, numPartitions: int, *cols: ColumnOrName) -> DataFrame: ...
+    @overload
+    def repartitionByRange(self, *cols: ColumnOrName) -> DataFrame: ...
     def distinct(self) -> DataFrame: ...
     def sample(self, withReplacement: Optional[bool], fraction: float, seed: Optional[int] = ...) -> DataFrame: ...
     def sampleBy(self, col: str, fractions: float, seed: Optional[int] = ...) -> DataFrame: ...
     def randomSplit(self, weights: List[float], seed: Optional[int] = ...) -> List[DataFrame]: ...
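
The added overloads are meant to cover both call shapes PySpark supports, with and without an explicit partition count. A minimal sketch of the calls they describe (illustrative, not part of the commit):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(100).withColumnRenamed("id", "key")

df.repartition(10, "key")         # numPartitions plus partitioning columns
df.repartition("key")             # partitioning columns only
df.repartitionByRange(4, "key")   # numPartitions plus ordering columns
df.repartitionByRange("key")      # ordering columns only
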
35 changes: 25 additions & 10 deletions third_party/3/pyspark/sql/functions.pyi
@@ -1,15 +1,15 @@
 # Stubs for pyspark.sql.functions (Python 3.5)

 from typing import overload
-from typing import Any, Optional, Union, Dict, Callable
+from typing import Any, Optional, Union, Dict, Callable, SupportsFloat

 import pandas.core.frame  # type: ignore
 import pandas.core.series  # type: ignore

-from pyspark.sql._typing import ColumnOrName, ColumnNameOrFloat
+from pyspark.sql._typing import ColumnOrName, DataTypeOrString
 from pyspark.sql.column import Column
 from pyspark.sql.dataframe import DataFrame
-from pyspark.sql.types import ArrayType, DataType, StructField, StructType
+from pyspark.sql.types import ArrayType, DataType, StructType

 def approxCountDistinct(col: ColumnOrName, rsd: Optional[float] = ...) -> Column: ...
 def approx_count_distinct(col: Column, rsd: Optional[float] = ...) -> Column: ...
@@ -159,7 +159,12 @@ def asc_nulls_last(col: ColumnOrName) -> Column: ...
 def ascii(col: ColumnOrName) -> Column: ...
 def asin(col: ColumnOrName) -> Column: ...
 def atan(col: ColumnOrName) -> Column: ...
-def atan2(col1: ColumnNameOrFloat, col2: ColumnNameOrFloat) -> Column: ...
+@overload
+def atan2(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
+@overload
+def atan2(col1: SupportsFloat, col2: ColumnOrName) -> Column: ...
+@overload
+def atan2(col1: ColumnOrName, col2: SupportsFloat) -> Column: ...
 def avg(col: ColumnOrName) -> Column: ...
 def base64(col: ColumnOrName) -> Column: ...
 def bitwiseNOT(col: ColumnOrName) -> Column: ...
@@ -181,7 +186,12 @@ def desc_nulls_last(col: ColumnOrName) -> Column: ...
 def exp(col: ColumnOrName) -> Column: ...
 def expm1(col: ColumnOrName) -> Column: ...
 def floor(col: ColumnOrName) -> Column: ...
-def hypot(col1: ColumnNameOrFloat, col2: ColumnNameOrFloat) -> Column: ...
+@overload
+def hypot(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
+@overload
+def hypot(col1: SupportsFloat, col2: ColumnOrName) -> Column: ...
+@overload
+def hypot(col1: ColumnOrName, col2: SupportsFloat) -> Column: ...
 def kurtosis(col: ColumnOrName) -> Column: ...
 def lit(col: Any) -> Column: ...
 def log10(col: ColumnOrName) -> Column: ...
@@ -192,7 +202,12 @@ def max(col: ColumnOrName) -> Column: ...
 def mean(col: ColumnOrName) -> Column: ...
 def min(col: ColumnOrName) -> Column: ...
 def percent_rank() -> Column: ...
-def pow(col1: ColumnNameOrFloat, col2: ColumnNameOrFloat) -> Column: ...
+@overload
+def pow(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
+@overload
+def pow(col1: SupportsFloat, col2: ColumnOrName) -> Column: ...
+@overload
+def pow(col1: ColumnOrName, col2: SupportsFloat) -> Column: ...
 def radians(col: ColumnOrName) -> Column: ...
 def rank() -> Column: ...
 def rint(col: ColumnOrName) -> Column: ...
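
Replacing the ColumnNameOrFloat alias with explicit overloads keeps the column/column, float/column, and column/float combinations, with typing.SupportsFloat standing in for the numeric argument. A minimal sketch of calls the overloads describe (illustrative, not part of the commit):

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(3.0, 4.0)], ["x", "y"])

df.select(F.hypot(F.col("x"), F.col("y")))   # ColumnOrName, ColumnOrName
df.select(F.atan2(1.0, F.col("y")))          # SupportsFloat, ColumnOrName
df.select(F.pow(F.col("x"), 2.0))            # ColumnOrName, SupportsFloat
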
@@ -222,17 +237,17 @@ def variance(col: ColumnOrName) -> Column: ...
 class UserDefinedFunction:
     func = ... # type: Callable[..., Any]
     returnType = ... # type: DataType
-    def __init__(self, func: Callable[..., Any], returnType: DataType, name: Optional[str] = ...) -> None: ...
+    def __init__(self, func: Callable[..., Any], returnType: DataTypeOrString, name: Optional[str] = ...) -> None: ...
     def __call__(self, *cols: ColumnOrName) -> Column: ...

-def udf(f: Callable[..., Any], returnType: DataType = ...) -> Callable[..., Column]: ...
+def udf(f: Callable[..., Any], returnType: DataTypeOrString = ...) -> Callable[..., Column]: ...

 class PandasUDFType:
     SCALAR: int = ...
     GROUPED_MAP: int = ...
     GROUPED_AGG: int = ...

 @overload
-def pandas_udf(f: Callable[..., pandas.core.series.Series], returnType: StructField, functionType: int) -> Callable[..., Column]: ...
+def pandas_udf(f: Callable[..., pandas.core.series.Series], returnType: DataTypeOrString, functionType: int = ...) -> Callable[..., Column]: ...
 @overload
-def pandas_udf(f: Callable[[pandas.core.frame.DataFrame], pandas.core.frame.DataFrame], returnType: StructType, functionType: int) -> Callable[..., Column]: ...  # type: ignore
+def pandas_udf(f: Callable[[pandas.core.frame.DataFrame], pandas.core.frame.DataFrame], returnType: Union[StructType, str], functionType: int = ...) -> Callable[..., Column]: ...  # type: ignore
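
Typing returnType as DataTypeOrString (and Union[StructType, str] for the DataFrame-to-DataFrame overload) reflects that PySpark accepts the return type either as a DataType instance or as a DDL-formatted string, and giving functionType a default mirrors its optional use. A minimal sketch of the calls involved (illustrative, not part of the commit; running the pandas UDF requires pyarrow):

from pyspark.sql.functions import pandas_udf, udf, PandasUDFType
from pyspark.sql.types import IntegerType

plus_one = udf(lambda x: x + 1, IntegerType())   # returnType as a DataType instance
shout = udf(lambda s: s.upper(), "string")       # returnType as a DDL string
times_two = pandas_udf(lambda s: s * 2, "long", PandasUDFType.SCALAR)
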
