# Stubs for pyspark.sql.streaming (Python 3.5)
#
from typing import overload
from typing import Any, Callable, Dict, List, Optional, Union
from pyspark.sql._typing import SupportsProcess
from pyspark.sql.context import SQLContext
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.readwriter import OptionUtils
from pyspark.sql.types import Row, StructType
from pyspark.sql.utils import StreamingQueryException
from py4j.java_gateway import JavaObject # type: ignore
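
# Handle to a streaming query running continuously in the background; instances
# are returned by DataStreamWriter.start() and used to monitor and manage the query.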
class StreamingQuery:
    def __init__(self, jsq: JavaObject) -> None: ...
    @property
    def id(self) -> str: ...
    @property
    def runId(self) -> str: ...
    @property
    def name(self) -> str: ...
    @property
    def isActive(self) -> bool: ...
    def awaitTermination(self, timeout: Optional[int] = ...) -> Optional[bool]: ...
    @property
    def status(self) -> Dict[str, Any]: ...
    @property
    def recentProgress(self) -> List[Dict[str, Any]]: ...
    @property
    def lastProgress(self) -> Optional[Dict[str, Any]]: ...
    def processAllAvailable(self) -> None: ...
    def stop(self) -> None: ...
    def explain(self, extended: bool = ...) -> None: ...
    def exception(self) -> Optional[StreamingQueryException]: ...
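
# Manages the StreamingQuery instances active on a session
# (exposed in PySpark as SparkSession.streams).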
class StreamingQueryManager:
    def __init__(self, jsqm: JavaObject) -> None: ...
    @property
    def active(self) -> List[StreamingQuery]: ...
    def get(self, id: str) -> StreamingQuery: ...
    def awaitAnyTermination(self, timeout: Optional[int] = ...) -> bool: ...
    def resetTerminated(self) -> None: ...
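
# Interface for loading a streaming DataFrame from external storage
# (exposed in PySpark as SparkSession.readStream).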
class DataStreamReader(OptionUtils):
    def __init__(self, spark: SQLContext) -> None: ...
    def format(self, source: str) -> DataStreamReader: ...
    def schema(self, schema: Union[StructType, str]) -> DataStreamReader: ...
    def option(self, key: str, value: Union[bool, float, int, str]) -> DataStreamReader: ...
    def options(self, **options: str) -> DataStreamReader: ...
    def load(self, path: Optional[str] = ..., format: Optional[str] = ..., schema: Optional[StructType] = ..., **options: str) -> DataFrame: ...
    def json(self, path: str, schema: Optional[str] = ..., primitivesAsString: Optional[Union[bool, str]] = ..., prefersDecimal: Optional[Union[bool, str]] = ..., allowComments: Optional[Union[bool, str]] = ..., allowUnquotedFieldNames: Optional[Union[bool, str]] = ..., allowSingleQuotes: Optional[Union[bool, str]] = ..., allowNumericLeadingZero: Optional[Union[bool, str]] = ..., allowBackslashEscapingAnyCharacter: Optional[Union[bool, str]] = ..., mode: Optional[str] = ..., columnNameOfCorruptRecord: Optional[str] = ..., dateFormat: Optional[str] = ..., timestampFormat: Optional[str] = ..., multiLine: Optional[Union[bool, str]] = ..., allowUnquotedControlChars: Optional[Union[bool, str]] = ..., lineSep: Optional[str] = ..., locale: Optional[str] = ..., dropFieldIfAllNull: Optional[Union[bool, str]] = ..., encoding: Optional[str] = ...) -> DataFrame: ...
    def orc(self, path: str) -> DataFrame: ...
    def parquet(self, path: str) -> DataFrame: ...
    def text(self, path: str, wholetext: bool = ..., lineSep: Optional[str] = ...) -> DataFrame: ...
    def csv(self, path: str, schema: Optional[StructType] = ..., sep: Optional[str] = ..., encoding: Optional[str] = ..., quote: Optional[str] = ..., escape: Optional[str] = ..., comment: Optional[str] = ..., header: Optional[Union[bool, str]] = ..., inferSchema: Optional[Union[bool, str]] = ..., ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = ..., ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = ..., nullValue: Optional[str] = ..., nanValue: Optional[str] = ..., positiveInf: Optional[str] = ..., negativeInf: Optional[str] = ..., dateFormat: Optional[str] = ..., timestampFormat: Optional[str] = ..., maxColumns: Optional[Union[int, str]] = ..., maxCharsPerColumn: Optional[Union[int, str]] = ..., mode: Optional[str] = ..., columnNameOfCorruptRecord: Optional[str] = ..., multiLine: Optional[Union[bool, str]] = ..., charToEscapeQuoteEscaping: Optional[Union[bool, str]] = ..., enforceSchema: Optional[Union[bool, str]] = ..., emptyValue: Optional[str] = ..., locale: Optional[str] = ..., lineSep: Optional[str] = ...) -> DataFrame: ...
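
# Interface for writing a streaming DataFrame out to external storage
# (exposed in PySpark as DataFrame.writeStream).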
class DataStreamWriter:
    def __init__(self, df: DataFrame) -> None: ...
    def outputMode(self, outputMode: str) -> DataStreamWriter: ...
    def format(self, source: str) -> DataStreamWriter: ...
    def option(self, key: str, value: Union[bool, float, int, str]) -> DataStreamWriter: ...
    def options(self, **options: str) -> DataStreamWriter: ...
    @overload
    def partitionBy(self, *cols: str) -> DataStreamWriter: ...
    @overload
    def partitionBy(self, __cols: List[str]) -> DataStreamWriter: ...
    def queryName(self, queryName: str) -> DataStreamWriter: ...
    @overload
    def trigger(self, processingTime: str) -> DataStreamWriter: ...
    @overload
    def trigger(self, once: bool) -> DataStreamWriter: ...
    @overload
    def trigger(self, continuous: str) -> DataStreamWriter: ...
    def start(self, path: Optional[str] = ..., format: Optional[str] = ..., outputMode: Optional[str] = ..., partitionBy: Optional[Union[str, List[str]]] = ..., queryName: Optional[str] = ..., **options: str) -> StreamingQuery: ...
    @overload
    def foreach(self, f: Callable[[Row], None]) -> DataStreamWriter: ...
    @overload
    def foreach(self, f: SupportsProcess) -> DataStreamWriter: ...
    def foreachBatch(self, func: Callable[[DataFrame, int], None]) -> DataStreamWriter: ...
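
# Illustrative usage of the classes stubbed above (a sketch, not part of the stub;
# the paths, format names, and option values are placeholders):
#
#   df = (spark.readStream                  # DataStreamReader
#         .format("json")
#         .schema(schema)
#         .load("/path/to/input"))
#   query = (df.writeStream                 # DataStreamWriter
#            .outputMode("append")
#            .format("parquet")
#            .option("checkpointLocation", "/path/to/checkpoint")
#            .start("/path/to/output"))     # returns a StreamingQuery
#   query.awaitTermination()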