In [1]:
!conda env list


# conda environments:
#
base                   C:\anaconda3
pyspark_env            C:\anaconda3\envs\pyspark_env



In [3]:
# NOTE(review): each `!` command runs in its own subshell, so this activation
# presumably does not carry over to the running kernel - confirm, and select the
# pyspark_env kernel (or start Jupyter from that environment) instead.
!conda activate pyspark_env


In [5]:
!pip install pyspark

Collecting pyspark
  Downloading pyspark-4.0.0.tar.gz (434.1 MB)
     ---------------------------------------- 0.0/434.1 MB ? eta -:--:--
     ---------------------------------------- 2.1/434.1 MB 9.8 MB/s eta 0:00:45
     ---------------------------------------- 3.9/434.1 MB 9.0 MB/s eta 0:00:48
      --------------------------------------- 5.8/434.1 MB 8.8 MB/s eta 0:00:49
      --------------------------------------- 7.3/434.1 MB 8.9 MB/s eta 0:00:49
      --------------------------------------- 8.7/434.1 MB 8.1 MB/s eta 0:00:53
      --------------------------------------- 9.4/434.1 MB 7.3 MB/s eta 0:00:58
      -------------------------------------- 10.7/434.1 MB 7.0 MB/s eta 0:01:01
     - ------------------------------------- 11.5/434.1 MB 6.8 MB/s eta 0:01:03
     - ------------------------------------- 12.8/434.1 MB 6.5 MB/s eta 0:01:05
     - ------------------------------------- 13.6/434.1 MB 6.3 MB/s eta 0:01:07
     - ------------------------------------- 15.2/434.1 MB 6.3

In [1]:
import pyspark

In [2]:
import pandas as pd
df = pd.read_csv('test1.csv')
df

Unnamed: 0,Name,age,Experience,Salary
0,Krish,31,10,30000
1,Sudhanshu,30,8,25000
2,Sunny,29,4,20000
3,Paul,24,3,20000
4,Harsha,21,1,15000
5,Shubham,23,2,18000


# Creating spark session
- To work with pyspark, we need to first create a spark session.
- Let's create a spark session.

In [3]:
from pyspark.sql import SparkSession

In [5]:
# Create the session for this notebook (or reuse an existing one) under the
# application name 'Practice'.
spark = (
    SparkSession.builder
    .appName('Practice')
    .getOrCreate()
)

In [6]:
spark

# Reading data:
- Now that we have the spark object, we can read data using it.
- The `spark.read` object provides different methods for reading data (for example `csv`).

In [7]:
df_spark = spark.read.csv('test1.csv')
df_spark

DataFrame[_c0: string, _c1: string, _c2: string, _c3: string]

In [8]:
df_spark.show()

+---------+---+----------+------+
|      _c0|_c1|       _c2|   _c3|
+---------+---+----------+------+
|     Name|age|Experience|Salary|
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
+---------+---+----------+------+



In [9]:
# using header row.

In [10]:
df_spark = spark.read.option('header',True).csv('test1.csv')
df_spark

DataFrame[Name: string, age: string, Experience: string, Salary: string]

In [11]:
df_spark.show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
+---------+---+----------+------+



# lets look at the type

In [12]:
type(df_spark)

pyspark.sql.classic.dataframe.DataFrame

# head function

In [13]:
# head(5) returns the first 5 rows as a list of Row objects.
# NOTE(review): per the PySpark docs the row count is optional (head() alone
# returns a single Row) - confirm.
df_spark.head(5)

[Row(Name='Krish', age='31', Experience='10', Salary='30000'),
 Row(Name='Sudhanshu', age='30', Experience='8', Salary='25000'),
 Row(Name='Sunny', age='29', Experience='4', Salary='20000'),
 Row(Name='Paul', age='24', Experience='3', Salary='20000'),
 Row(Name='Harsha', age='21', Experience='1', Salary='15000')]

# schema

In [14]:
df_spark.printSchema()

root
 |-- Name: string (nullable = true)
 |-- age: string (nullable = true)
 |-- Experience: string (nullable = true)
 |-- Salary: string (nullable = true)



- PySpark Dataframe
- Reading The Dataset
- Checking the Datatypes of the Column(Schema)
- Selecting Columns And Indexing
- Check Describe option similar to Pandas
- Adding Columns
- Dropping Columns
- Renaming column

In [15]:
# inferSchema=True asks Spark to detect each column's datatype automatically
# instead of reading every column as a string.
df_pyspark = (
    spark.read
    .option('header', 'true')
    .csv('test1.csv', inferSchema=True)
)

In [16]:
df_pyspark

DataFrame[Name: string, age: int, Experience: int, Salary: int]

In [17]:
df_pyspark.printSchema()

root
 |-- Name: string (nullable = true)
 |-- age: integer (nullable = true)
 |-- Experience: integer (nullable = true)
 |-- Salary: integer (nullable = true)



In [18]:
# Equivalent shortcut: pass header and inferSchema directly to csv().
df_pyspark = spark.read.csv(
    'test1.csv',
    header=True,
    inferSchema=True,
)

In [19]:
df_pyspark.printSchema()

root
 |-- Name: string (nullable = true)
 |-- age: integer (nullable = true)
 |-- Experience: integer (nullable = true)
 |-- Salary: integer (nullable = true)



In [20]:
df_pyspark.show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
+---------+---+----------+------+



In [21]:
# working with columns

In [22]:
df_pyspark.columns

['Name', 'age', 'Experience', 'Salary']

In [23]:
# getting a column

In [24]:
# select() returns a new DataFrame containing only the requested column.
# NOTE(review): the column is declared as 'Name'; 'name' presumably resolves only
# because Spark matches column names case-insensitively by default - confirm.
df_pyspark.select('name')

DataFrame[name: string]

In [25]:
df_pyspark.select('name').show()

+---------+
|     name|
+---------+
|    Krish|
|Sudhanshu|
|    Sunny|
|     Paul|
|   Harsha|
|  Shubham|
+---------+



In [26]:
df_pyspark.select(['name', 'Experience'])

DataFrame[name: string, Experience: int]

In [27]:
df_pyspark.select(['name', 'Experience']).show()

+---------+----------+
|     name|Experience|
+---------+----------+
|    Krish|        10|
|Sudhanshu|         8|
|    Sunny|         4|
|     Paul|         3|
|   Harsha|         1|
|  Shubham|         2|
+---------+----------+



In [28]:
df_pyspark['Name'] # this only tells us that it is a column. we will not be able to get the data using this.

Column<'Name'>

In [29]:
#dtypes

In [30]:
df_pyspark.dtypes

[('Name', 'string'), ('age', 'int'), ('Experience', 'int'), ('Salary', 'int')]

In [31]:
# describe function

In [32]:
df_pyspark.describe().show()

+-------+------+------------------+-----------------+------------------+
|summary|  Name|               age|       Experience|            Salary|
+-------+------+------------------+-----------------+------------------+
|  count|     6|                 6|                6|                 6|
|   mean|  NULL|26.333333333333332|4.666666666666667|21333.333333333332|
| stddev|  NULL| 4.179314138308661|3.559026084010437| 5354.126134736337|
|    min|Harsha|                21|                1|             15000|
|    max| Sunny|                31|               10|             30000|
+-------+------+------------------+-----------------+------------------+



In [33]:
# adding columns

In [34]:
# withColumn returns a new DataFrame with an extra column computed from
# 'Experience' (each value increased by 2); df_pyspark itself is unchanged.
new_df = df_pyspark.withColumn(
    'Experience After 2 Years',
    df_pyspark['Experience'] + 2,
)

In [35]:
new_df.show()

+---------+---+----------+------+------------------------+
|     Name|age|Experience|Salary|Experience After 2 Years|
+---------+---+----------+------+------------------------+
|    Krish| 31|        10| 30000|                      12|
|Sudhanshu| 30|         8| 25000|                      10|
|    Sunny| 29|         4| 20000|                       6|
|     Paul| 24|         3| 20000|                       5|
|   Harsha| 21|         1| 15000|                       3|
|  Shubham| 23|         2| 18000|                       4|
+---------+---+----------+------+------------------------+



In [36]:
# Dropping column

In [37]:
new_df = new_df.drop('Experience After 2 Years')

In [38]:
new_df.show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
+---------+---+----------+------+



In [39]:
# Renaming column
# Use the column's exact name ('Name'): withColumnRenamed does nothing when the
# source column is not found, so a case mismatch would be silently ignored if
# case-sensitive column resolution were enabled.
new_df = new_df.withColumnRenamed('Name', 'New Name')

In [40]:
new_df.show()

+---------+---+----------+------+
| New Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
+---------+---+----------+------+



In [None]:
# NOTE(review): this cell was never executed (it has no execution count). drop()
# is called without column names, so it presumably returns the DataFrame
# unchanged - confirm whether the cell is still needed.
new_df = df_pyspark.drop()

# Pyspark Handling missing values
- Dropping Columns
- Dropping Rows
- Various Parameter in Dropping functionalities
- Handling Missing values by Mean, Median and mode

In [41]:
df_pyspark = spark.read.csv('test2.csv', header = True, inferSchema = True)

In [42]:
df_pyspark

DataFrame[Name: string, age: int, Experience: int, Salary: int]

In [43]:
df_pyspark.show()

+---------+----+----------+------+
|     Name| age|Experience|Salary|
+---------+----+----------+------+
|    Krish|  31|        10| 30000|
|Sudhanshu|  30|         8| 25000|
|    Sunny|  29|         4| 20000|
|     Paul|  24|         3| 20000|
|   Harsha|  21|         1| 15000|
|  Shubham|  23|         2| 18000|
|   Mahesh|NULL|      NULL| 40000|
|     NULL|  34|        10| 38000|
|     NULL|  36|      NULL|  NULL|
+---------+----+----------+------+



In [44]:
# dropping the columns
df_pyspark.drop('Name').show()

+----+----------+------+
| age|Experience|Salary|
+----+----------+------+
|  31|        10| 30000|
|  30|         8| 25000|
|  29|         4| 20000|
|  24|         3| 20000|
|  21|         1| 15000|
|  23|         2| 18000|
|NULL|      NULL| 40000|
|  34|        10| 38000|
|  36|      NULL|  NULL|
+----+----------+------+



 - drop all nulls.

In [47]:
df_pyspark.na.drop().show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
+---------+---+----------+------+



- the how parameter: how = 'any' / 'all'
- when using how = 'all', a row is dropped only when all of its columns are null.
- the default value for how is 'any': a row is dropped if it contains any null.

In [50]:
df_pyspark.na.drop(how = 'all').show()

+---------+----+----------+------+
|     Name| age|Experience|Salary|
+---------+----+----------+------+
|    Krish|  31|        10| 30000|
|Sudhanshu|  30|         8| 25000|
|    Sunny|  29|         4| 20000|
|     Paul|  24|         3| 20000|
|   Harsha|  21|         1| 15000|
|  Shubham|  23|         2| 18000|
|   Mahesh|NULL|      NULL| 40000|
|     NULL|  34|        10| 38000|
|     NULL|  36|      NULL|  NULL|
+---------+----+----------+------+



- threshold: thresh = 2
- It keeps only the rows that have at least `thresh` non-null values.
- In the following example, it will keep all the rows with a minimum of 2 non-null values in the row.

In [53]:
df_pyspark.na.drop(thresh = 2).show()

+---------+----+----------+------+
|     Name| age|Experience|Salary|
+---------+----+----------+------+
|    Krish|  31|        10| 30000|
|Sudhanshu|  30|         8| 25000|
|    Sunny|  29|         4| 20000|
|     Paul|  24|         3| 20000|
|   Harsha|  21|         1| 15000|
|  Shubham|  23|         2| 18000|
|   Mahesh|NULL|      NULL| 40000|
|     NULL|  34|        10| 38000|
+---------+----+----------+------+



- in the following example, it keeps all the rows with at least 1 non-null value

In [54]:
df_pyspark.na.drop(thresh = 1).show()

+---------+----+----------+------+
|     Name| age|Experience|Salary|
+---------+----+----------+------+
|    Krish|  31|        10| 30000|
|Sudhanshu|  30|         8| 25000|
|    Sunny|  29|         4| 20000|
|     Paul|  24|         3| 20000|
|   Harsha|  21|         1| 15000|
|  Shubham|  23|         2| 18000|
|   Mahesh|NULL|      NULL| 40000|
|     NULL|  34|        10| 38000|
|     NULL|  36|      NULL|  NULL|
+---------+----+----------+------+



- subset: subset = [col1, col2...]
- it restricts the null check to specific columns.
- In the following example, we drop only the rows that have a null in the Experience column

In [55]:
df_pyspark.na.drop(subset=['Experience']).show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
|     NULL| 34|        10| 38000|
+---------+---+----------+------+



# filling missing values
- value
- subset

- note - na.fill only replaces nulls in columns whose type matches the type of the value provided.
- if we provide 'Missing Values', then it will only replace nulls in string columns.
- if we provide -1, then it will replace nulls in numeric columns.

In [59]:
df_pyspark.na.fill('Missing Values').show()

+--------------+----+----------+------+
|          Name| age|Experience|Salary|
+--------------+----+----------+------+
|         Krish|  31|        10| 30000|
|     Sudhanshu|  30|         8| 25000|
|         Sunny|  29|         4| 20000|
|          Paul|  24|         3| 20000|
|        Harsha|  21|         1| 15000|
|       Shubham|  23|         2| 18000|
|        Mahesh|NULL|      NULL| 40000|
|Missing Values|  34|        10| 38000|
|Missing Values|  36|      NULL|  NULL|
+--------------+----+----------+------+



In [60]:
df_pyspark.na.fill(-1).show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
|   Mahesh| -1|        -1| 40000|
|     NULL| 34|        10| 38000|
|     NULL| 36|        -1|    -1|
+---------+---+----------+------+



- if we need to replace in multiple columns, we can provide a dictionary with different column names and its replacement value

In [61]:
df_pyspark.na.fill({
    'Name': 'Missing Value',
    'Experience' : -1,
    'age': -1,
    'Salary': -1
}).show()

+-------------+---+----------+------+
|         Name|age|Experience|Salary|
+-------------+---+----------+------+
|        Krish| 31|        10| 30000|
|    Sudhanshu| 30|         8| 25000|
|        Sunny| 29|         4| 20000|
|         Paul| 24|         3| 20000|
|       Harsha| 21|         1| 15000|
|      Shubham| 23|         2| 18000|
|       Mahesh| -1|        -1| 40000|
|Missing Value| 34|        10| 38000|
|Missing Value| 36|        -1|    -1|
+-------------+---+----------+------+



In [63]:
df_pyspark.printSchema()


root
 |-- Name: string (nullable = true)
 |-- age: integer (nullable = true)
 |-- Experience: integer (nullable = true)
 |-- Salary: integer (nullable = true)



- Replacing with mean using the Imputer class
- pyspark comes with an Imputer class which helps to impute data.
- it has inputCols and outputCols
- setStrategy takes the type of imputation that we want to perform (mean/median/mode).
- we can then use the imputer object to perform fit and then transform the data.
- As it is an ml model, we need to fit it on the data first and then use the fitted model to transform the data.

In [67]:
from pyspark.ml.feature import Imputer

# Columns to impute; each result is written to a new '<column>_imputed' column.
inputCols = ['age', 'Experience', 'Salary']
outputCols = [name + '_imputed' for name in inputCols]

# Passing strategy= to the constructor configures the same parameter that
# setStrategy('mean') sets.
imputer = Imputer(strategy='mean', inputCols=inputCols, outputCols=outputCols)

In [68]:
imputer.fit(df_pyspark).transform(df_pyspark).show()

+---------+----+----------+------+-----------+------------------+--------------+
|     Name| age|Experience|Salary|age_imputed|Experience_imputed|Salary_imputed|
+---------+----+----------+------+-----------+------------------+--------------+
|    Krish|  31|        10| 30000|         31|                10|         30000|
|Sudhanshu|  30|         8| 25000|         30|                 8|         25000|
|    Sunny|  29|         4| 20000|         29|                 4|         20000|
|     Paul|  24|         3| 20000|         24|                 3|         20000|
|   Harsha|  21|         1| 15000|         21|                 1|         15000|
|  Shubham|  23|         2| 18000|         23|                 2|         18000|
|   Mahesh|NULL|      NULL| 40000|         28|                 5|         40000|
|     NULL|  34|        10| 38000|         34|                10|         38000|
|     NULL|  36|      NULL|  NULL|         36|                 5|         25750|
+---------+----+----------+-

In [69]:
# replacing with median

In [70]:
from pyspark.ml.feature import Imputer

# Same columns as before, this time filled with the per-column median.
inputCols = ['age', 'Experience', 'Salary']
outputCols = ['{}_imputed'.format(name) for name in inputCols]

# strategy= in the constructor configures the same parameter that
# setStrategy('median') sets.
imputer = Imputer(strategy='median', inputCols=inputCols, outputCols=outputCols)

In [71]:
imputer.fit(df_pyspark).transform(df_pyspark).show()

+---------+----+----------+------+-----------+------------------+--------------+
|     Name| age|Experience|Salary|age_imputed|Experience_imputed|Salary_imputed|
+---------+----+----------+------+-----------+------------------+--------------+
|    Krish|  31|        10| 30000|         31|                10|         30000|
|Sudhanshu|  30|         8| 25000|         30|                 8|         25000|
|    Sunny|  29|         4| 20000|         29|                 4|         20000|
|     Paul|  24|         3| 20000|         24|                 3|         20000|
|   Harsha|  21|         1| 15000|         21|                 1|         15000|
|  Shubham|  23|         2| 18000|         23|                 2|         18000|
|   Mahesh|NULL|      NULL| 40000|         29|                 4|         40000|
|     NULL|  34|        10| 38000|         34|                10|         38000|
|     NULL|  36|      NULL|  NULL|         36|                 4|         20000|
+---------+----+----------+-

# Pyspark DataFrame filters
- Filter Operations
- &, |, ==
- ~

In [72]:
df_pyspark = spark.read.csv('test1.csv', header = True, inferSchema = True)

In [73]:
df_pyspark

DataFrame[Name: string, age: int, Experience: int, Salary: int]

In [74]:
df_pyspark.show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2| 18000|
+---------+---+----------+------+



In [75]:
# salary of people less than or equal to 20000
df_pyspark.filter('Salary <=20000').show()

+-------+---+----------+------+
|   Name|age|Experience|Salary|
+-------+---+----------+------+
|  Sunny| 29|         4| 20000|
|   Paul| 24|         3| 20000|
| Harsha| 21|         1| 15000|
|Shubham| 23|         2| 18000|
+-------+---+----------+------+



- selecting specific columns after filtering data.

In [76]:
df_pyspark.filter('Salary<=20000').select(['Name', 'Salary']).show()

+-------+------+
|   Name|Salary|
+-------+------+
|  Sunny| 20000|
|   Paul| 20000|
| Harsha| 15000|
|Shubham| 18000|
+-------+------+



- using masking

In [77]:
df_pyspark.filter(df_pyspark['Salary'] <= 20000).show()

+-------+---+----------+------+
|   Name|age|Experience|Salary|
+-------+---+----------+------+
|  Sunny| 29|         4| 20000|
|   Paul| 24|         3| 20000|
| Harsha| 21|         1| 15000|
|Shubham| 23|         2| 18000|
+-------+---+----------+------+



- multiple conditions
- Note: we need a set of parentheses around each condition.

In [78]:
# Keep rows that satisfy BOTH conditions (15000 <= Salary <= 20000).
# Each comparison is wrapped in parentheses because Python's `&` binds more
# tightly than `<=` / `>=`.
df_pyspark.filter((df_pyspark['Salary'] <= 20000) & 
                  (df_pyspark['Salary'] >=15000)).show()

+-------+---+----------+------+
|   Name|age|Experience|Salary|
+-------+---+----------+------+
|  Sunny| 29|         4| 20000|
|   Paul| 24|         3| 20000|
| Harsha| 21|         1| 15000|
|Shubham| 23|         2| 18000|
+-------+---+----------+------+



In [80]:
# Keep rows that satisfy EITHER condition (Experience equal to 3 or to 4).
# The parentheses are needed because Python's `|` binds more tightly than `==`.
df_pyspark.filter((df_pyspark['Experience'] == 3) | 
                  (df_pyspark['Experience'] == 4)).show()

+-----+---+----------+------+
| Name|age|Experience|Salary|
+-----+---+----------+------+
|Sunny| 29|         4| 20000|
| Paul| 24|         3| 20000|
+-----+---+----------+------+



In [82]:
# `~` negates the condition: keep the rows that do NOT satisfy Salary <= 20000.
df_pyspark.filter(~(df_pyspark['Salary'] <= 20000)).show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
+---------+---+----------+------+



# Groupby and Aggregate functions.


In [83]:
df_pyspark = spark.read.csv('test3.csv', header = True, inferSchema = True)


In [84]:
df_pyspark.show()

+---------+------------+------+
|     Name| Departments|salary|
+---------+------------+------+
|    Krish|Data Science| 10000|
|    Krish|         IOT|  5000|
|   Mahesh|    Big Data|  4000|
|    Krish|    Big Data|  4000|
|   Mahesh|Data Science|  3000|
|Sudhanshu|Data Science| 20000|
|Sudhanshu|         IOT| 10000|
|Sudhanshu|    Big Data|  5000|
|    Sunny|Data Science| 10000|
|    Sunny|    Big Data|  2000|
+---------+------------+------+



In [85]:
df_pyspark.printSchema()

root
 |-- Name: string (nullable = true)
 |-- Departments: string (nullable = true)
 |-- salary: integer (nullable = true)



In [None]:
# groupby

In [86]:
df_pyspark.groupby('Name')

GroupedData[grouping expressions: [Name], value: [Name: string, Departments: string ... 1 more field], type: GroupBy]

- note: The groupBy() creates a GroupedData object and not a DataFrame. So, we cannot use a show() on it.
- we need to apply aggregate function

In [88]:
df_pyspark.groupby('Name').sum()

DataFrame[Name: string, sum(salary): bigint]

In [89]:
df_pyspark.groupby('Name').sum().show()

+---------+-----------+
|     Name|sum(salary)|
+---------+-----------+
|Sudhanshu|      35000|
|    Sunny|      12000|
|    Krish|      19000|
|   Mahesh|       7000|
+---------+-----------+



In [93]:
df_pyspark.groupby('Departments').avg().show()

+------------+-----------+
| Departments|avg(salary)|
+------------+-----------+
|         IOT|     7500.0|
|    Big Data|     3750.0|
|Data Science|    10750.0|
+------------+-----------+



In [94]:
df_pyspark.groupby('Departments').count().show()

+------------+-----+
| Departments|count|
+------------+-----+
|         IOT|    2|
|    Big Data|    4|
|Data Science|    4|
+------------+-----+



In [97]:
df_pyspark.groupby('Name').max().show()

+---------+-----------+
|     Name|max(salary)|
+---------+-----------+
|Sudhanshu|      20000|
|    Sunny|      10000|
|    Krish|      10000|
|   Mahesh|       4000|
+---------+-----------+



# using agg()

In [95]:
# agg() without a groupby aggregates over the whole DataFrame; the dict maps a
# column name to the name of the aggregate function to apply to it.
# NOTE(review): the schema declares the column as 'salary'; 'Salary' presumably
# resolves because column matching is case-insensitive by default - confirm.
df_pyspark.agg({'Salary': 'sum'}).show()

+-----------+
|sum(Salary)|
+-----------+
|      73000|
+-----------+



In [106]:
# Getting the name of the person(s) with the minimum salary.
# Step 1: get the minimum salary value. agg() yields a one-row, one-column
# DataFrame, so collect()[0][0] extracts the scalar.
min_salary = df_pyspark.agg({'Salary': 'min'}).collect()[0][0]
# Step 2: keep the rows whose Salary equals that minimum and show name + salary.
df_pyspark.filter(df_pyspark["Salary"] == min_salary).select(['Name', 'Salary']).show()

+-----+------+
| Name|Salary|
+-----+------+
|Sunny|  2000|
+-----+------+

