In [1]:
import findspark
# Point findspark at the local Spark installation so the pyspark package can be imported.
# NOTE(review): 'D:/spark' is a machine-specific path — adjust it for your environment.
findspark.init('D:/spark')

###  SparkContext oluşturma yöntem-1: SparkSession 

In [2]:
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark import SparkContext

In [3]:
# Method 1: build (or reuse) a SparkSession on a local master with 4 worker threads.
# NOTE(review): the variable name `pyspark` shadows the pyspark package name; it is
# kept here because later cells reference it.
pyspark = (
    SparkSession.builder
    .master("local[4]")
    .appName("RDD-Olusturmak")
    .config("spark.executor.memory", "4g")
    .config("spark.driver.memory", "2g")
    .getOrCreate()
)

In [4]:
# Grab the SparkContext that backs the session; RDDs are created through it.
sc = pyspark.sparkContext

In [14]:
# Shut down the current SparkContext so the next section can create a fresh one.
sc.stop()

### SparkContext oluşturma yöntem-2: SparkSession ve SparkConf

In [15]:
# Method 2: describe the application in a SparkConf and hand it to the session builder.
# Spark properties such as spark.executor.memory must be registered with SparkConf.set();
# setExecutorEnv() only exports an OS environment variable to the executors and would
# leave the memory settings at their defaults.
# NOTE(review): spark.driver.memory normally only takes effect when it is set before the
# driver JVM starts — confirm it is honoured when this cell runs after an earlier context.
conf = (
    SparkConf()
    .setMaster("local[4]")
    .setAppName("RDD-Olusturmak")
    .set("spark.executor.memory", "4g")
    .set("spark.driver.memory", "4g")
)

# Build (or reuse) the session from the prepared configuration.
pyspark = SparkSession.builder.config(conf=conf).getOrCreate()

In [16]:
# Take the SparkContext from the SparkConf-configured session.
sc = pyspark.sparkContext

In [8]:
# Stop this context as well; the next section constructs a SparkContext directly.
sc.stop()

### SparkContext oluşturma yöntem-3: SparkContext ve SparkConf

In [10]:
# Method 3 configuration: a SparkConf to be passed straight to the SparkContext constructor.
# Memory settings are Spark properties, so they are registered with set();
# setExecutorEnv() would only export an environment variable to the executors and the
# intended memory limits would never be applied.
# NOTE(review): spark.driver.memory normally only takes effect when it is set before the
# driver JVM starts — confirm it is honoured when this cell runs after an earlier context.
sparkConf = (
    SparkConf()
    .setMaster("local[4]")
    .setAppName("RDD-Olusturmak")
    .set("spark.executor.memory", "2g")
    .set("spark.driver.memory", "1g")
)

In [11]:
# Create the SparkContext directly from the SparkConf, without going through a SparkSession.
# Any previously active context has to be stopped first (see the sc.stop() cell above).
sc = SparkContext(conf=sparkConf)

# Python listelerinden RDD oluşturmak

In [5]:
# Distribute a Python list of (name, number) tuples as an RDD.
rdd1 = sc.parallelize([
    ('Ahmet', 25),
    ('Cemal', 29),
    ('İnci', 38),
    ('Burcu', 33),
])

In [6]:
# Bring the first two elements back to the driver for a quick look.
rdd1.take(2)

[('Ahmet', 25), ('Cemal', 29)]

In [22]:
# Same data as rdd1, but each record is a list instead of a tuple.
rdd2 = sc.parallelize([
    ['Ahmet', 25],
    ['Cemal', 29],
    ['İnci', 38],
    ['Burcu', 33],
])

In [23]:
# Inspect the first two records of rdd2.
rdd2.take(2)

[['Ahmet', 25], ['Cemal', 29]]

In [24]:
# Count the records in rdd2.
rdd2.count()

4

In [25]:
# An RDD whose two elements are themselves lists of integers.
sayilarRDD = sc.parallelize([
    [1, 2, 3],
    [4, 5, 6],
])

In [26]:
# Fetch up to two elements; each element is one of the inner lists.
sayilarRDD.take(2)

[[1, 2, 3], [4, 5, 6]]

# Python sözlükten (dictionary) RDD oluşturmak

In [35]:
# Create the dictionary: each key is a column name mapped to its list of values.
my_dict = dict(
    Ogrenci=['Ali', 'Mehmet', 'Ayse'],
    Notlar=[70, 80, 90],
)

In [36]:
import pandas as pd

In [37]:
# Turn the dictionary into a pandas DataFrame (keys become the column names).
pdDF = pd.DataFrame(my_dict)

In [38]:
# Preview the first rows of the pandas DataFrame.
pdDF.head()

Unnamed: 0,Ogrenci,Notlar
0,Ali,70
1,Mehmet,80
2,Ayse,90


In [39]:
# Convert the pandas DataFrame into a Spark DataFrame.
# The `pyspark` session created earlier is tied to a SparkContext that has since been
# stopped, so calling it here fails when the notebook is run top to bottom.
# SparkSession.builder.getOrCreate() returns a session bound to the currently active
# SparkContext instead (and simply reuses the existing session when it is still alive).
# NOTE: despite its name, this variable holds a Spark DataFrame, not an RDD.
rdd_from_pandasDF = SparkSession.builder.getOrCreate().createDataFrame(pdDF)

In [40]:
# Print the Spark DataFrame as a text table.
rdd_from_pandasDF.show()

+-------+------+
|Ogrenci|Notlar|
+-------+------+
|    Ali|    70|
| Mehmet|    80|
|   Ayse|    90|
+-------+------+



In [41]:
# Access the DataFrame's underlying RDD; its elements are Row objects.
rdd_from_pandas = rdd_from_pandasDF.rdd

In [42]:
# Fetch the first three Row objects to the driver.
rdd_from_pandas.take(3)

[Row(Ogrenci='Ali', Notlar=70),
 Row(Ogrenci='Mehmet', Notlar=80),
 Row(Ogrenci='Ayse', Notlar=90)]

# Metin dosyalarından RDD oluşturmak

In [43]:
# Read a text file into an RDD with one string element per line of the file.
# NOTE(review): this is a hard-coded, user-specific absolute path — replace it with a
# configurable data directory before sharing the notebook.
rdd_metin = sc.textFile("C:/Users/toshiba/SkyDrive/veribilimi.co/Datasets/OnlineRetail.csv")

In [44]:
# Show the first ten raw lines; the header row is included because textFile does not parse CSV.
rdd_metin.take(10)

['InvoiceNo;StockCode;Description;Quantity;InvoiceDate;UnitPrice;CustomerID;Country',
 '536365;85123A;WHITE HANGING HEART T-LIGHT HOLDER;6;1.12.2010 08:26;2,55;17850;United Kingdom',
 '536365;71053;WHITE METAL LANTERN;6;1.12.2010 08:26;3,39;17850;United Kingdom',
 '536365;84406B;CREAM CUPID HEARTS COAT HANGER;8;1.12.2010 08:26;2,75;17850;United Kingdom',
 '536365;84029G;KNITTED UNION FLAG HOT WATER BOTTLE;6;1.12.2010 08:26;3,39;17850;United Kingdom',
 '536365;84029E;RED WOOLLY HOTTIE WHITE HEART.;6;1.12.2010 08:26;3,39;17850;United Kingdom',
 '536365;22752;SET 7 BABUSHKA NESTING BOXES;2;1.12.2010 08:26;7,65;17850;United Kingdom',
 '536365;21730;GLASS STAR FROSTED T-LIGHT HOLDER;6;1.12.2010 08:26;4,25;17850;United Kingdom',
 '536366;22633;HAND WARMER UNION JACK;6;1.12.2010 08:28;1,85;17850;United Kingdom',
 '536366;22632;HAND WARMER RED POLKA DOT;6;1.12.2010 08:28;1,85;17850;United Kingdom']