In [1]:
import numpy as np
import pandas as pd

## Series

In [2]:
myDictionary = {"Zeki" : 50, "Zeynep" : 40, "Mehmet" : 30}

In [3]:
pd.Series(myDictionary)

Zeki      50
Zeynep    40
Mehmet    30
dtype: int64

In [4]:
age = [50, 40, 30]
name = ["Zeki", "Zeynep", "Mehmet"]

In [5]:
pd.Series(age, name)

Zeki      50
Zeynep    40
Mehmet    30
dtype: int64

In [6]:
pd.Series(data = age, index = name)

Zeki      50
Zeynep    40
Mehmet    30
dtype: int64

In [7]:
pd.Series(["Zeki", "Elif", "Esra"],[1,2,3])

1    Zeki
2    Elif
3    Esra
dtype: object

In [8]:
race1 = pd.Series(index = ["Zeki", "Elif", "Esra"], data = [10,20,30])
race1

Zeki    10
Elif    20
Esra    30
dtype: int64

In [9]:
race2 = pd.Series(index = ["Zeki", "Elif", "Esra"], data = [50,10,40])
race2

Zeki    50
Elif    10
Esra    40
dtype: int64

In [10]:
(race1+race2)["Zeki"]

60

In [11]:
race3 = pd.Series(index = ["Zeki", "Elif", "Esra", "Zeynep"], data = [50,10,40,90])
race3

Zeki      50
Elif      10
Esra      40
Zeynep    90
dtype: int64

In [12]:
race3+race2

Elif       20.0
Esra       80.0
Zeki      100.0
Zeynep      NaN
dtype: float64

## DataFrame

In [13]:
data = np.random.randn(4,3)
data

array([[ 0.24637952, -0.85523204,  0.58560268],
       [ 0.80893045, -1.45072896,  0.19674271],
       [ 0.39757221, -1.7184562 ,  0.65761003],
       [ 0.22082854, -1.02426986, -0.18878246]])

In [14]:
dataFrame = pd.DataFrame(data)
dataFrame

Unnamed: 0,0,1,2
0,0.24638,-0.855232,0.585603
1,0.80893,-1.450729,0.196743
2,0.397572,-1.718456,0.65761
3,0.220829,-1.02427,-0.188782


In [15]:
newDataFrame = pd.DataFrame(data, index = ["Healthy", "Power", "Durability","price"], columns=["name", "surname", "image"])
newDataFrame

Unnamed: 0,name,surname,image
Healthy,0.24638,-0.855232,0.585603
Power,0.80893,-1.450729,0.196743
Durability,0.397572,-1.718456,0.65761
price,0.220829,-1.02427,-0.188782


In [16]:
newDataFrame["name"]

Healthy       0.246380
Power         0.808930
Durability    0.397572
price         0.220829
Name: name, dtype: float64

In [17]:
newDataFrame[["name", "surname"]]

Unnamed: 0,name,surname
Healthy,0.24638,-0.855232
Power,0.80893,-1.450729
Durability,0.397572,-1.718456
price,0.220829,-1.02427


In [18]:
newDataFrame.loc["Power"]

name       0.808930
surname   -1.450729
image      0.196743
Name: Power, dtype: float64

In [19]:
newDataFrame.iloc[2]

name       0.397572
surname   -1.718456
image      0.657610
Name: Durability, dtype: float64

In [20]:
newDataFrame["Emeklilik"] = newDataFrame["name"] + newDataFrame["surname"]
newDataFrame

Unnamed: 0,name,surname,image,Emeklilik
Healthy,0.24638,-0.855232,0.585603,-0.608853
Power,0.80893,-1.450729,0.196743,-0.641799
Durability,0.397572,-1.718456,0.65761,-1.320884
price,0.220829,-1.02427,-0.188782,-0.803441


In [21]:
newDataFrame.drop("image", axis=1)

Unnamed: 0,name,surname,Emeklilik
Healthy,0.24638,-0.855232,-0.608853
Power,0.80893,-1.450729,-0.641799
Durability,0.397572,-1.718456,-1.320884
price,0.220829,-1.02427,-0.803441


In [22]:
newDataFrame.drop("price", axis=0)

Unnamed: 0,name,surname,image,Emeklilik
Healthy,0.24638,-0.855232,0.585603,-0.608853
Power,0.80893,-1.450729,0.196743,-0.641799
Durability,0.397572,-1.718456,0.65761,-1.320884


In [23]:
newDataFrame

Unnamed: 0,name,surname,image,Emeklilik
Healthy,0.24638,-0.855232,0.585603,-0.608853
Power,0.80893,-1.450729,0.196743,-0.641799
Durability,0.397572,-1.718456,0.65761,-1.320884
price,0.220829,-1.02427,-0.188782,-0.803441


In [24]:
newDataFrame.drop("image", axis=1, inplace=True)
newDataFrame

Unnamed: 0,name,surname,Emeklilik
Healthy,0.24638,-0.855232,-0.608853
Power,0.80893,-1.450729,-0.641799
Durability,0.397572,-1.718456,-1.320884
price,0.220829,-1.02427,-0.803441


In [25]:
newDataFrame.loc["Power"]["name"]

0.8089304520295479

In [26]:
newDataFrame.loc["Power","name"]

0.8089304520295479

In [27]:
booleanFrame = newDataFrame < 0
booleanFrame

Unnamed: 0,name,surname,Emeklilik
Healthy,False,True,True
Power,False,True,True
Durability,False,True,True
price,False,True,True


In [28]:
newDataFrame[booleanFrame]
#newDataFrame[newDataFrame < 0]

Unnamed: 0,name,surname,Emeklilik
Healthy,,-0.855232,-0.608853
Power,,-1.450729,-0.641799
Durability,,-1.718456,-1.320884
price,,-1.02427,-0.803441


In [29]:
newDataFrame["surname"] > 0

Healthy       False
Power         False
Durability    False
price         False
Name: surname, dtype: bool

In [30]:
result = newDataFrame[newDataFrame["surname"] > 0]
result

Unnamed: 0,name,surname,Emeklilik


In [31]:
newDataFrame

Unnamed: 0,name,surname,Emeklilik
Healthy,0.24638,-0.855232,-0.608853
Power,0.80893,-1.450729,-0.641799
Durability,0.397572,-1.718456,-1.320884
price,0.220829,-1.02427,-0.803441


In [32]:
newDataFrame.reset_index()

Unnamed: 0,index,name,surname,Emeklilik
0,Healthy,0.24638,-0.855232,-0.608853
1,Power,0.80893,-1.450729,-0.641799
2,Durability,0.397572,-1.718456,-1.320884
3,price,0.220829,-1.02427,-0.803441


In [33]:
newIndexList = ["He","Po","Du","Pr"]

In [34]:
newDataFrame["NewIndex"] = newIndexList
newDataFrame

Unnamed: 0,name,surname,Emeklilik,NewIndex
Healthy,0.24638,-0.855232,-0.608853,He
Power,0.80893,-1.450729,-0.641799,Po
Durability,0.397572,-1.718456,-1.320884,Du
price,0.220829,-1.02427,-0.803441,Pr


In [35]:
newDataFrame.set_index("NewIndex", inplace=True)
newDataFrame

Unnamed: 0_level_0,name,surname,Emeklilik
NewIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
He,0.24638,-0.855232,-0.608853
Po,0.80893,-1.450729,-0.641799
Du,0.397572,-1.718456,-1.320884
Pr,0.220829,-1.02427,-0.803441


In [36]:
newDataFrame.loc["He"]

name         0.246380
surname     -0.855232
Emeklilik   -0.608853
Name: He, dtype: float64

In [37]:
firstIndex=["Simpson","Simpson","Simpson","Family Guy","Family Guy","Family Guy"]

In [38]:
secondIndex=["Homer","Bart","Marge","Peter","Louise","Meg"]

In [39]:
birlesmisIndex=list(zip(firstIndex,secondIndex))
birlesmisIndex

[('Simpson', 'Homer'),
 ('Simpson', 'Bart'),
 ('Simpson', 'Marge'),
 ('Family Guy', 'Peter'),
 ('Family Guy', 'Louise'),
 ('Family Guy', 'Meg')]

In [40]:
birlesmisIndex = pd.MultiIndex.from_tuples(birlesmisIndex)
birlesmisIndex

MultiIndex([(   'Simpson',  'Homer'),
            (   'Simpson',   'Bart'),
            (   'Simpson',  'Marge'),
            ('Family Guy',  'Peter'),
            ('Family Guy', 'Louise'),
            ('Family Guy',    'Meg')],
           )

In [41]:
cartoonList=[[40,"A"],[10,"B"],[20,"C"],[30,"D"],[50,"E"],[70,"F"]]
cartoonNumpy = np.array(cartoonList)
cartoonDataFrame = pd.DataFrame(cartoonNumpy, index=birlesmisIndex, columns=["Yas","Meslek"])
cartoonDataFrame

Unnamed: 0,Unnamed: 1,Yas,Meslek
Simpson,Homer,40,A
Simpson,Bart,10,B
Simpson,Marge,20,C
Family Guy,Peter,30,D
Family Guy,Louise,50,E
Family Guy,Meg,70,F


In [42]:
cartoonDataFrame.loc["Simpson"]

Unnamed: 0,Yas,Meslek
Homer,40,A
Bart,10,B
Marge,20,C


In [43]:
cartoonDataFrame.loc["Simpson"].loc["Bart"]

Yas       10
Meslek     B
Name: Bart, dtype: object

In [44]:
cartoonDataFrame.index.names = ["Cartoon Name","Karakter"]

In [45]:
cartoonDataFrame

Unnamed: 0_level_0,Unnamed: 1_level_0,Yas,Meslek
Cartoon Name,Karakter,Unnamed: 2_level_1,Unnamed: 3_level_1
Simpson,Homer,40,A
Simpson,Bart,10,B
Simpson,Marge,20,C
Family Guy,Peter,30,D
Family Guy,Louise,50,E
Family Guy,Meg,70,F


## Pandas Operetions

In [46]:
havaDurumuDataFrame = pd.DataFrame({"Istanbul" : [30,29,np.nan],"Ankara" : [20,np.nan,25],"Izmir" : [39,37,40]})
havaDurumuDataFrame

Unnamed: 0,Istanbul,Ankara,Izmir
0,30.0,20.0,39
1,29.0,,37
2,,25.0,40


In [47]:
days = ["Pazartesi","Salı","Çarşamda"]
havaDurumuDataFrame [""] = days

In [48]:
havaDurumuDataFrame.set_index("", inplace=True)

In [49]:
havaDurumuDataFrame

Unnamed: 0,Istanbul,Ankara,Izmir
,,,
Pazartesi,30.0,20.0,39.0
Salı,29.0,,37.0
Çarşamda,,25.0,40.0


In [50]:
havaDurumuDataFrame.dropna(axis = 0)

Unnamed: 0,Istanbul,Ankara,Izmir
,,,
Pazartesi,30.0,20.0,39.0


In [51]:
havaDurumuDataFrame.dropna(axis = 1)

Unnamed: 0,Izmir
,
Pazartesi,39.0
Salı,37.0
Çarşamda,40.0


In [52]:
havaDurumu2 = pd.DataFrame({"Istanbul" : [30,29,np.nan],"Ankara" : [20,np.nan,25],"Izmir" : [39,37,40],"Antalya" : [41,np.nan,np.nan]})
havaDurumu2

Unnamed: 0,Istanbul,Ankara,Izmir,Antalya
0,30.0,20.0,39,41.0
1,29.0,,37,
2,,25.0,40,


In [53]:
havaDurumu2.set_index([["Pazartesi","Salı","Çarşamba"]], inplace=True)

In [54]:
havaDurumu2

Unnamed: 0,Istanbul,Ankara,Izmir,Antalya
Pazartesi,30.0,20.0,39,41.0
Salı,29.0,,37,
Çarşamba,,25.0,40,


In [55]:
havaDurumu2.dropna(axis=1, thresh=2)

Unnamed: 0,Istanbul,Ankara,Izmir
Pazartesi,30.0,20.0,39
Salı,29.0,,37
Çarşamba,,25.0,40


In [56]:
havaDurumu2.fillna(28)

Unnamed: 0,Istanbul,Ankara,Izmir,Antalya
Pazartesi,30.0,20.0,39,41.0
Salı,29.0,28.0,37,28.0
Çarşamba,28.0,25.0,40,28.0


## GroupBy

In [57]:
newDictionary = {"Departmanlar" : ["Yazılım", "Yazılım","Pazarlama","Pazarlama","HR", "HR"],
         "Elemanlar" : ["Ahmet","Mehmet","Zeki","Enes","Furkan","Eyşan"],
         "Maas" : [100,150,200,250,300,400]}
maasDataFrame = pd.DataFrame(newDictionary)
maasDataFrame

Unnamed: 0,Departmanlar,Elemanlar,Maas
0,Yazılım,Ahmet,100
1,Yazılım,Mehmet,150
2,Pazarlama,Zeki,200
3,Pazarlama,Enes,250
4,HR,Furkan,300
5,HR,Eyşan,400


In [58]:
result = maasDataFrame.groupby("Departmanlar")

In [59]:
result.count()

Unnamed: 0_level_0,Elemanlar,Maas
Departmanlar,Unnamed: 1_level_1,Unnamed: 2_level_1
HR,2,2
Pazarlama,2,2
Yazılım,2,2


In [60]:
result.mean()

Unnamed: 0_level_0,Maas
Departmanlar,Unnamed: 1_level_1
HR,350.0
Pazarlama,225.0
Yazılım,125.0


In [61]:
result.max()

Unnamed: 0_level_0,Elemanlar,Maas
Departmanlar,Unnamed: 1_level_1,Unnamed: 2_level_1
HR,Furkan,400
Pazarlama,Zeki,250
Yazılım,Mehmet,150


In [62]:
result.min()

Unnamed: 0_level_0,Elemanlar,Maas
Departmanlar,Unnamed: 1_level_1,Unnamed: 2_level_1
HR,Eyşan,300
Pazarlama,Enes,200
Yazılım,Ahmet,100


In [63]:
result.describe()

Unnamed: 0_level_0,Maas,Maas,Maas,Maas,Maas,Maas,Maas,Maas
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Departmanlar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
HR,2.0,350.0,70.710678,300.0,325.0,350.0,375.0,400.0
Pazarlama,2.0,225.0,35.355339,200.0,212.5,225.0,237.5,250.0
Yazılım,2.0,125.0,35.355339,100.0,112.5,125.0,137.5,150.0


## Concat

In [64]:
dic1 = {"Isim": ["Ahmet","Mehmet", "Zeynep"],
        "Spor":["Kosu","Yuzme","Basket"],
       "Kalori" : [400,600,780]}
df1 = pd.DataFrame(dic1)
df1

Unnamed: 0,Isim,Spor,Kalori
0,Ahmet,Kosu,400
1,Mehmet,Yuzme,600
2,Zeynep,Basket,780


In [65]:
dic2 = {"Isim": ["Zeki","Elif", "Esra","Avni"],
        "Spor":["Kosu","Yuzme","Basket","Futbol"],
       "Kalori" : [400,600,780,800]}
df2 = pd.DataFrame(dic2)
df2

Unnamed: 0,Isim,Spor,Kalori
0,Zeki,Kosu,400
1,Elif,Yuzme,600
2,Esra,Basket,780
3,Avni,Futbol,800


In [66]:
dic3 = {"Isim": ["Enes","Furkan"],
        "Spor":["Kosu","Yuzme"],
       "Kalori" : [400,600]}
df3 = pd.DataFrame(dic3)
df3

Unnamed: 0,Isim,Spor,Kalori
0,Enes,Kosu,400
1,Furkan,Yuzme,600


In [67]:
result=pd.concat([df1,df2,df3])#axis=1 verme durumunda yan yana birleştirir
result

Unnamed: 0,Isim,Spor,Kalori
0,Ahmet,Kosu,400
1,Mehmet,Yuzme,600
2,Zeynep,Basket,780
0,Zeki,Kosu,400
1,Elif,Yuzme,600
2,Esra,Basket,780
3,Avni,Futbol,800
0,Enes,Kosu,400
1,Furkan,Yuzme,600


In [68]:
result.reset_index(inplace=True)

In [69]:
result.drop("index",axis=1,inplace=True)
result

Unnamed: 0,Isim,Spor,Kalori
0,Ahmet,Kosu,400
1,Mehmet,Yuzme,600
2,Zeynep,Basket,780
3,Zeki,Kosu,400
4,Elif,Yuzme,600
5,Esra,Basket,780
6,Avni,Futbol,800
7,Enes,Kosu,400
8,Furkan,Yuzme,600


## Merging

In [70]:
mergeDic = {"Isim" : ["Ahmet", "Mehmet", "Zeki", "Avni"],
           "Spor" : ["Koşu","Yüzme","Futbol","Basketbol"]}

mergeDataFrame1 = pd.DataFrame(mergeDic)
mergeDataFrame1

Unnamed: 0,Isim,Spor
0,Ahmet,Koşu
1,Mehmet,Yüzme
2,Zeki,Futbol
3,Avni,Basketbol


In [71]:
mergeDic2 = {"Isim" : ["Ahmet", "Mehmet", "Zeki", "Avni"],
           "Kalori" : [100,200,300,400]}

mergeDataFrame2 = pd.DataFrame(mergeDic2)
mergeDataFrame2

Unnamed: 0,Isim,Kalori
0,Ahmet,100
1,Mehmet,200
2,Zeki,300
3,Avni,400


In [72]:
pd.merge(mergeDataFrame1,mergeDataFrame2,on="Isim")

Unnamed: 0,Isim,Spor,Kalori
0,Ahmet,Koşu,100
1,Mehmet,Yüzme,200
2,Zeki,Futbol,300
3,Avni,Basketbol,400


In [73]:
maasDic2 = {"Isim" : ["Ahmet", "Mehmet", "Zeki", "Avni"],
          "Departman" : ["Yazılım", "Satış", "Pazarlama","Yazılım"],
          "Maas" : [100,200,300,400]}
maasDataFrame2 = pd.DataFrame(maasDic2)
maasDataFrame2

Unnamed: 0,Isim,Departman,Maas
0,Ahmet,Yazılım,100
1,Mehmet,Satış,200
2,Zeki,Pazarlama,300
3,Avni,Yazılım,400


In [74]:
maasDataFrame2["Departman"].unique()

array(['Yazılım', 'Satış', 'Pazarlama'], dtype=object)

In [75]:
maasDataFrame2["Departman"].nunique()

3

In [76]:
maasDataFrame2["Departman"].value_counts()

Yazılım      2
Satış        1
Pazarlama    1
Name: Departman, dtype: int64

In [77]:
def brutNet(maas):
    return maas*0.66

In [78]:
maasDataFrame2["Maas"].apply(brutNet)

0     66.0
1    132.0
2    198.0
3    264.0
Name: Maas, dtype: float64

## Pivot

In [79]:
newData = {"Karakter Sınıfı" : ["Cars", "Cars","Simpson","Simpson","Simpson"],
          "Karakter Ismi" : ["Cartman","Kenny","Homer","Bart","Bart"],
          "Karakter Yas" : [20,50,23,31,69]}
karakterDF=pd.DataFrame(newData)
karakterDF

Unnamed: 0,Karakter Sınıfı,Karakter Ismi,Karakter Yas
0,Cars,Cartman,20
1,Cars,Kenny,50
2,Simpson,Homer,23
3,Simpson,Bart,31
4,Simpson,Bart,69


In [81]:
karakterDF.pivot_table(values="Karakter Yas", index=["Karakter Sınıfı","Karakter Ismi"],aggfunc=np.sum)
# aggfunc aynı karakterlerin yaslarına uygulanacak işlemi belirler
# bir şey yazmazsak yaş ortalamasını yazardı

Unnamed: 0_level_0,Unnamed: 1_level_0,Karakter Yas
Karakter Sınıfı,Karakter Ismi,Unnamed: 2_level_1
Cars,Cartman,20
Cars,Kenny,50
Simpson,Bart,100
Simpson,Homer,23


## Excel

In [83]:
excelDF=pd.read_excel("deneme.xlsx")
excelDF

Unnamed: 0.1,Unnamed: 0,Maas,Yas,Departman
0,Zeki,1000,24.0,Yazılım
1,Mehmet,2000,30.0,Satış
2,Ahmet,3000,42.0,
3,Avni,4000,68.0,Pazarlama
4,Halime,5550,,Finans
5,Elif,5060,29.0,Hukuk


In [86]:
nonEmptyDF = excelDF.dropna()
nonEmptyDF

Unnamed: 0.1,Unnamed: 0,Maas,Yas,Departman
0,Zeki,1000,24.0,Yazılım
1,Mehmet,2000,30.0,Satış
3,Avni,4000,68.0,Pazarlama
5,Elif,5060,29.0,Hukuk


In [87]:
nonEmptyDF.to_excel("nonEmptyDeneme.xlsx")