In [1]:
import pandas as pd
import numpy as np

In [2]:
# Pandas Series Object
data = pd.Series([0.25, 0.5, 0.75, 1.0])
print(data)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [3]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [4]:
data[1] # 0 indexli => 0.5

0.5

In [5]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a','b','c','d'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [6]:
data['c']

0.75

In [7]:
# Population per city, keyed by city name.
# NOTE(review): 5_000_00 evaluates to 500000 (underscores are only visual digit
# separators), not 5_000_000 -- confirm the intended value for Ankara.
nufus_dict = {'Istanbul': 16_000_000, 'Ankara': 5_000_00, 'Izmir': 3_000_000}

In [8]:
nufus = pd.Series(nufus_dict)

In [9]:
print(nufus)

Istanbul    16000000
Ankara        500000
Izmir        3000000
dtype: int64


In [10]:
nufus['Ankara']

500000

In [11]:
# Pandas Dataframe 
alan_dict = {'Istanbul': 9_000, 'Ankara': 8_000, 'Izmir': 6_000}

In [14]:
sehirler = pd.DataFrame({'Nufus': nufus_dict, 'Alan':alan_dict})
sehirler

Unnamed: 0,Nufus,Alan
Istanbul,16000000,9000
Ankara,500000,8000
Izmir,3000000,6000


In [15]:
sehirler.index # DataFrame'in index listesi

Index(['Istanbul', 'Ankara', 'Izmir'], dtype='object')

In [16]:
sehirler.columns # DataFrame'in kolon isimleri

Index(['Nufus', 'Alan'], dtype='object')

In [17]:
sehirler['Alan']

Istanbul    9000
Ankara      8000
Izmir       6000
Name: Alan, dtype: int64

In [18]:
pd.DataFrame([{'a':1, 'b':2},{'a':2, 'b':5}]) # DataFrame oluşturulması

Unnamed: 0,a,b
0,1,2
1,2,5


In [19]:
pd.DataFrame([{'a':1, 'b':2},{'a':2, 'b':5, 'c':5}]) # DataFrame oluşturulması * Sınavda Çıkabilir *

Unnamed: 0,a,b,c
0,1,2,
1,2,5,5.0


In [20]:
# Numpy Array => Pandas DataFrame
arr = np.random.rand(3,2)
print(arr)

[[0.68505602 0.78139942]
 [0.15705935 0.30093351]
 [0.00772289 0.86754997]]


In [21]:
pd.DataFrame(arr, columns = ['a','b'], index=['r1','r2','r3'])

Unnamed: 0,a,b
r1,0.685056,0.781399
r2,0.157059,0.300934
r3,0.007723,0.86755


In [22]:
# Data index'leme ve Seçim

In [23]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a','b','c','d'])

In [24]:
data['c'] # 0.75

0.75

In [25]:
print(data)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64


In [26]:
# Positional access works even though the index labels are strings.
# Use .iloc for this: plain data[2] relies on an integer-as-position
# fallback that recent pandas versions deprecate.
data.iloc[2]

0.75

In [27]:
# loc, iloc (the old .ix indexer has been removed from pandas; use loc / iloc instead)
data = pd.Series(['a','b','c'], index=[1,3,5])
data

1    a
3    b
5    c
dtype: object

In [28]:
data[1]  # with this integer index, [] looks up the label 1 (not position 1), so the result is 'a'

'a'

In [29]:
data.loc[1] # loc: index adı ile veriye erişim sağlar

'a'

In [30]:
data.loc[5]

'c'

In [31]:
data.iloc[1] # iloc: index rakamı ile (sıfır indeksli olarak!) 

'b'

In [32]:
data.iloc[1:3]# 1 ve 2. sıradaki değerler

3    b
5    c
dtype: object

In [33]:
sehirler

Unnamed: 0,Nufus,Alan
Istanbul,16000000,9000
Ankara,500000,8000
Izmir,3000000,6000


In [34]:
sehirler['Alan']

Istanbul    9000
Ankara      8000
Izmir       6000
Name: Alan, dtype: int64

In [35]:
sehirler.Alan

Istanbul    9000
Ankara      8000
Izmir       6000
Name: Alan, dtype: int64

In [36]:
sehirler['Yogunluk'] = sehirler['Nufus'] / sehirler['Alan']
sehirler

Unnamed: 0,Nufus,Alan,Yogunluk
Istanbul,16000000,9000,1777.777778
Ankara,500000,8000,62.5
Izmir,3000000,6000,500.0


In [37]:
sehirler.values

array([[1.60000000e+07, 9.00000000e+03, 1.77777778e+03],
       [5.00000000e+05, 8.00000000e+03, 6.25000000e+01],
       [3.00000000e+06, 6.00000000e+03, 5.00000000e+02]])

In [38]:
sehirler.T # Transpose

Unnamed: 0,Istanbul,Ankara,Izmir
Nufus,16000000.0,500000.0,3000000.0
Alan,9000.0,8000.0,6000.0
Yogunluk,1777.778,62.5,500.0


In [39]:
sehirler.values[0]

array([1.60000000e+07, 9.00000000e+03, 1.77777778e+03])

In [40]:
sehirler.values[0, 1]  # .values is a single float array here (see the output above), so Alan comes back as 9000.0

9000.0

In [42]:
sehirler.loc['Istanbul','Alan']

9000

In [44]:
sehirler.iloc[0, 1]

9000

In [45]:
sehirler

Unnamed: 0,Nufus,Alan,Yogunluk
Istanbul,16000000,9000,1777.777778
Ankara,500000,8000,62.5
Izmir,3000000,6000,500.0


In [46]:
# Filtreleme
sehirler.loc[sehirler.Yogunluk > 100]

Unnamed: 0,Nufus,Alan,Yogunluk
Istanbul,16000000,9000,1777.777778
Izmir,3000000,6000,500.0


In [49]:
sehirler.loc['Ankara','Nufus'] = 5_500_000

In [50]:
sehirler

Unnamed: 0,Nufus,Alan,Yogunluk
Istanbul,16000000,9000,1777.777778
Ankara,5500000,8000,62.5
Izmir,3000000,6000,500.0


In [51]:
sehirler.loc['Ankara','Yogunluk'] = sehirler.loc['Ankara','Nufus'] / sehirler.loc['Ankara','Alan']

In [52]:
sehirler

Unnamed: 0,Nufus,Alan,Yogunluk
Istanbul,16000000,9000,1777.777778
Ankara,5500000,8000,687.5
Izmir,3000000,6000,500.0


In [53]:
sehirler.loc['Ankara','Nufus']

5500000

In [54]:
alan_dict

{'Istanbul': 9000, 'Ankara': 8000, 'Izmir': 6000}

In [55]:
nufus_dict

{'Istanbul': 16000000, 'Ankara': 500000, 'Izmir': 3000000}

In [56]:
nufus_dict / alan_dict

TypeError: unsupported operand type(s) for /: 'dict' and 'dict'

In [66]:
nufus = pd.Series({'Istanbul': 16000000, 'Ankara': 5000000, 'Izmir': 3000000}, name='nufus')

In [67]:
nufus

Istanbul    16000000
Ankara       5000000
Izmir        3000000
Name: nufus, dtype: int64

In [63]:
alan = pd.Series({'Istanbul': 9000, 'Ankara': 8000, 'Izmir': 6000}, name='alan')

In [64]:
alan

Istanbul    9000
Ankara      8000
Izmir       6000
Name: alan, dtype: int64

In [68]:
nufus / alan

Istanbul    1777.777778
Ankara       625.000000
Izmir        500.000000
dtype: float64

In [69]:
nufus.index

Index(['Istanbul', 'Ankara', 'Izmir'], dtype='object')

In [70]:
# DataFrame İşlemleri
# Toplama

In [75]:
A = pd.DataFrame(np.random.randint(0,20,(2,2)), columns=list('AB'))

In [76]:
A

Unnamed: 0,A,B
0,7,1
1,13,19


In [77]:
B = pd.DataFrame(np.random.randint(0,20,(2,2)), columns=list('AB'))

In [78]:
B

Unnamed: 0,A,B
0,3,7
1,17,0


In [79]:
A + B

Unnamed: 0,A,B
0,10,8
1,30,19


In [80]:
C = pd.DataFrame(np.random.randint(0,20,(3,3)), columns=list('ABC'))
C

Unnamed: 0,A,B,C
0,17,2,1
1,18,2,17
2,13,1,9


In [81]:
B + C

Unnamed: 0,A,B,C
0,20.0,9.0,
1,35.0,2.0,
2,,,


In [83]:
B.add(C, fill_value=0)

Unnamed: 0,A,B,C
0,20.0,9.0,1.0
1,35.0,2.0,17.0
2,13.0,1.0,9.0


In [84]:
A - B

Unnamed: 0,A,B
0,4,-6
1,-4,19


In [85]:
# Eksik veri olması durumunda ?

In [86]:
vals1 = np.array([1, None, 3, 4])
vals1

array([1, None, 3, 4], dtype=object)

In [87]:
vals1.sum()

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [88]:
vals1.mean()

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [91]:
vals1

array([1, None, 3, 4], dtype=object)

In [92]:
vals1[1] = np.nan

In [93]:
vals1

array([1, nan, 3, 4], dtype=object)

In [94]:
vals1.sum()

nan

In [96]:
np.sum(vals1)

nan

In [97]:
# Numpy NaN çözümü
np.nansum(vals1)  # NaN olan değerleri hesaba katmadan toplama işlemini yap

8

In [99]:
np.nanmean(vals1) # NaN olan değerleri hesaba katmadan ortalama işlemini yap

2.6666666666666665

In [None]:
# Pandas'ta olmayan verilerin işlenmesi

In [100]:
pd.Series([1, np.nan, 3, 4])

0    1.0
1    NaN
2    3.0
3    4.0
dtype: float64

In [101]:
data = pd.Series([1, np.nan, 3, 'Merhaba', None])

In [102]:
data.isnull() # null olan değerler olup olmadığını gösterir

0    False
1     True
2    False
3    False
4     True
dtype: bool

In [103]:
data.notnull() # null olmayan değerlerin olup olmadığını gösterir

0     True
1    False
2     True
3     True
4    False
dtype: bool

In [104]:
data.dropna() # null olan değerleri veri setinden çıkar

0          1
2          3
3    Merhaba
dtype: object

In [105]:
df = pd.DataFrame([[1,      np.nan, 2],
                   [2,      3,      5],
                   [np.nan, 4,      6]])
df

Unnamed: 0,0,1,2
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [106]:
df.dropna()

Unnamed: 0,0,1,2
1,2.0,3.0,5


In [107]:
df.dropna(axis='columns')

Unnamed: 0,2
0,2
1,5
2,6


In [109]:
df.fillna(0)

Unnamed: 0,0,1,2
0,1.0,0.0,2
1,2.0,3.0,5
2,0.0,4.0,6


In [111]:
# Pandas Çoklu İndeks

df = pd.DataFrame(np.random.rand(4,2), index=[['a','a','b','b'], [1, 2, 1, 2]], columns = ['data1','data2'])
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.350905,0.899265
a,2,0.423081,0.848909
b,1,0.550237,0.752626
b,2,0.517603,0.006816


In [113]:
df.index

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [120]:
# Select the row ('a', 2) and the column 'data1' in a single .loc call,
# instead of chaining a second [] lookup onto the row selection.
df.loc[('a', 2), 'data1']

0.4230806085921016

In [124]:
df.iloc[1]

data1    0.423081
data2    0.848909
Name: (a, 2), dtype: float64

In [125]:
df.unstack()

Unnamed: 0_level_0,data1,data1,data2,data2
Unnamed: 0_level_1,1,2,1,2
a,0.350905,0.423081,0.899265,0.848909
b,0.550237,0.517603,0.752626,0.006816


In [126]:
df.unstack().columns

MultiIndex([('data1', 1),
            ('data1', 2),
            ('data2', 1),
            ('data2', 2)],
           )

In [128]:
# DataFrame Concatenation (Birleştirme)

def make_df(cols, ind):
    """Build a small demo DataFrame whose cells name their own position.

    Each cell holds ``str(column) + str(row_label)``, e.g. ``'A0'``.

    Args:
        cols: Iterable of column labels (a string yields one column per character).
        ind: Iterable of row labels; also used as the frame's index.

    Returns:
        A ``pd.DataFrame`` with one column per entry of ``cols``, indexed by ``ind``.
    """
    columns = {}
    for col in cols:
        # One list of cell texts per column, in row-label order.
        columns[col] = [f"{col!s}{row!s}" for row in ind]
    return pd.DataFrame(columns, index=ind)

In [129]:
make_df("ABC", range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [131]:
# Yeni satırların eklenmesi
df1 = make_df('AB', [1,2])
df2 = make_df('AB', [3,4])
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [132]:
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [133]:
pd.concat([df1, df2])  # stacks the rows of df2 below df1; the original index labels are kept

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [135]:
# yeni kolonların eklenmesi
df3 = make_df('AB',[0,1])
df4 = make_df('CD',[0,1])
df3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [136]:
df4

Unnamed: 0,C,D
0,C0,D0
1,C1,D1


In [139]:
pd.concat([df3,df4], axis=1) # C ve D kolonları eklendi

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1


In [140]:
# Indeksleri aynı bile olsa yeni satır olarak eklenmesi
df1 = make_df('ABC', [1,2,3])
df2 = make_df('ABC', [1,2,3])
df1

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,A3,B3,C3


In [141]:
df2

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,A3,B3,C3


In [143]:
pd.concat([df1,df2], ignore_index=True)

Unnamed: 0,A,B,C
0,A1,B1,C1
1,A2,B2,C2
2,A3,B3,C3
3,A1,B1,C1
4,A2,B2,C2
5,A3,B3,C3
