In [1]:
import numpy as np
import pandas as pd

pokemons = pd.read_csv("datas/pokemon.csv", index_col = "Pokemon").squeeze()
pokemons

Pokemon
Bulbasaur      Grass / Poison
Ivysaur        Grass / Poison
Venusaur       Grass / Poison
Charmander               Fire
Charmeleon               Fire
                    ...      
Stakataka        Rock / Steel
Blacephalon      Fire / Ghost
Zeraora              Electric
Meltan                  Steel
Melmetal                Steel
Name: Type, Length: 809, dtype: object

In [2]:
pokemons.describe()

count        809
unique       159
top       Normal
freq          65
Name: Type, dtype: object

## 갯수 count

In [3]:
pokemons.count()

809

## nunique 중복 제거 카운트

In [4]:
pokemons.nunique()

159

In [5]:
pokemons_d = dict(pokemons)

# series 정렬

## sort_values 값 정렬

In [6]:
pokemons.sort_values()

Pokemon
Illumise                Bug
Silcoon                 Bug
Pinsir                  Bug
Burmy                   Bug
Wurmple                 Bug
                  ...      
Tirtouga       Water / Rock
Relicanth      Water / Rock
Corsola        Water / Rock
Carracosta     Water / Rock
Empoleon      Water / Steel
Name: Type, Length: 809, dtype: object

## ascending = False => 역방향 정렬

In [7]:
pokemons.sort_values(ascending = False) #역방향

Pokemon
Empoleon      Water / Steel
Corsola        Water / Rock
Relicanth      Water / Rock
Carracosta     Water / Rock
Tirtouga       Water / Rock
                  ...      
Kricketune              Bug
Cascoon                 Bug
Scatterbug              Bug
Kricketot               Bug
Grubbin                 Bug
Name: Type, Length: 809, dtype: object

## 연습

In [8]:
war = pd.read_csv("datas/revolutionary_war.csv",
                  parse_dates = ["Start Date"],
                  index_col = "Start Date",
                  usecols = ["Start Date","State"]).squeeze("columns")
war

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object

In [9]:
war.isnull().sum()

70

In [10]:
war.count()

162

In [11]:
war.nunique()

17

In [12]:
war.sort_values(na_position = "first") # or last

Start Date
1775-09-17         NaN
1775-12-31         NaN
1776-03-03         NaN
1776-03-25         NaN
1776-05-18         NaN
                ...   
1781-07-06    Virginia
1781-07-01    Virginia
1781-06-26    Virginia
1781-04-25    Virginia
1783-01-22    Virginia
Name: State, Length: 232, dtype: object

In [13]:
war.dropna()

Start Date
1774-09-01     Massachusetts
1774-12-14     New Hampshire
1775-04-19     Massachusetts
1775-04-19     Massachusetts
1775-04-20          Virginia
                   ...      
1782-08-15          Virginia
1782-08-19          Virginia
1782-08-26    South Carolina
1782-09-11          Virginia
1783-01-22          Virginia
Name: State, Length: 162, dtype: object

In [14]:
# index기준 정렬
pokemons.sort_index(ascending = True) 

Pokemon
Abomasnow        Grass / Ice
Abra                 Psychic
Absol                   Dark
Accelgor                 Bug
Aegislash      Steel / Ghost
                  ...       
Zoroark                 Dark
Zorua                   Dark
Zubat        Poison / Flying
Zweilous       Dark / Dragon
Zygarde      Dragon / Ground
Name: Type, Length: 809, dtype: object

In [15]:
war.sort_index()

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1783-01-22         Virginia
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
Name: State, Length: 232, dtype: object

In [16]:
war.sort_index(na_position="first")

Start Date
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
1774-09-01    Massachusetts
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object

In [17]:
#series의 index에 포함된 NaT값을 배제하는 두가지 방법
war.loc[war.index.dropna()]

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-19    Massachusetts
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 250, dtype: object

In [18]:
war.loc[war.index.notnull()]

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 228, dtype: object

In [29]:
gle = pd.read_csv("datas/google_stocks.csv",
                 parse_dates=['Date'], #Date 타입으로 저장, parse_dates 인수에 파싱할 column명을 리스트 형태로 전달
                 index_col="Date").squeeze()
gle

Date
2004-08-19      49.98
2004-08-20      53.95
2004-08-23      54.50
2004-08-24      52.24
2004-08-25      52.80
               ...   
2019-10-21    1246.15
2019-10-22    1242.80
2019-10-23    1259.13
2019-10-24    1260.99
2019-10-25    1265.13
Name: Close, Length: 3824, dtype: float64

In [30]:
gle.describe()

count    3824.000000
mean      479.945860
std       328.528592
min        49.820000
25%       235.860000
50%       314.680000
75%       708.205000
max      1287.580000
Name: Close, dtype: float64

In [31]:
gle.sort_values(ascending=False).head(10)

Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
2019-04-23    1264.55
2018-07-25    1263.70
2019-04-25    1263.45
2019-10-24    1260.99
2019-10-23    1259.13
2019-04-24    1256.00
Name: Close, dtype: float64

In [32]:
#큰 숫자를 순서대로 인수만큼 구한다.
gle.nlargest(10)

Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
2019-04-23    1264.55
2018-07-25    1263.70
2019-04-25    1263.45
2019-10-24    1260.99
2019-10-23    1259.13
2019-04-24    1256.00
Name: Close, dtype: float64

In [34]:
#작은 숫자를 순서대로 인수만큼 구한다.
gle.nsmallest(10)

Date
2004-09-03    49.82
2004-09-01    49.94
2004-08-19    49.98
2004-09-02    50.57
2004-09-07    50.60
2004-08-30    50.81
2004-09-08    50.96
2004-09-09    50.96
2004-08-31    50.99
2004-08-24    52.24
Name: Close, dtype: float64

In [39]:
bins = np.linspace(0, 1400, 8)
gle.value_counts(bins = bins).sort_index()

(-0.001, 200.0]      595
(200.0, 400.0]      1568
(400.0, 600.0]       575
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1000.0, 1200.0]     406
(1200.0, 1400.0]      93
Name: Close, dtype: int64

In [62]:
import pandas as pd
data = {
    "c1" : [1,2,"누락"],
    "c2" : [1.11,"",3.33],
    "c3" : ["one","two","three"]
}
csv01 = pd.DataFrame(data)
csv01

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,,two
2,누락,3.33,three


## pandas데이터 csv로 출력하기
## to_csv

In [64]:
csv01.to_csv("datas/sample1.csv", index = False)

In [65]:
df_read = pd.read_csv("datas/sample1.csv")
df_read

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,,two
2,누락,3.33,three


In [70]:
# column인덱스 배제 -> header = False
csv01.to_csv("datas/sample2.csv", index = False, header = False)
csv01

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,,two
2,누락,3.33,three


In [67]:
# column인덱스 추가 names키워드로 column인덱스 정보 추가
pd.read_csv('datas/sample2.csv',names=['c1','c2','c3'])

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,,two
2,누락,3.33,three


In [68]:
%pwd

'C:\\python'

### 주석 불가능  %%write ~

In [73]:
%%writefile sample3.txt
c1        c2        c3        c4
0.179181 -1.538472  1.347553  0.43381
1.024209  0.087307 -1.281997  0.49265
0.417899 -2.002308  0.255245 -1.10515

Overwriting sample3.txt


In [74]:
"""%%writefile datas/sample3.txt
c1       c2       c3       c4
0.179181 -1.538472 1.347553 0.43381
1.024209 0.087307 -1.281997 0.49265
0.417899 -2.002308 0.255245 -1.10515"""

'%%writefile datas/sample3.txt\nc1       c2       c3       c4\n0.179181 -1.538472 1.347553 0.43381\n1.024209 0.087307 -1.281997 0.49265\n0.417899 -2.002308 0.255245 -1.10515'

## 공백이 하나 이상인경우 '\s+'

In [76]:
pd.read_table('sample3.txt', sep='\s+')

Unnamed: 0,c1,c2,c3,c4
0,0.179181,-1.538472,1.347553,0.43381
1,1.024209,0.087307,-1.281997,0.49265
2,0.417899,-2.002308,0.255245,-1.10515


In [81]:
%%writefile sample4.txt
파일 제목: sample4.txt
데이터 포맷의 설명:
c1, c2, c3
1, 1.11, one
2, 2.22, two
3, 3.33, three

Overwriting sample4.txt


In [84]:
pd.read_csv('sample4.txt', sep='\s+')

Unnamed: 0,파일,제목:,sample4.txt
0,데이터,포맷의,설명:
1,"c1,","c2,",c3
2,1,"1.11,",one
3,2,"2.22,",two
4,3,"3.33,",three


In [85]:
#앞의 두줄을 가져오지 않겠다.
pd.read_csv('sample4.txt', skiprows=[0,1])

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


In [86]:
# nan으로 취급할 값을 넣는다 na_values
df_na_val = pd.read_csv('datas/sample1.csv', na_values=['누락'])
df_na_val

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


In [87]:
df_na_val.to_csv('sample5.txt',sep = '|')

In [89]:
df_na_val

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


In [90]:
df_na_val.to_csv('sample6.csv', na_rep='누락')

In [91]:
pd.read_csv('sample6.csv')

Unnamed: 0.1,Unnamed: 0,c1,c2,c3
0,0,1.0,1.11,one
1,1,2.0,누락,two
2,2,누락,3.33,three


## URL지정 온라인 csv가져오기

In [92]:
titanic = pd.read_csv("https://storage.googleapis.com/tf-datasets/titanic/train.csv")

In [93]:
titanic

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.2500,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.9250,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1000,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y
...,...,...,...,...,...,...,...,...,...,...
622,0,male,28.0,0,0,10.5000,Second,unknown,Southampton,y
623,0,male,25.0,0,0,7.0500,Third,unknown,Southampton,y
624,1,female,19.0,0,0,30.0000,First,B,Southampton,y
625,0,female,28.0,1,2,23.4500,Third,unknown,Southampton,n


In [97]:
import requests

url = 'http://apis.data.go.kr/6300000/mdlcnst' #http://apis.data.go.kr/1360000/AirInfoService
params ={'serviceKey' : '서비스키', 'pageNo' : '페이지번호', 'numOfRows' : '한페이지 결과 수'}

response = requests.get(url, params=params)
print(response.status_code)

500


In [100]:
response = requests.get('http://apis.data.go.kr/search/6300000/mdlcnst')
print(response.status_code)

500


In [125]:
"""response = requests.get("https://apis.data.go.kr/6300000/mdlcnst/getmdlcnst?serviceKey=R5ypB93DJ%2FynR6onZknXJyjQSU498Oq7WSErqucTLk4dzSa350lKjtGnDLfkwL9caJnlbIVGmQ4jV4p1J0r2ig%3D%3D&pageNo=3&numOfRows=20",
                        headers = {"accept" : "application/json"})
print(response.status_code)"""

200


In [132]:
response = requests.get("https://apis.data.go.kr/6300000/mdlcnst/getmdlcnst?serviceKey=R5ypB93DJ%252FynR6onZknXJyjQSU498Oq7WSErqucTLk4dzSa350lKjtGnDLfkwL9caJnlbIVGmQ4jV4p1J0r2ig%253D%253D&pageNo=3&numOfRows=200",
                        headers = {"accept" : "application/json"})
print(response.status_code)

200


In [133]:
import xmltodict

In [134]:
xmltodict.parse(response.text)

{'OpenAPI_ServiceResponse': {'cmmMsgHeader': {'errMsg': 'SERVICE ERROR',
   'returnAuthMsg': 'SERVICE_KEY_IS_NOT_REGISTERED_ERROR',
   'returnReasonCode': '30'}}}

In [135]:
"""import json
response.json()"""

'import json\nresponse.json()'