http://darribas.org/gds_scipy16/ipynb_md/01_data_processing.html

In [1]:
# Print the current working directory via the shell
# (a bare `cd` prints it in the Windows command shell this was run under).
!cd

C:\Users\tsk_sato\Documents\GeoDataAnalysis


In [2]:
import pysal as ps
import pandas as pd
import numpy as np



In [3]:
# The familiar example datasets bundled with PySAL: print every name on a
# single line, each one followed by ", ".
print("".join(str(name) + ", " for name in ps.examples.available()), end="")

10740, arcgis, baltim, book, burkitt, calemp, chicago, clearwater, columbus, desmith, geodanet, georgia, juvenile, Line, londonhp, mexico, nat, networks, newHaven, nyc_bikes, Point, Polygon, Polygon_Holes, sacramento2, sids2, snow_maps, south, stl, street_net_pts, taz, tokyo, us_income, virginia, wmat, 

In [4]:
# Show the description of the us_income example dataset and the files it ships with.
ps.examples.explain('us_income')

{'name': 'us_income',
 'description': 'Nominal per capita income for the lower 48 US states 1929-2009',
 'explanation': ['* states48.gal: queen contiguity weights in GAL format.',
  '* us48.dbf: attribute data. (k=8)',
  '* us48.shp: Polygon shapefile. (n=48)',
  '* us48.shx: spatial index.',
  '* usjoin.csv: 48 US states nominal per capita income time series 1929-2009.']}

In [5]:
# Load usjoin.csv: resolve its path inside the bundled examples, open it with
# PySAL's own file reader, and peek at the first ten header names.
# NOTE(review): `f` is not closed in the visible cells; the next cell reads
# a column from it, so it has to stay open at this point.
csv_path = ps.examples.get_path('usjoin.csv')
f = ps.open(csv_path)
f.header[:10]

['Name',
 'STATE_FIPS',
 '1929',
 '1930',
 '1931',
 '1932',
 '1933',
 '1934',
 '1935',
 '1936']

In [6]:
# Pull the '2009' column out of the opened usjoin.csv table and show its
# first ten values.
y2009 = f.by_col('2009')
y2009[:10]

[32274, 32077, 31493, 40902, 40093, 52736, 40135, 36565, 33086, 30987]

In [7]:
# Pandas with PySAL: `ps.pdio` is the module used below to load files as a
# DataFrame; evaluating it shows which module object it resolves to.
ps.pdio

<module 'pysal.contrib.pdio' from 'C:\\Users\\tsk_sato\\Anaconda3\\lib\\site-packages\\pysal\\contrib\\pdio\\__init__.py'>

In [8]:
# Read the shp/dbf files of the NAT example into a single table.
shp_path = ps.examples.get_path('NAT.shp')
data_table = ps.pdio.read_files(shp_path)

In [9]:
# The result can be used like a pandas DataFrame: preview the first rows.
data_table.head()

Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,STFIPS,COFIPS,FIPSNO,SOUTH,HR60,...,BLK90,GI59,GI69,GI79,GI89,FH60,FH70,FH80,FH90,geometry
0,Lake of the Woods,Minnesota,27,77,27077,27,77,27077,0,0.0,...,0.024534,0.285235,0.372336,0.342104,0.336455,11.279621,5.4,5.663881,9.51586,<pysal.cg.shapes.Polygon object at 0x000002198...
1,Ferry,Washington,53,19,53019,53,19,53019,0,0.0,...,0.317712,0.256158,0.360665,0.361928,0.36064,10.053476,2.6,10.079576,11.397059,<pysal.cg.shapes.Polygon object at 0x000002198...
2,Stevens,Washington,53,65,53065,53,65,53065,0,1.863863,...,0.21003,0.283999,0.394083,0.357566,0.369942,9.258437,5.6,6.812127,10.352015,<pysal.cg.shapes.Polygon object at 0x000002198...
3,Okanogan,Washington,53,47,53047,53,47,53047,0,2.61233,...,0.155922,0.25854,0.371218,0.38124,0.394519,9.0399,8.1,10.084926,12.84034,<pysal.cg.shapes.Polygon object at 0x000002198...
4,Pend Oreille,Washington,53,51,53051,53,51,53051,0,0.0,...,0.134605,0.243263,0.365614,0.358706,0.387848,8.24393,4.1,7.557643,10.313002,<pysal.cg.shapes.Polygon object at 0x000002198...


In [10]:
# It behaves like a DataFrame: check the (rows, columns) dimensions.
data_table.shape

(3085, 70)

In [11]:
# To read a csv file, use pandas.read_csv() rather than ps.pdio.read_files().
usjoin = pd.read_csv(csv_path)
#usjoin = ps.pdio.read_files(csv_path) #will not work, not a shp/dbf pair

In [12]:
# Preview the first rows of the per-state income table loaded from usjoin.csv.
usjoin.head()

Unnamed: 0,Name,STATE_FIPS,1929,1930,1931,1932,1933,1934,1935,1936,...,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009
0,Alabama,1,323,267,224,162,166,211,217,251,...,23471,24467,25161,26065,27665,29097,30634,31988,32819,32274
1,Arizona,4,600,520,429,321,308,362,416,462,...,25578,26232,26469,27106,28753,30671,32552,33470,33445,32077
2,Arkansas,5,310,228,215,157,157,187,207,247,...,22257,23532,23929,25074,26465,27512,29041,31070,31800,31493
3,California,6,991,887,749,580,546,603,660,771,...,32275,32750,32900,33801,35663,37463,40169,41943,42377,40902
4,Colorado,8,634,578,471,354,353,368,444,542,...,32949,34228,33963,34092,35543,37388,39662,41165,41719,40093


In [13]:
# Use a pandas groupby object to check the number of rows in each STATE_NAME group.
data_table.groupby("STATE_NAME").size()

STATE_NAME
Alabama                  67
Arizona                  14
Arkansas                 75
California               58
Colorado                 63
Connecticut               8
Delaware                  3
District of Columbia      1
Florida                  67
Georgia                 159
Idaho                    44
Illinois                102
Indiana                  92
Iowa                     99
Kansas                  105
Kentucky                120
Louisiana                64
Maine                    16
Maryland                 24
Massachusetts            12
Michigan                 83
Minnesota                87
Mississippi              82
Missouri                115
Montana                  55
Nebraska                 93
Nevada                   17
New Hampshire            10
New Jersey               21
New Mexico               32
New York                 58
North Carolina          100
North Dakota             53
Ohio                     88
Oklahoma                 77
Oregon   

In [14]:
# Use a pandas groupby object to check the per-state mean of every numeric feature.
# `numeric_only=True` is required because the table also holds non-numeric
# columns (names, FIPS strings, geometry objects): pandas >= 2.0 raises a
# TypeError on those instead of silently dropping them, while older versions
# accept the flag and give the same numeric-only result.
data_table.groupby("STATE_NAME").mean(numeric_only=True)

Unnamed: 0_level_0,STFIPS,COFIPS,FIPSNO,SOUTH,HR60,HR70,HR80,HR90,HC60,HC70,...,BLK80,BLK90,GI59,GI69,GI79,GI89,FH60,FH70,FH80,FH90
STATE_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,1.0,67.0,1067.0,1.0,9.664273,12.366532,13.671724,11.275698,4.855721,6.890547,...,27.870452,27.738164,0.358849,0.400859,0.38979,0.401957,15.04729,12.658209,14.865086,17.849054
Arizona,4.0,12.928571,4012.928571,0.0,5.832053,7.765373,9.966464,7.859769,5.761905,9.809524,...,1.582592,1.682458,0.292598,0.366815,0.372539,0.400891,11.614241,9.55,11.084694,14.534198
Arkansas,5.0,75.0,5075.0,1.0,5.632127,7.802951,9.332246,9.338339,1.613333,2.413333,...,15.01721,15.116454,0.367968,0.404964,0.391768,0.395373,11.821237,9.409333,10.913212,13.878436
California,6.0,58.0,6058.0,0.0,4.439738,5.978943,9.570683,6.785964,10.764368,25.54023,...,3.004658,3.518975,0.264841,0.35501,0.374018,0.370989,9.786059,9.068966,11.426153,13.720628
Colorado,8.0,63.0,8063.0,0.0,3.924235,2.998493,4.759384,4.967797,1.201058,1.94709,...,0.599398,0.89006,0.273781,0.356433,0.361891,0.366794,9.510066,7.466667,8.550721,10.603334
Connecticut,9.0,8.0,9008.0,0.0,1.505726,2.408195,3.368457,3.91988,5.0,13.208333,...,4.476262,5.411204,0.231879,0.314034,0.336589,0.320163,11.379777,9.4625,12.486749,13.965955
Delaware,10.0,3.0,10003.0,1.0,5.192247,7.738128,7.015415,6.789098,7.0,13.111111,...,17.17239,17.28123,0.280433,0.340442,0.363893,0.352808,11.074513,10.666667,13.83693,15.358368
District of Columbia,11.0,1.0,11001.0,1.0,10.471807,30.308317,29.608371,64.260999,80.0,229.333333,...,70.324736,65.843467,0.288736,0.395656,0.4496,0.419867,22.939671,25.2,35.746373,39.190808
Florida,12.0,67.0,12067.0,1.0,11.707512,13.466735,12.579635,11.616696,6.960199,13.024876,...,15.010553,13.894426,0.332605,0.397094,0.388199,0.381073,12.719785,10.786567,12.522228,14.187656
Georgia,13.0,161.490566,13161.490566,1.0,10.71711,16.489166,12.916989,12.399014,2.578616,5.085954,...,28.129618,27.413011,0.351579,0.389488,0.386992,0.390188,15.153917,13.02956,14.851524,18.43475


In [15]:
# Arizona only: select rows with a query-string expression on STATE_NAME.
data_table.query('STATE_NAME == "Arizona"')

Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,STFIPS,COFIPS,FIPSNO,SOUTH,HR60,...,BLK90,GI59,GI69,GI79,GI89,FH60,FH70,FH80,FH90,geometry
1707,Navajo,Arizona,4,17,4017,4,17,4017,0,5.263989,...,0.905251,0.366863,0.414135,0.401999,0.445299,13.146998,12.1,13.762783,18.033782,<pysal.cg.shapes.Polygon object at 0x000002198...
1708,Coconino,Arizona,4,5,4005,4,5,4005,0,3.185449,...,1.469081,0.301222,0.377785,0.381655,0.403188,9.475171,8.5,11.181563,15.267643,<pysal.cg.shapes.Polygon object at 0x000002198...
1722,Mohave,Arizona,4,15,4015,4,15,4015,0,0.0,...,0.324075,0.279339,0.34715,0.37579,0.374383,11.508554,4.8,7.018268,9.214294,<pysal.cg.shapes.Polygon object at 0x000002198...
1726,Apache,Arizona,4,1,4001,4,1,4001,0,10.951223,...,0.162361,0.395913,0.450552,0.431013,0.489132,15.014738,14.6,18.727548,22.933635,<pysal.cg.shapes.Polygon object at 0x000002198...
2002,Yavapai,Arizona,4,25,4025,4,25,4025,0,3.458771,...,0.298011,0.289509,0.378195,0.376313,0.384089,9.930032,8.6,7.516372,9.483521,<pysal.cg.shapes.Polygon object at 0x000002198...
2182,Gila,Arizona,4,7,4007,4,7,4007,0,6.473749,...,0.246171,0.265294,0.337519,0.353848,0.386976,10.470261,8.1,9.934237,11.706102,<pysal.cg.shapes.Polygon object at 0x000002198...
2262,Maricopa,Arizona,4,13,4013,4,13,4013,0,6.179259,...,3.499221,0.277828,0.352374,0.366015,0.372756,10.642382,9.8,11.85726,14.404902,<pysal.cg.shapes.Polygon object at 0x000002198...
2311,Greenlee,Arizona,4,11,4011,4,11,4011,0,2.896284,...,0.34965,0.177691,0.257158,0.283518,0.337256,9.806115,6.7,5.29511,10.453284,<pysal.cg.shapes.Polygon object at 0x000002198...
2326,Graham,Arizona,4,9,4009,4,9,4009,0,4.746648,...,1.890487,0.310256,0.362926,0.383554,0.408379,11.979335,10.1,11.961367,16.129032,<pysal.cg.shapes.Polygon object at 0x000002198...
2353,Pinal,Arizona,4,21,4021,4,21,4021,0,13.82839,...,3.134586,0.304294,0.369974,0.361193,0.40013,10.822965,8.8,10.341699,15.304144,<pysal.cg.shapes.Polygon object at 0x000002198...


In [16]:
# Arizona only, this time selected with a boolean mask instead of a query string.
data_table.loc[data_table["STATE_NAME"] == 'Arizona']

Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,STFIPS,COFIPS,FIPSNO,SOUTH,HR60,...,BLK90,GI59,GI69,GI79,GI89,FH60,FH70,FH80,FH90,geometry
1707,Navajo,Arizona,4,17,4017,4,17,4017,0,5.263989,...,0.905251,0.366863,0.414135,0.401999,0.445299,13.146998,12.1,13.762783,18.033782,<pysal.cg.shapes.Polygon object at 0x000002198...
1708,Coconino,Arizona,4,5,4005,4,5,4005,0,3.185449,...,1.469081,0.301222,0.377785,0.381655,0.403188,9.475171,8.5,11.181563,15.267643,<pysal.cg.shapes.Polygon object at 0x000002198...
1722,Mohave,Arizona,4,15,4015,4,15,4015,0,0.0,...,0.324075,0.279339,0.34715,0.37579,0.374383,11.508554,4.8,7.018268,9.214294,<pysal.cg.shapes.Polygon object at 0x000002198...
1726,Apache,Arizona,4,1,4001,4,1,4001,0,10.951223,...,0.162361,0.395913,0.450552,0.431013,0.489132,15.014738,14.6,18.727548,22.933635,<pysal.cg.shapes.Polygon object at 0x000002198...
2002,Yavapai,Arizona,4,25,4025,4,25,4025,0,3.458771,...,0.298011,0.289509,0.378195,0.376313,0.384089,9.930032,8.6,7.516372,9.483521,<pysal.cg.shapes.Polygon object at 0x000002198...
2182,Gila,Arizona,4,7,4007,4,7,4007,0,6.473749,...,0.246171,0.265294,0.337519,0.353848,0.386976,10.470261,8.1,9.934237,11.706102,<pysal.cg.shapes.Polygon object at 0x000002198...
2262,Maricopa,Arizona,4,13,4013,4,13,4013,0,6.179259,...,3.499221,0.277828,0.352374,0.366015,0.372756,10.642382,9.8,11.85726,14.404902,<pysal.cg.shapes.Polygon object at 0x000002198...
2311,Greenlee,Arizona,4,11,4011,4,11,4011,0,2.896284,...,0.34965,0.177691,0.257158,0.283518,0.337256,9.806115,6.7,5.29511,10.453284,<pysal.cg.shapes.Polygon object at 0x000002198...
2326,Graham,Arizona,4,9,4009,4,9,4009,0,4.746648,...,1.890487,0.310256,0.362926,0.383554,0.408379,11.979335,10.1,11.961367,16.129032,<pysal.cg.shapes.Polygon object at 0x000002198...
2353,Pinal,Arizona,4,21,4021,4,21,4021,0,13.82839,...,3.134586,0.304294,0.369974,0.361193,0.40013,10.822965,8.8,10.341699,15.304144,<pysal.cg.shapes.Polygon object at 0x000002198...


In [17]:
# Keep only the rows whose PySAL polygon centroid has an x-coordinate
# (longitude) below -119, then preview the first matches.
is_west_of_119 = data_table.geometry.apply(lambda polygon: polygon.centroid[0] < -119)
data_table[is_west_of_119].head()

Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,STFIPS,COFIPS,FIPSNO,SOUTH,HR60,...,BLK90,GI59,GI69,GI79,GI89,FH60,FH70,FH80,FH90,geometry
3,Okanogan,Washington,53,47,53047,53,47,53047,0,2.61233,...,0.155922,0.25854,0.371218,0.38124,0.394519,9.0399,8.1,10.084926,12.84034,<pysal.cg.shapes.Polygon object at 0x000002198...
27,Whatcom,Washington,53,73,53073,53,73,53073,0,1.422131,...,0.508687,0.24763,0.346935,0.369436,0.358418,9.174415,7.1,9.718054,11.135022,<pysal.cg.shapes.Polygon object at 0x000002198...
31,Skagit,Washington,53,57,53057,53,57,53057,0,2.59656,...,0.351958,0.239346,0.34483,0.364623,0.362265,8.611518,7.9,10.480031,11.382484,<pysal.cg.shapes.Polygon object at 0x000002198...
42,Chelan,Washington,53,7,53007,53,7,53007,0,4.908698,...,0.15311,0.246292,0.367681,0.374505,0.383486,8.787907,8.1,9.968454,12.236493,<pysal.cg.shapes.Polygon object at 0x000002198...
44,Clallam,Washington,53,9,53009,53,9,53009,0,3.330891,...,0.568504,0.240573,0.34932,0.361619,0.366854,8.788882,6.5,9.6609,12.28169,<pysal.cg.shapes.Polygon object at 0x000002198...


In [18]:
# Count the rows whose PySAL polygon centroid has an x-coordinate
# (longitude) below -119.
west_mask = data_table.geometry.apply(lambda shape: shape.centroid[0] < -119)
len(data_table[west_mask])

109