介绍地理信息数据处理过程中使用的基本库以及使用demo 
- urllib
- minidom:适合处理<20M的文件，过大的话会有效率问题
- zipfile
- tarfile
- ElementTree: 更高级的xml解析器，有多语言版本，cElementTree效率更加高效
- BeautifulSoup: 绝大多数xml文档没有按照规范编写，并且包含许多的格式错误以及无效字符串，在地理空间分析中这种情况更为明显，所以需要一款解析器来获取问题格式文件中的数据,beautifulsoup 可以优雅的使用那些基于标签的数据，即使他们格式非常糟糕
- lxml: 功能更加强大的xml解析库
- shapely: 读写wkt、wkb文本

    wkb(well-know binary):将wkt格式的字符串以二进制对象的形式存储在数据库中
- json/geojson : 处理geojson 数据；虽然可以通过eval(jsondata)直接将jsondata转换为python中的数据结构，但是这是不安全的，因为json支持一些python不支持的字符集，而不单单像看上去的那样是python字典和list对象的嵌套组合
- pyshp: 读写shapefile;只调用python标准库
- dbfpy3:专门处理dbf文件,安装


In [5]:
%config Completer.use_jedi = False

In [3]:
from osgeo import gdal
gdal.__version__

'3.6.2'

In [None]:
import urllib.request
import urllib.parse
import urllib.error


url="https://github.com/GeospatialPython/Learn/raw/master/hancock.zip"
filename="hancock.zip"
urllib.request.urlretrieve(url,filename)


import zipfile
zip=open("hancock.zip", "rb")
zipShape=zipfile.ZipFile(zip)
shpname,shxName,dbName= zipShape.namelist()
for filename in zipShape.namelist():
    out= open(filename,"wb")
    out.write(zipShape.read(filename))
    out.close()

In [None]:
import urllib.request
import urllib.parse
import urllib.error
import zipfile
import io
import struct

# 直接读取线上压缩包，无需下载到磁盘，内存中直接读取
url="https://github.com/GeospatialPython/Learn/raw/master/hancock.zip"
cloudshape=urllib.request.urlopen(url)
memoryshape= io.BytesIO(cloudshape.read())
zipshape= zipfile.ZipFile(memoryshape)
cloudshape= zipshape.read("hancock.shp")
struct.unpack("<dddd",cloudshape[36:68])



In [2]:
from xml.dom import minidom
kml= minidom.parse("time-stamp-point.kml")
placemarks=kml.getElementsByTagNamentsByTagName("Placemark")
len(placemarks)

first=placemarks[0]
first.toprettyxml()

coordinates= first.getElementsByTagName("coordinates")
point=coordinates[0].firstChild.data
point


'-122.536226,37.86047,0'

In [27]:
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


tree=ET.ElementTree(file="time-stamp-point.kml")

ns="{http://www.opengis.net/kml/2.2}"
placemarks= tree.find(".//{}Placemark".format(ns))
print(placemarks)
coordinates= placemarks.find("./{}Point/{}coordinates".format(ns,ns))
print(coordinates)
coordinates.text

# 构建xml
root=ET.Element("kml")
root.attrib["xmlns"]="http://www.opengis.net/kml/2.2"
placemark= ET.SubElement(root,"Placemark")
office= ET.SubElement(placemark,"name")
office.text="Office"

point= ET.SubElement(placemark,"Point")
coordinates= ET.SubElement(point,"coordinates")
coordinates.text="-122.536226,37.86047,0"

tree=ET.ElementTree(root)
tree.write("placemark.kml",xml_declaration=True,encoding="utf-8",method="xml")


<Element '{http://www.opengis.net/kml/2.2}Placemark' at 0x7f94b2bc1590>
<Element '{http://www.opengis.net/kml/2.2}coordinates' at 0x7f94b2bc1cc0>


In [33]:
from bs4 import  BeautifulSoup
import xml.etree.ElementTree as ET

#broken_data.gpx 文件最后缺失了一个</trkseg> 闭合标签
#ET.ElementTree(file="broken_data.gpx") # 报错

gpx=open("broken_data.gpx")
soup=BeautifulSoup(gpx.read(),features="xml")
soup.trkpt
tracks=soup.findAll("trkpt")
len(tracks)

# 重新输出正确的格式
fixed=open("fix_data.gpx","w")
fixed.write(soup.prettify())
fixed.close()



In [2]:
import shapely.wkt
import urllib.request
wktpoly="POLYGON((103.30266237500007 35.293222467212644,105.45598268750007 35.293222467212644,104.70891237500007 31.62819408858728,100.88567018750007 33.29662370215108,103.30266237500007 35.293222467212644))"
poly= shapely.wkt.loads(wktpoly)
poly.area

poly.wkt
poly.wkb

from osgeo  import ogr
shape=ogr.Open("polygon.shp")
layer= shape.GetLayer()
feature= layer.GetNextFeature()
geom= feature.GetGeometryRef()
wkt=geom.ExportToWkt()
wkt

poly=ogr.CreateGeometryFromWkt(wkt)
poly.GetEnvelope()


AttributeError: 'NoneType' object has no attribute 'Getlayer'

In [15]:
# geojson模块实现了python的__geo_interface__规范：该接口允许协作程序之间无缝传递数据，并且以python风格进行无需developer显式导入导出geojson.
# 所以如果想将geojson模块创建的点提供给shapely模块使用，只需直接读取geojson module 创建的点对象到shapely对象即可，之后就能够获取其wkt格式的数据。
import geojson
p=geojson.Point([-92,37])
geojs=geojson.dumps(p)
geojs



import shapely.geometry 
point= shapely.geometry.shape(p)
point.wkt

'POINT (-92 37)'

In [3]:
import shapefile
shp=shapefile.Reader("point/point.shp")
for feature in shp.shapeRecords():
    point=feature.shape.points[0]
    rec=feature.record[0]
    print(point[0],point[0],rec)

1.0 1.0 First
3.0 3.0 Second
4.0 4.0 Third
2.0 2.0 Fourth
0.0 0.0 Appended


In [8]:
import dbfpy3.dbf
db= dbfpy3.dbf.Dbf("GIS_CensusTract/GIS_CensusTract_poly.dbf")
print(db[0])

ValueError: dbf fields definition is corrupt, fields start does not match.