In [1]:
import os
import sqlite3 as lite

import pandas as pd

In [2]:
# Let text columns show up to 200 characters before pandas truncates them.
pd.set_option("display.max_colwidth", 200)

In [3]:
# Open the SQLite database file ./products.db (the path is relative to the
# current working directory). The pd.read_sql calls below run against it.
# NOTE(review): the name `db` is rebound to a MongoDB database further down in
# this notebook, so the SQL cells only work before that rebinding.
db = lite.connect('./products.db')

In [7]:
# Print every table registered in sqlite_master together with its column names.
query = """
SELECT name
FROM
    sqlite_master
WHERE
    type='table'
;
"""
for table in pd.read_sql(query, db)["name"]:
    # Only the column headers are needed, so LIMIT 0 avoids loading every row
    # of every table. The name is wrapped in double quotes (with embedded
    # quotes doubled) so unusual table names cannot break the statement.
    quoted_table = '"{}"'.format(table.replace('"', '""'))
    columns_query = "SELECT * FROM {} LIMIT 0;".format(quoted_table)
    print(table, list(pd.read_sql(columns_query, db)))

Categories ['CategoryID', 'CategoryName', 'Description']
Customers ['CustomerID', 'CustomerName', 'ContactName', 'Address', 'City', 'PostalCode', 'Country']
Employees ['EmployeeID', 'LastName', 'FirstName', 'BirthDate', 'Photo', 'Notes']
Shippers ['ShipperID', 'ShipperName', 'Phone']
Suppliers ['SupplierID', 'SupplierName', 'ContactName', 'Address', 'City', 'PostalCode', 'Country', 'Phone']
Orders ['OrderID', 'CustomerID', 'EmployeeID', 'OrderDate', 'ShipperID']
OrderDetails ['OrderDetailID', 'OrderID', 'ProductID', 'Quantity']
Products ['ProductID', 'ProductName', 'SupplierID', 'CategoryID', 'Unit', 'Price']


In [9]:
# Preview the OrderDetails table: the query selects every column and row, and
# .head() then keeps only the first rows of the resulting DataFrame.
query = """
SELECT *
FROM
    OrderDetails
;
"""
pd.read_sql(query, db).head()

Unnamed: 0,OrderDetailID,OrderID,ProductID,Quantity
0,1,10248,11,12
1,2,10248,42,10
2,3,10248,72,5
3,4,10249,14,9
4,5,10249,51,40


In [10]:
# Join each order line (OrderDetails, alias D) to its product (Products,
# alias P) on ProductID. SELECT * returns the columns of both tables, so
# ProductID appears twice in the result.
query = """
SELECT *
FROM
    OrderDetails D
    JOIN
        Products P
        ON
            P.ProductID = D.ProductID
;
"""
pd.read_sql(query, db).head()

Unnamed: 0,OrderDetailID,OrderID,ProductID,Quantity,ProductID.1,ProductName,SupplierID,CategoryID,Unit,Price
0,1,10248,11,12,11,Queso Cabrales,5,4,1 kg pkg.,21.0
1,2,10248,42,10,42,Singaporean Hokkien Fried Mee,20,5,32 - 1 kg pkgs.,14.0
2,3,10248,72,5,72,Mozzarella di Giovanni,14,4,24 - 200 g pkgs.,34.8
3,4,10249,14,9,14,Tofu,6,7,40 - 100 g pkgs.,23.25
4,5,10249,51,40,51,Manjimup Dried Apples,24,7,50 - 300 g pkgs.,53.0


In [4]:
# Each JOIN step is wrapped in its own pair of parentheses.
# Orders (alias O) is joined in via OrderID, and "MONTH" is the first seven
# characters of OrderDate (e.g. 1996-07 for 1996-07-04).
query = """
SELECT 
    substr(O.OrderDate,1,7) "MONTH", D.Quantity, P.Price, O.OrderDate
FROM
    ((OrderDetails D
    JOIN
        Products P
        ON
            P.ProductID = D.ProductID)
    JOIN
        Orders O
        ON
            O.OrderID = D.OrderID)
;
"""
pd.read_sql(query, db).head()

Unnamed: 0,MONTH,Quantity,Price,OrderDate
0,1996-07,12,21.0,1996-07-04
1,1996-07,10,14.0,1996-07-04
2,1996-07,5,34.8,1996-07-04
3,1996-07,9,23.25,1996-07-05
4,1996-07,40,53.0,1996-07-05


In [5]:
# Group the joined order lines by the YYYY-MM prefix of OrderDate, producing
# one row per month.
# NOTE(review): D.Quantity, P.Price and O.OrderDate are neither aggregated nor
# listed in GROUP BY, so each output row shows the values of just one source
# row of that month, picked by the engine. Confirm this is intentional before
# reading anything into those three columns.
query = """
SELECT 
    substr(O.OrderDate,1,7) "MONTH", D.Quantity, P.Price, O.OrderDate
FROM
    ((OrderDetails D
    JOIN
        Products P
        ON
            P.ProductID = D.ProductID)
    JOIN
        Orders O
        ON
            O.OrderID = D.OrderID)
    GROUP BY
        substr(O.OrderDate,1,7)
;
"""
pd.read_sql(query, db).head()

Unnamed: 0,MONTH,Quantity,Price,OrderDate
0,1996-07,20,34.8,1996-07-31
1,1996-08,6,7.75,1996-08-30
2,1996-09,20,18.0,1996-09-30
3,1996-10,15,18.0,1996-10-31
4,1996-11,35,33.25,1996-11-29


In [6]:
# Per month (YYYY-MM prefix of OrderDate), add SalesCount: the sum of the
# ordered quantities over all order lines of that month.
# NOTE(review): D.Quantity, P.Price and O.OrderDate are still un-aggregated, so
# they reflect a single engine-chosen row per month; only MONTH and SalesCount
# describe the whole group.
query = """
SELECT 
    substr(O.OrderDate,1,7) "MONTH", D.Quantity, P.Price, O.OrderDate,
    SUM(D.Quantity) 'SalesCount'
FROM
    ((OrderDetails D
    JOIN
        Products P
        ON
            P.ProductID = D.ProductID)
    JOIN
        Orders O
        ON
            O.OrderID = D.OrderID)
    GROUP BY
        substr(O.OrderDate,1,7)
;
"""
pd.read_sql(query, db).head()

Unnamed: 0,MONTH,Quantity,Price,OrderDate,SalesCount
0,1996-07,20,34.8,1996-07-31,1462
1,1996-08,6,7.75,1996-08-30,1322
2,1996-09,20,18.0,1996-09-30,1124
3,1996-10,15,18.0,1996-10-31,1738
4,1996-11,35,33.25,1996-11-29,1735


In [7]:
# Per month, add Revenue: the sum of Quantity * Price over all order lines of
# that month, next to the SalesCount total.
# NOTE(review): D.Quantity, P.Price and O.OrderDate are un-aggregated and come
# from a single engine-chosen row per month; only MONTH, SalesCount and
# Revenue describe the whole group.
query = """
SELECT 
    substr(O.OrderDate,1,7) "MONTH", D.Quantity, P.Price, O.OrderDate,
    SUM(D.Quantity) 'SalesCount',
    SUM(D.Quantity * P.Price) 'Revenue'
FROM
    ((OrderDetails D
    JOIN
        Products P
        ON
            P.ProductID = D.ProductID)
    JOIN
        Orders O
        ON
            O.OrderID = D.OrderID)
    GROUP BY
        substr(O.OrderDate,1,7)
;
"""
pd.read_sql(query, db).head()

Unnamed: 0,MONTH,Quantity,Price,OrderDate,SalesCount,Revenue
0,1996-07,20,34.8,1996-07-31,1462,37779.85
1,1996-08,6,7.75,1996-08-30,1322,33285.49
2,1996-09,20,18.0,1996-09-30,1124,34565.6
3,1996-10,15,18.0,1996-10-31,1738,51528.69
4,1996-11,35,33.25,1996-11-29,1735,62163.99


In [8]:
# Per month, add Avg: the mean of Quantity * Price per order line, rounded to
# two decimals, next to the SalesCount and Revenue totals.
# NOTE(review): D.Quantity, P.Price and O.OrderDate are un-aggregated and come
# from a single engine-chosen row per month; only MONTH and the three
# aggregate columns describe the whole group.
query = """
SELECT 
    substr(O.OrderDate,1,7) "MONTH", D.Quantity, P.Price, O.OrderDate,
    SUM(D.Quantity) 'SalesCount',
    SUM(D.Quantity * P.Price) 'Revenue',
    ROUND(AVG(D.Quantity * P.Price),2) 'Avg'
FROM
    ((OrderDetails D
    JOIN
        Products P
        ON
            P.ProductID = D.ProductID)
    JOIN
        Orders O
        ON
            O.OrderID = D.OrderID)
    GROUP BY
        substr(O.OrderDate,1,7)
;
"""
pd.read_sql(query, db).head()

Unnamed: 0,MONTH,Quantity,Price,OrderDate,SalesCount,Revenue,Avg
0,1996-07,20,34.8,1996-07-31,1462,37779.85,640.34
1,1996-08,6,7.75,1996-08-30,1322,33285.49,482.4
2,1996-09,20,18.0,1996-09-30,1124,34565.6,606.41
3,1996-10,15,18.0,1996-10-31,1738,51528.69,705.87
4,1996-11,35,33.25,1996-11-29,1735,62163.99,941.88


In [9]:
# Same monthly aggregation, with a HAVING clause that filters the grouped rows.
# NOTE(review): HAVING tests D.Quantity, which is not aggregated. The condition
# is therefore evaluated against the Quantity of one engine-chosen row per
# month (the value shown in the Quantity column), not against a monthly total.
# If the goal is "months whose total quantity exceeds N", the test should be on
# SUM(D.Quantity); if the goal is "only order lines with Quantity > 20", it
# belongs in a WHERE clause before GROUP BY. Confirm the intent.
query = """
SELECT 
    substr(O.OrderDate,1,7) "MONTH", D.Quantity, P.Price, O.OrderDate,
    SUM(D.Quantity) 'SalesCount',
    SUM(D.Quantity * P.Price) 'Revenue',
    ROUND(AVG(D.Quantity * P.Price),2) 'Avg'
FROM
    ((OrderDetails D
    JOIN
        Products P
        ON
            P.ProductID = D.ProductID)
    JOIN
        Orders O
        ON
            O.OrderID = D.OrderID)
    GROUP BY
        substr(O.OrderDate,1,7)
    HAVING
        D.Quantity > 20
;
"""
pd.read_sql(query, db)

Unnamed: 0,MONTH,Quantity,Price,OrderDate,SalesCount,Revenue,Avg
0,1996-11,35,33.25,1996-11-29,1735,62163.99,941.88
1,1997-01,40,7.45,1997-01-31,2401,83400.47,981.18


## noSQL

In [37]:
import requests
from pymongo import MongoClient

In [42]:
# Connection string for MongoDB. It is read from the MONGO_URI environment
# variable so that the username and password never live in the notebook (or
# in version control). Without the variable, a credential-free local server
# is used; the database name matches the `mydbinstance` used below.
mongo_uri = os.environ.get("MONGO_URI", "mongodb://localhost:27017/mydbinstance")

In [43]:
# Create the MongoDB client from the connection string in `mongo_uri`.
client = MongoClient(mongo_uri)

In [46]:
# List the collection names of the mydbinstance database.
# Database.collection_names() is deprecated and no longer exists in PyMongo 4;
# list_collection_names() returns the same list of names.
client.mydbinstance.list_collection_names()

['system.indexes', 'users']

In [48]:
# Handle on the mydbinstance database of the MongoDB client.
# NOTE(review): this rebinds `db`, which earlier in the notebook holds the
# SQLite connection used by pd.read_sql. After this cell the SQL cells can no
# longer be re-run without reconnecting; a distinct name would avoid that.
db = client.mydbinstance

In [50]:
db["users"] # the same collection can also be reached with attribute access: db.users

Collection(Database(MongoClient(host=['ds135844.mlab.com:35844'], document_class=dict, tz_aware=False, connect=True), 'mydbinstance'), 'users')

In [51]:
# Keep a handle on the `users` collection for the insert/find cells below.
users_collection = db.users

In [52]:
# SQL analogue: INSERT INTO users(name, email) VALUES(a, b);
# Build the document to store, then insert it as a single record.
some_user = dict(
    name="Fastcampus Kim",
    email="fckim@fastcampus.co.kr",
)

users_collection.insert_one(some_user)

<pymongo.results.InsertOneResult at 0x10d4b3448>

In [53]:
# SQL analogue: SELECT * FROM users;
# The filter is empty, so no condition is applied; find_one returns a single
# document rather than all of them.
query = {}
users_collection.find_one(query)

{'_id': ObjectId('5c398d81dabf947b0839b872'),
 'name': 'Fastcampus Kim',
 'email': 'fckim@fastcampus.co.kr'}

In [54]:
# Three documents with deliberately different fields: a collection does not
# force every document to share one schema.
lots_of_users = [
    {"name": "jyp", "email": "jyp@fastcampus.co.kr"},
    {"name": "gdragoon", "address": "Seoul, Korea"},
    {"name": "Sbucks"},
]

# Insert all of them with a single call.
users_collection.insert_many(lots_of_users)

<pymongo.results.InsertManyResult at 0x10d4b3e08>

In [55]:
# Repeat the single-document lookup after the bulk insert: with an empty
# filter, find_one still returns just one document.
query = {}
users_collection.find_one(query)

{'_id': ObjectId('5c398d81dabf947b0839b872'),
 'name': 'Fastcampus Kim',
 'email': 'fckim@fastcampus.co.kr'}

In [57]:
# find() with an empty filter yields every document in the collection;
# list() collects them so the whole result is displayed.
query = {}
list(users_collection.find(query))

[{'_id': ObjectId('5c398d81dabf947b0839b872'),
  'name': 'Fastcampus Kim',
  'email': 'fckim@fastcampus.co.kr'},
 {'_id': ObjectId('5c39935fdabf947b0839b873'),
  'name': 'jyp',
  'email': 'jyp@fastcampus.co.kr'},
 {'_id': ObjectId('5c39935fdabf947b0839b874'),
  'name': 'gdragoon',
  'address': 'Seoul, Korea'},
 {'_id': ObjectId('5c39935fdabf947b0839b875'), 'name': 'Sbucks'}]

In [62]:
# Collect only the "name" field of each matching document.
# NOTE(review): relies on `query` still being the empty filter assigned in an
# earlier cell, and on every document having a "name" key.
[item["name"] for item in users_collection.find(query)]

['Fastcampus Kim', 'jyp', 'gdragoon', 'Sbucks']

In [64]:
# SQL analogue: WHERE name = 'jyp'
# The filter value is the lowercase string "jyp", as stored by the insert above.
query = {
    "name":"jyp",
}
list(users_collection.find(query))

[{'_id': ObjectId('5c39935fdabf947b0839b873'),
  'name': 'jyp',
  'email': 'jyp@fastcampus.co.kr'}]

In [65]:
# SQL analogue: WHERE name IN ('jyp','Sbucks')
# "$in" matches documents whose "name" equals any value in the given list.
query = {
    "name":{
        "$in":['jyp','Sbucks']
    }
}
list(users_collection.find(query))

[{'_id': ObjectId('5c39935fdabf947b0839b873'),
  'name': 'jyp',
  'email': 'jyp@fastcampus.co.kr'},
 {'_id': ObjectId('5c39935fdabf947b0839b875'), 'name': 'Sbucks'}]

In [66]:
# TODO: add a collection named bigbang in mLab, then run the statements below
# Connect to zigb... and fetch the listing data of a neighbourhood of your choice, then add it
# requests.get is the tool for that.

# Step 1 assign collection named "bigbang_collection"
bigbang_collection = db.bigbang

In [68]:
# Show every document currently stored in the bigbang collection.
list(bigbang_collection.find({}))

[]

In [69]:
# Step 2 get real estate data from zig...
# Request the detail records of a fixed set of item ids. The timeout (in
# seconds) keeps the cell from hanging forever if the server never answers;
# requests applies no timeout unless one is given.

response = requests.get("https://apis.zigbang.com/v3/items?detail=true&item_ids=[14398188,14388831,14359880,14470986,14398265,14399002,14454456,14359830,14455741,14398108,14407326,14538654,14271650,14169764,14317786,14360392,14284983,14455772,14507858,14328611,14324683,14455800,14472148,14321635,14397994,14062510,14398156,14164631,14398229,14512053,14454383,14511262,14536531,14269988,14187437,14467241,14448306,14226360,14467013,14317887,14513370,13416109,14165519,14263914,14222733,14254448,14467099,14203858,14328661,14241599,14407024,14138253,14527248,14175178,14147978,14328791,14178200,14348743,14428203,14218025]", timeout=10)

In [74]:
# Stop with a clear HTTP error on a 4xx/5xx answer instead of failing later
# with a confusing JSON or KeyError on an error page.
response.raise_for_status()
# The listing records sit under the top-level "items" key of the JSON body.
data = response.json()['items']

In [75]:
# Step 3 insert into bigbang_collection
# NOTE: re-running this cell inserts the same listings again.
bigbang_collection.insert_many(data)

# SQL analogue: SELECT count(*) FROM bigbang;
# Cursor.count() is deprecated and no longer exists in PyMongo 4;
# count_documents() with an empty filter counts every document.
bigbang_collection.count_documents({})

60

In [91]:
# Count listings in bigbang_collection whose deposit is between 100 and 2000
# (both inclusive) OR whose monthly rent is at most 50.
# rent: monthly rent, deposit: security deposit
# NOTE(review): the original description only mentioned "deposit <= 2000"; the
# filter has always applied the lower bound of 100 as well.
query = {
    "$or": [
        {
            "item.deposit": {
                "$lte": 2000,
                "$gte": 100,
            }
        },
        {
            "item.rent": {
                "$lte": 50,
            }
        },
    ]
}

# Count on the server instead of downloading every matching document just to
# take the length of the list.
bigbang_collection.count_documents(query)

55

In [101]:
# naver realtime keyword
# with beautifulsoup4
from bs4 import BeautifulSoup
from time import ctime


# Fetch the front page. The timeout keeps the cell from hanging forever, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
page = requests.get('https://www.naver.com/', timeout=10)
page.raise_for_status()
html = page.text
executed_time = ctime() # value1: when the snapshot was taken, as a ctime() string

soup = BeautifulSoup(html, 'html.parser')
kw_list = soup.find("ul", attrs={"class":"ah_l"})
if kw_list is None:
    # find() returns None when the element is absent, e.g. after a page
    # redesign; fail with an explicit message rather than an AttributeError.
    raise RuntimeError("keyword list <ul class='ah_l'> not found on the page")
all_li = kw_list.find_all("span", attrs={"class":"ah_k"})
result = [li.text for li in all_li] # value2: keyword strings in page order

# Shape of the stored document:
# {"executed": <timestamp string>,
#  "keywords": ["keyword 1", "keyword 2", ...]}
nv_collection = db.nvkwlist

# NOTE(review): `data` was the list of listings in an earlier cell; it is
# rebound to this single document here.
data = {
    "executed": executed_time,
    "keywords": result,
}

nv_collection.insert_one(data)

<pymongo.results.InsertOneResult at 0x10d4b3888>

In [102]:
# Read one document back from the nvkwlist collection.
nv_collection.find_one({})

{'_id': ObjectId('5c39a65cdabf947b0839b8b2'),
 'executed': 'Sat Jan 12 17:33:32 2019',
 'keywords': ['스카이캐슬 스포',
  '주진모',
  '몽키킹3',
  '어뜨무러차',
  '박소연',
  '베트남 이란',
  '음악중심',
  '자이글 롤링쿡스',
  '모아나',
  '청파동 냉면집',
  '만물상아귀찜',
  '신애라',
  '허안나',
  '황교안',
  '미세먼지 비상저감조치',
  '한국 중국',
  '케어',
  '말리와 나',
  'sky 캐슬 스포',
  '쇼음악중심']}

# 숙제

## SQL

- products.db에 대해
    - Employees 의 월별 생일의 분포를 구하세요
    - OrderDetails 의 OrderID 별 주문 금액의 합을 구하세요
    
## noSQL

- 앞서 저장한 매물정보에서
    - 4층 이상인 건물 중 3층 이상인 매물의 목록을 구하세요
    - 면적이 33 이상이거나, 조회수가 50 이상인 매물의 목록을 구하세요