# 02. XML로 변환하기
- yes24의 IT 신간 페이지 http://www.yes24.com/24/Category/NewProductList/001001003?sumGb=04

## 데이터베이스를 만들고 테이블 등록하기

### 데이터베이스 만들기
- mysql에 접속(MySQL Shell에서 `\connect root@localhost:3306`, `\sql` 차례로 입력)
- 데이터베이스 생성(book_db)

```mysql
CREATE DATABASE book_db DEFAULT CHARACTER SET utf8;
use book_db;
```

### 데이터베이스에 테이블 등록하기
```mysql
CREATE TABLE `languages` (
    `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
    `name` varchar(8) NOT NULL DEFAULT '',
    `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8mb4;
```

```mysql
INSERT INTO `languages` (`id`, `name`)
VALUES
    (1, '한국어'),
    (2, '영어');
```

### 출판사를 저장할 테이블 추가하기
```mysql
CREATE TABLE `publishers` (
    `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
    `name` varchar(128) NOT NULL DEFAULT '',
    `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
```

```mysql
INSERT INTO `publishers` (`id`, `name`)
VALUES
    (1, '위키북스'),
    (2, '한빛미디어'),
    (3, 'Addison-Wesley');
```

### 도서를 저장할 테이블 만들기
- id=ISBN으로 AUTO_INCREMENT를 지정하지 않음

```mysql
CREATE TABLE `books` (
    `id` int(11) unsigned NOT NULL,
    `publisher_id` int(11) NOT NULL,
    `title` varchar(128) NOT NULL DEFAULT '',
    `language_id` int(11) NOT NULL,
    `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
```

```mysql
INSERT INTO `books` (`id`, `title`, `publisher_id`, `language_id`)
VALUES
    (34973284, 'HTML5 웹 프로그래밍 입문', 2, 1),
    (57556147, 'Hello Coding 파이썬', 2, 1),
    (71051687, '파이썬을 이용한 머신러닝, 딥러닝 실전 앱 개발', 1, 1),
    (32604814, 'The Art of Computer Programming 5', 3, 2);
```

## XML 사용하기
- 트리 구조로 되어있으며 RSS, Atom 등의 피드로도 사용됨

## Orator 사용하기
- `pip install mysqlclient orator`

### XML 만들기
- `logger = logging.getLogger('orator.connection.queries')` : Orator 쿼리 로거 이름을 전달
- `publishers.load('books', 'books.language')` : 관련된 데이터를 먼저 읽어 두는 처리로, 반복문 내부에서 데이터가 호출될 때마다 쿼리 발행하는 것을 막음

In [1]:
import xml.etree.ElementTree as ET
from xml.dom import minidom
import logging

from orator import DatabaseManager, Model
from orator.orm import belongs_to, has_many

# Orator가 어떤 SQL을 실행하는지 로그로 출력해서 확인하기
logger = logging.getLogger('orator.connection.queries')
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter(
    'It took %(elapsed_time)sms to execute the query %(query)s')

handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger.addHandler(handler)

# MySQL 접속 설정
config = {
    'mysql':{
        'driver':'mysql',
        'host':'localhost',
        'database':'book_db',
        'user':'root',
        'password':'3756',
        'prefix':'',
        'log_queries':True,
    }
}

db = DatabaseManager(config)
Model.set_connection_resolver(db)

# 각 테이블과 객체의 관계성 정의
# 클래스 이름이 소문자 또는 스네이크 케이스로 변경돼 테이블의 이름과 대응됨
class Language(Model):
    """언어의 종류"""
    pass

class Book(Model):
    """도서"""
    # books 테이블의 language_id에 해당하는 데이터 가져오기
    @belongs_to
    def language(self):
        """책의 언어"""
        return Language
    
    #publishers 테이블의 publisher_id에 해당하는 데이터 가져오기
    @belongs_to
    def publisher(self):
        """책의 출판사"""
        return Publisher

class Publisher(Model):
    """출판사"""
    # 하나의 출판사에는 여러 도서가 들어갈 수 있음
    @has_many
    def books(self):
        """출판사의 도서들"""
        return Book
    
# 변환 함수
def create_xml():
    """XML 만들기"""
    elm_root = ET.Element("catalog")
    publishers = Publisher.all()
    publishers.load('books', 'books.language') # Eager Loading
    for publisher in publishers:
        for book in publisher.books:
            elm_book = ET.SubElement(elm_root, "book", id=str(book.id))
            ET.SubElement(elm_book, "publisher", id=str(publisher.id)).text = publisher.name
            ET.SubElement(elm_book, "title").text = book.title
            ET.SubElement(elm_book, "language", id=str(book.language.id)).text = book.language.name
    
    with minidom.parseString(ET.tostring(elm_root, 'utf-8')) as dom:
        return dom.toprettyxml(indent='    ')
    
if __name__ == '__main__':
    xml_str = create_xml()
    print(xml_str)

It took 1.0ms to execute the query ('SELECT * FROM `publishers`', [])
It took 0.0ms to execute the query ('SELECT * FROM `books` WHERE `books`.`publisher_id` IN (%s, %s, %s)', [1, 2, 3])
It took 0.0ms to execute the query ('SELECT * FROM `languages` WHERE `languages`.`id` IN (%s, %s)', [2, 1])


<?xml version="1.0" ?>
<catalog>
    <book id="71051687">
        <publisher id="1">위키북스</publisher>
        <title>파이썬을 이용한 머신러닝, 딥러닝 실전 앱 개발</title>
        <language id="1">한국어</language>
    </book>
    <book id="34973284">
        <publisher id="2">한빛미디어</publisher>
        <title>HTML5 웹 프로그래밍 입문</title>
        <language id="1">한국어</language>
    </book>
    <book id="57556147">
        <publisher id="2">한빛미디어</publisher>
        <title>Hello Coding 파이썬</title>
        <language id="1">한국어</language>
    </book>
    <book id="32604814">
        <publisher id="3">Addison-Wesley</publisher>
        <title>The Art of Computer Programming 5</title>
        <language id="2">영어</language>
    </book>
</catalog>



In [4]:
# 변환 함수2
def create_xml2():
    """XML 만들기"""
    elm_root = ET.Element("catalog")
    publishers = Publisher.all()
    publishers.load('books', 'books.language') # Eager Loading
    for publisher in publishers:
        elm_publisher = ET.SubElement(elm_root, "publisher", id=str(publisher.id))
        for book in publisher.books:
            elm_book = ET.SubElement(elm_publisher, "book", id=str(book.id))
            ET.SubElement(elm_book, "title").text = book.title
            ET.SubElement(elm_book, "language", id=str(book.language.id)).text = book.language.name
    
    with minidom.parseString(ET.tostring(elm_root, 'utf-8')) as dom:
        return dom.toprettyxml(indent='    ')
    
if __name__ == '__main__':
    xml_str = create_xml2()
    print(xml_str)

It took 1.0ms to execute the query ('SELECT * FROM `publishers`', [])
It took 1.0ms to execute the query ('SELECT * FROM `books` WHERE `books`.`publisher_id` IN (%s, %s, %s)', [1, 2, 3])
It took 0.0ms to execute the query ('SELECT * FROM `languages` WHERE `languages`.`id` IN (%s, %s)', [2, 1])


<?xml version="1.0" ?>
<catalog>
    <publisher id="1">
        <book id="71051687">
            <title>파이썬을 이용한 머신러닝, 딥러닝 실전 앱 개발</title>
            <language id="1">한국어</language>
        </book>
    </publisher>
    <publisher id="2">
        <book id="34973284">
            <title>HTML5 웹 프로그래밍 입문</title>
            <language id="1">한국어</language>
        </book>
        <book id="57556147">
            <title>Hello Coding 파이썬</title>
            <language id="1">한국어</language>
        </book>
    </publisher>
    <publisher id="3">
        <book id="32604814">
            <title>The Art of Computer Programming 5</title>
            <language id="2">영어</language>
        </book>
    </publisher>
</catalog>



# 03. JSON으로 변환하기

## JSON 사용하기
- 자바스크립트의 객체를 작성할 때 사용하는 형식으로 XML처럼 트리 구조를 가짐
- XML과 다르게 모두 키와 값으로 표현되며 별도의 속성값을 부여할 수 없음
- 작성 방법이 굉장히 간단하고 프로그램이 표준으로 가진 자료 구조로 변환하기 쉬워서 쉽게 사용할 수 있음

In [7]:
import json

# 변환 함수
def create_json():
    """JSON 만들기"""
    books = []
    publishers = Publisher.all()
    publishers.load('books', 'books.language') # Eager Loading
    for publisher in publishers:
        for book in publisher.books:
            d = {}
            d['id'] = book.id
            d['title'] = book.title
            d['publisher'] = {'id':publisher.id, 'name':publisher.name}
            d['language'] = {'id':book.language.id, 'name':book.language.name}
            
            books.append(d)
    
    return json.dumps(books, ensure_ascii=False, indent=4)

if __name__ == '__main__':
    json_str = create_json()
    print(json_str)

It took 0.0ms to execute the query ('SELECT * FROM `publishers`', [])
It took 0.0ms to execute the query ('SELECT * FROM `books` WHERE `books`.`publisher_id` IN (%s, %s, %s)', [1, 2, 3])
It took 0.0ms to execute the query ('SELECT * FROM `languages` WHERE `languages`.`id` IN (%s, %s)', [2, 1])


[
    {
        "id": 71051687,
        "title": "파이썬을 이용한 머신러닝, 딥러닝 실전 앱 개발",
        "publisher": {
            "id": 1,
            "name": "위키북스"
        },
        "language": {
            "id": 1,
            "name": "한국어"
        }
    },
    {
        "id": 34973284,
        "title": "HTML5 웹 프로그래밍 입문",
        "publisher": {
            "id": 2,
            "name": "한빛미디어"
        },
        "language": {
            "id": 1,
            "name": "한국어"
        }
    },
    {
        "id": 57556147,
        "title": "Hello Coding 파이썬",
        "publisher": {
            "id": 2,
            "name": "한빛미디어"
        },
        "language": {
            "id": 1,
            "name": "한국어"
        }
    },
    {
        "id": 32604814,
        "title": "The Art of Computer Programming 5",
        "publisher": {
            "id": 3,
            "name": "Addison-Wesley"
        },
        "language": {
            "id": 2,
            "name": "영어"
        }
    }
]


# 04. CSV로 변환하기
- CSV 값 내부에 쉼표가 있을 수 있기 때문에 큰따옴표로 감싸며, 값에 큰따옴표가 있다면 큰따옴표를 두 번 입력하여 이스케이프 처리

## CSV 모듈 사용하기

In [9]:
import csv
import io

def create_csv():
    """CSV 만들기"""
    output = io.StringIO()
    csv_writer = csv.writer(output, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    header = ['id', 'title', 'url', 'publisher_id', 'publisher_name', 'language_id', 'language_name']
    csv_writer.writerow(header)
    
    publishers = Publisher.all()
    publishers.load('books', 'books.language') # Eager Loading
    for publisher in publishers:
        for book in publisher.books:
            line = [
                book.id,
                book.title,
                book.publisher.id,
                book.publisher.name,
                book.language.id,
                book.language.name,
            ]
            csv_writer.writerow(line)
    return output.getvalue()

if __name__ == '__main__':
    csv_str = create_csv()
    print(csv_str)

It took 0.0ms to execute the query ('SELECT * FROM `publishers`', [])
It took 0.0ms to execute the query ('SELECT * FROM `books` WHERE `books`.`publisher_id` IN (%s, %s, %s)', [1, 2, 3])
It took 0.99ms to execute the query ('SELECT * FROM `languages` WHERE `languages`.`id` IN (%s, %s)', [2, 1])
It took 0.0ms to execute the query ('SELECT * FROM `publishers` WHERE `publishers`.`id` = %s LIMIT 1', [1])
It took 0.0ms to execute the query ('SELECT * FROM `publishers` WHERE `publishers`.`id` = %s LIMIT 1', [2])
It took 1.0ms to execute the query ('SELECT * FROM `publishers` WHERE `publishers`.`id` = %s LIMIT 1', [2])
It took 1.0ms to execute the query ('SELECT * FROM `publishers` WHERE `publishers`.`id` = %s LIMIT 1', [3])


"id","title","url","publisher_id","publisher_name","language_id","language_name"
"71051687","파이썬을 이용한 머신러닝, 딥러닝 실전 앱 개발","1","위키북스","1","한국어"
"34973284","HTML5 웹 프로그래밍 입문","2","한빛미디어","1","한국어"
"57556147","Hello Coding 파이썬","2","한빛미디어","1","한국어"
"32604814","The Art of Computer Programming 5","3","Addison-Wesley","2","영어"

