In [2]:
import requests
import os
from dotenv import load_dotenv
from pprint import pprint
import pandas as pd
import json

# Load variables from a .env file into the process environment.
load_dotenv()

# Credentials are read from the CLIENT_ID / CLIENT_SECRET environment
# variables; each is None when the variable is not set.
client_id = os.environ.get("CLIENT_ID")
client_secret = os.environ.get("CLIENT_SECRET")

# Header mapping handed to requests.get() by the search functions.
headers = {'X-Naver-Client-Id': client_id, 'X-Naver-Client-Secret': client_secret}

In [3]:

def search_books(query, display=50, timeout=10):
    """Query the Naver book-search endpoint and return its result items.

    Uses the module-level ``headers`` mapping for authentication.

    Parameters:
        query (str): Search keyword, sent as the ``query`` parameter.
        display (int): Value sent as the ``display`` parameter (default 50).
        timeout (float): Seconds to wait for the server before giving up
            (default 10).

    Returns:
        list: The ``items`` entry of the JSON response (a list of dicts).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Query-string parameters, passed as a dict so requests builds the URL.
    payload = {
        'query': query,
        'display': display,
        'sort': 'sim'
    }

    url = 'https://openapi.naver.com/v1/search/book.json'  # e.g. ?query=파이썬&display=100&sort=sim

    res = requests.get(url, params=payload, headers=headers, timeout=timeout)
    # Surface HTTP failures (bad credentials, quota, ...) as HTTPError rather
    # than as an opaque KeyError on 'items' below.
    res.raise_for_status()

    return res.json()['items']  # [{}, {}, ...]

# def save_json(items_data):
#     with open('../data/books.json','w',encoding='utf-8') as file:
#         json.dump(items_data, file)

def search_shops(query, display=50, timeout=10):
    """Query the Naver shopping-search endpoint and return its result items.

    Uses the module-level ``headers`` mapping for authentication.

    Parameters:
        query (str): Search keyword, sent as the ``query`` parameter.
        display (int): Value sent as the ``display`` parameter (default 50).
        timeout (float): Seconds to wait for the server before giving up
            (default 10).

    Returns:
        list: The ``items`` entry of the JSON response (a list of dicts).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Query-string parameters, passed as a dict so requests builds the URL.
    payload = {
        'query': query,
        'display': display,
        'sort': 'sim'
    }

    url = 'https://openapi.naver.com/v1/search/shop.json'  # e.g. ?query=파이썬&display=100&sort=sim

    res = requests.get(url, params=payload, headers=headers, timeout=timeout)
    # Surface HTTP failures (bad credentials, quota, ...) as HTTPError rather
    # than as an opaque KeyError on 'items' below.
    res.raise_for_status()

    return res.json()['items']

def save_json(items_data, filepath='../data/books.json'):
    """Write ``items_data`` to ``filepath`` as human-readable UTF-8 JSON.

    Parameters:
        items_data: Any JSON-serializable object (here, a list of item dicts).
        filepath (str): Destination file; defaults to '../data/books.json'.

    The parent directory is created when it does not exist yet.
    """
    parent_dir = os.path.dirname(filepath)
    if parent_dir:  # a bare filename has no directory part to create
        os.makedirs(parent_dir, exist_ok=True)
    with open(filepath, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps non-ASCII text (e.g. Korean titles) readable
        # in the file instead of writing \uXXXX escapes.
        json.dump(items_data, file, ensure_ascii=False, indent=4)

def save_json_shop(items_data, filepath='../data/shops.json'):
    """Write ``items_data`` to ``filepath`` as human-readable UTF-8 JSON.

    Parameters:
        items_data: Any JSON-serializable object (here, a list of item dicts).
        filepath (str): Destination file; defaults to '../data/shops.json'.

    The parent directory is created when it does not exist yet.
    """
    parent_dir = os.path.dirname(filepath)
    if parent_dir:  # a bare filename has no directory part to create
        os.makedirs(parent_dir, exist_ok=True)
    with open(filepath, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps non-ASCII text (e.g. Korean brand names)
        # readable in the file instead of writing \uXXXX escapes.
        json.dump(items_data, file, ensure_ascii=False, indent=4)


if __name__ == '__main__':
    # Fetch book results first, then shop results, saving each one
    # through its own helper.
    save_json(items_data=search_books(query='파이썬'))
    save_json_shop(items_data=search_shops(query='가디건'))

### 리팩토링된 코드

In [4]:
import requests
import os
from dotenv import load_dotenv
import json

# Load variables from a .env file into the process environment.
load_dotenv()

# Credentials are read from the CLIENT_ID / CLIENT_SECRET environment
# variables; each is None when the variable is not set.
client_id = os.environ.get("CLIENT_ID")
client_secret = os.environ.get("CLIENT_SECRET")

# Header mapping handed to requests.get() by search_naver_api().
headers = {'X-Naver-Client-Id': client_id, 'X-Naver-Client-Secret': client_secret}


def search_naver_api(endpoint, query, display=50, timeout=10):
    """Query a Naver search endpoint and return its result items.

    Uses the module-level ``headers`` mapping for authentication.

    Parameters:
        endpoint (str): Path segment placed in the URL, e.g. 'book' or 'shop'.
        query (str): Search keyword, sent as the ``query`` parameter.
        display (int): Value sent as the ``display`` parameter (default 50).
        timeout (float): Seconds to wait for the server before giving up
            (default 10).

    Returns:
        list: The ``items`` entry of the JSON response, or an empty list when
        the response has no such key.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    payload = {
        'query': query,
        'display': display,
        'sort': 'sim'
    }
    url = f'https://openapi.naver.com/v1/search/{endpoint}.json'
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    res = requests.get(url, params=payload, headers=headers, timeout=timeout)
    res.raise_for_status()  # turn 4xx/5xx responses into an exception
    return res.json().get('items', [])


def save_json(data, filepath):
    """Write ``data`` to ``filepath`` as indented UTF-8 JSON.

    Parameters:
        data: Any JSON-serializable object.
        filepath (str): Destination file. Missing parent directories are
            created; a bare filename is written to the current directory.
    """
    parent_dir = os.path.dirname(filepath)
    # dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


if __name__ == '__main__':
    # Book search results -> ../data/books.json
    books = search_naver_api(endpoint='book', query='파이썬')
    save_json(data=books, filepath='../data/books.json')

    # Shopping search results -> ../data/shops.json
    shops = search_naver_api(endpoint='shop', query='가디건')
    save_json(data=shops, filepath='../data/shops.json')


In [None]:
import pandas as pd

# Load the saved book search results into a DataFrame.
books_df = pd.read_json('../data/books.json')
# Print the object's type, then display the first two rows as the cell output.
print(type(books_df))
books_df.head(2)

In [None]:

# Rows whose 'discount' value is at least 20000, limited to five columns,
# sorted by 'discount' from highest to lowest with the index renumbered from 0.
books_df.loc[books_df['discount'] >= 20000,['title','author','discount','publisher','pubdate']]\
    .sort_values(by='discount', ascending=False).reset_index(drop=True)

In [None]:
# The discount filter wrapped in a reusable function
def filter_and_sort_books(df, min_discount=20000):
    """
    Keep books whose 'discount' value is at least min_discount, highest first.

    Parameters:
        df (DataFrame): Book data; must contain the columns
            title, author, discount, publisher and pubdate.
        min_discount (int): Lower bound (inclusive) on 'discount'
            (default 20000).

    Returns:
        DataFrame: The five columns above for the matching rows, sorted by
        'discount' in descending order with a fresh 0-based index.
    """
    wanted_columns = ['title', 'author', 'discount', 'publisher', 'pubdate']
    meets_threshold = df['discount'] >= min_discount
    selected = df.loc[meets_threshold, wanted_columns]
    ranked = selected.sort_values(by='discount', ascending=False)
    return ranked.reset_index(drop=True)

# Example call with a 10000 threshold instead of the default.
filter_and_sort_books(books_df,10000)

In [7]:
# List the column labels of the book DataFrame.
books_df.columns

Index(['title', 'link', 'image', 'author', 'discount', 'publisher', 'pubdate',
       'isbn', 'description'],
      dtype='object')

In [11]:
# Show the type of the 'publisher' column and of its .str accessor.
print(type(books_df['publisher']))
print(type(books_df['publisher'].str))

<class 'pandas.core.series.Series'>
<class 'pandas.core.strings.accessor.StringMethods'>


In [13]:
# Column labels without 'image' and 'description' (books_df itself is not modified).
books_df.columns.drop(['image','description'])

Index(['title', 'link', 'author', 'discount', 'publisher', 'pubdate', 'isbn'], dtype='object')

In [None]:
# Rows whose publisher contains '인피니티북스', showing every column
# except 'image' and 'description'.
books_df.loc[books_df['publisher'].str.contains('인피니티북스'),\
    books_df.columns.drop(['image','description'])].reset_index(drop=True)

In [15]:
# Distinct publisher names present in the data.
books_df['publisher'].unique()

array(['이지스퍼블리싱', '한빛미디어', '기한재', '생능출판', '렉스미디어닷넷', '북두', '인포앤북',
       '복두출판사', '에듀웨이', '정보문화사', '도서출판 홍릉(홍릉과학출판사)', '디지털북스', '다본',
       '에피스테메', '인피니티북스', '한빛아카데미', '길벗', '자유아카데미', '클라우드북스',
       '한국방송통신대학교출판문화원', '퍼플', '성안당', '북랩', '영진닷컴', '그린', '연두에디션',
       '탐진출판사', '에이콘출판', '시그마프레스', '길벗캠퍼스', '제이펍'], dtype=object)

In [None]:
def filter_books_by_publisher(df, publisher_name, regex=False):
    """
    Keep books whose publisher contains publisher_name, dropping the
    'image' and 'description' columns.

    Parameters:
        df (DataFrame): Book data with 'publisher', 'image' and
            'description' columns.
        publisher_name (str): Text to look for inside the publisher name.
        regex (bool): Treat publisher_name as a regular expression instead of
            literal text (default False).

    Returns:
        DataFrame: Matching rows with a fresh 0-based index.
    """
    # Literal matching by default: publisher names can contain regex
    # metacharacters such as parentheses, which would otherwise be parsed as
    # a pattern and fail to match the literal name.
    # na=False makes rows with a missing publisher count as "no match" rather
    # than putting NaN into the boolean mask.
    matches = df['publisher'].str.contains(publisher_name, regex=regex, na=False)
    kept_columns = df.columns.drop(['image', 'description'])
    return df.loc[matches, kept_columns].reset_index(drop=True)

# Example call: books whose publisher contains '한빛미디어'.
filter_books_by_publisher(books_df,'한빛미디어')

In [None]:
import pandas as pd

# Load the saved shopping search results into a DataFrame.
shops_df = pd.read_json('../data/shops.json')
# Print the object's type, then display the first two rows as the cell output.
print(type(shops_df))
shops_df.head(2)

In [18]:
# Summarize the columns: non-null counts, dtypes and memory usage.
shops_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   title        50 non-null     object
 1   link         50 non-null     object
 2   image        50 non-null     object
 3   lprice       50 non-null     int64 
 4   hprice       50 non-null     object
 5   mallName     50 non-null     object
 6   productId    50 non-null     int64 
 7   productType  50 non-null     int64 
 8   brand        50 non-null     object
 9   maker        50 non-null     object
 10  category1    50 non-null     object
 11  category2    50 non-null     object
 12  category3    50 non-null     object
 13  category4    50 non-null     object
dtypes: int64(3), object(11)
memory usage: 5.6+ KB


In [19]:
# Products with 'lprice' of at most 50000, limited to four columns,
# sorted by 'lprice' ascending with the index renumbered from 0.
shops_df.loc[shops_df['lprice'] <= 50000,['brand','lprice','mallName','link']]\
    .sort_values(by='lprice').reset_index(drop=True)

Unnamed: 0,brand,lprice,mallName,link
0,BNX,13595,네이버,https://search.shopping.naver.com/catalog/5638...
1,GGPX,15790,네이버,https://search.shopping.naver.com/catalog/5726...
2,스파오,15900,네이버,https://search.shopping.naver.com/catalog/5629...
3,BNX,17740,네이버,https://search.shopping.naver.com/catalog/5724...
4,로엠,17910,네이버,https://search.shopping.naver.com/catalog/5267...
5,제이플로우,18900,제이플로우,https://smartstore.naver.com/main/products/124...
6,GGPX,19920,네이버,https://search.shopping.naver.com/catalog/5724...
7,,22900,ARUMY,https://smartstore.naver.com/main/products/546...
8,후아유,23650,네이버,https://search.shopping.naver.com/catalog/5602...
9,올리브데올리브,26100,네이버,https://search.shopping.naver.com/catalog/5684...


In [20]:
def filter_and_sort_shops(df, max_price=50000):
    """
    Keep products whose 'lprice' is at most max_price, cheapest first.

    Parameters:
        df (DataFrame): Shopping data; must contain the columns
            brand, lprice, mallName and link.
        max_price (int): Upper bound (inclusive) on 'lprice' (default 50000).

    Returns:
        DataFrame: The four columns above for the matching rows, sorted by
        'lprice' in ascending order with a fresh 0-based index.
    """
    shown_columns = ['brand', 'lprice', 'mallName', 'link']
    within_budget = df['lprice'] <= max_price
    affordable = df.loc[within_budget, shown_columns]
    cheapest_first = affordable.sort_values(by='lprice')
    return cheapest_first.reset_index(drop=True)

# Example call with a 20000 price cap instead of the default.
filter_and_sort_shops(shops_df,20000)

Unnamed: 0,brand,lprice,mallName,link
0,BNX,13595,네이버,https://search.shopping.naver.com/catalog/5638...
1,GGPX,15790,네이버,https://search.shopping.naver.com/catalog/5726...
2,스파오,15900,네이버,https://search.shopping.naver.com/catalog/5629...
3,BNX,17740,네이버,https://search.shopping.naver.com/catalog/5724...
4,로엠,17910,네이버,https://search.shopping.naver.com/catalog/5267...
5,제이플로우,18900,제이플로우,https://smartstore.naver.com/main/products/124...
6,GGPX,19920,네이버,https://search.shopping.naver.com/catalog/5724...


In [21]:
# List the column labels of the shopping DataFrame.
shops_df.columns

Index(['title', 'link', 'image', 'lprice', 'hprice', 'mallName', 'productId',
       'productType', 'brand', 'maker', 'category1', 'category2', 'category3',
       'category4'],
      dtype='object')

In [None]:
# Rows sold by the '네이버' mall; the label slice 'lprice':'brand' keeps the
# columns from 'lprice' through 'brand' in the frame's column order.
# Sorted by 'lprice' ascending with the index renumbered from 0.
shops_df.loc[shops_df['mallName'] == '네이버','lprice':'brand']\
    .sort_values(by='lprice').reset_index(drop=True)

In [23]:
# Distinct mall names present in the data.
shops_df['mallName'].unique()

array(['트위티 155', '부스더샵', '레이바쿠', '네이버', '세컨찬스라이프', 'ARUMY', '아임재팬',
       '나우인뉴욕', '미드시티 여성니트', '션타운', '엠클로', '제이플로우', '브랜드사는이쁜언니',
       '브랜드리퍼블릭'], dtype=object)

In [24]:
# Distinct brand names present in the data.
shops_df['brand'].unique()

array(['폴로랄프로렌', '꼼데가르송', '조르쥬레쉬', '', '라코스테', '지고트', 'BNX', '헤지스', '로엠',
       '수아레우먼', 'GGPX', '제너럴아이디어', '제이슨우', '제이플로우', '스파오', '올리브데올리브',
       '후아유', '에디션', '스튜디오톰보이', '유니클로', '마인드브릿지', '메종키츠네', '미쏘'],
      dtype=object)

In [25]:
def filter_shops_by_mall(df, mall_name='네이버'):
    """
    Keep products sold by one mall, cheapest first.

    Parameters:
        df (DataFrame): Shopping data containing 'mallName', with 'lprice'
            positioned before 'brand' in the column order.
        mall_name (str): Exact mall name to match (default '네이버').

    Returns:
        DataFrame: Columns 'lprice' through 'brand' for the matching rows,
        sorted by 'lprice' ascending with a fresh 0-based index.
    """
    sold_by_mall = df['mallName'] == mall_name
    # Label-based slice: takes the columns from 'lprice' through 'brand'.
    price_to_brand = df.loc[sold_by_mall, 'lprice':'brand']
    cheapest_first = price_to_brand.sort_values(by='lprice')
    return cheapest_first.reset_index(drop=True)

# Example call for the '나우인뉴욕' mall instead of the default.
filter_shops_by_mall(shops_df,'나우인뉴욕')

Unnamed: 0,lprice,hprice,mallName,productId,productType,brand
0,87900,,나우인뉴욕,84216650390,2,폴로랄프로렌


#### mallName 또는 brand로 검색하기

In [None]:
def filter_shops(df, keyword, search_type='mall'):
    """
    Keep products matching a mall name or a brand, cheapest first.

    Parameters:
        df (DataFrame): Shopping data containing 'mallName' and 'brand', with
            'lprice' positioned before 'brand' in the column order.
        keyword (str): Exact value to match (e.g. '네이버', '폴로랄프로렌').
        search_type (str): 'mall' compares against 'mallName', 'brand'
            compares against 'brand' (default 'mall').

    Returns:
        DataFrame: Columns 'lprice' through 'brand' for the matching rows,
        sorted by 'lprice' ascending with a fresh 0-based index.

    Raises:
        ValueError: If search_type is neither 'mall' nor 'brand'.
    """
    # Reject unknown search types before touching the frame.
    if search_type not in ('mall', 'brand'):
        raise ValueError("search_type은 'mall' 또는 'brand'만 가능합니다.")

    target_column = 'mallName' if search_type == 'mall' else 'brand'
    condition = df[target_column] == keyword

    matched = df.loc[condition, 'lprice':'brand']
    cheapest_first = matched.sort_values(by='lprice')
    return cheapest_first.reset_index(drop=True)

# Example call: match on the 'brand' column.
filter_shops(shops_df,'라코스테','brand')

In [None]:
# Example call using the default search_type ('mall').
filter_shops(shops_df,'네이버')