본문 바로가기
Python/퀀트

크롤링 실습: 국내 주식 섹터 데이터 크롤링

by 훈영 2024. 11. 6.
# 섹터 데이터 크롤링
import json 
import requests as rq
import pandas as pd

# Fetch the index constituents for a single sector (sec_cd=G10) on `biz_day`.
# NOTE(review): `biz_day` is not defined in this snippet; it must already be in
# scope (presumably a date string such as 'YYYYMMDD' — confirm with the caller).
url = f'''https://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt={biz_day}&sec_cd=G10'''

# requests has no default timeout, so bound the wait explicitly; also fail
# loudly on an HTTP error instead of trying to parse an error page as JSON.
response = rq.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# The constituents live under the 'list' key; flatten them into a DataFrame.
data_pd = pd.json_normalize(data['list'])


# 섹터 정보 크롤링
import time
import json
import requests as rq
import pandas as pd
from tqdm import tqdm

# Sector codes passed to the endpoint as `sec_cd`, one request per code.
sector_code = ['G25', 'G35', 'G50', 'G40', 'G10', 'G20', 'G55', 'G30', 'G15', 'G45']

# One DataFrame of constituents per sector, concatenated after the loop.
data_sector = []

# NOTE(review): `biz_day` is not defined in this snippet; it must already be in
# scope (presumably a date string such as 'YYYYMMDD' — confirm with the caller).
for i in tqdm(sector_code):
    url = f'''https://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt={biz_day}&sec_cd={i}'''

    # requests has no default timeout: without one, a single stalled request
    # would hang the whole crawl. Also stop on an HTTP error rather than
    # attempting to decode an error page as JSON.
    response = rq.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    # The constituents live under the 'list' key of the response.
    data_pd = pd.json_normalize(data['list'])

    data_sector.append(data_pd)

    # Pause between requests to avoid hammering the server.
    time.sleep(2)

# Stack all sectors row-wise and keep only the columns stored in the DB.
kor_sector = pd.concat(data_sector, axis=0)
kor_sector = kor_sector[['IDX_CD', 'CMP_CD', 'CMP_KOR', 'SEC_NM_KOR']]

# Tag every row with the reference date and convert it to a datetime column.
kor_sector['기준일'] = biz_day
kor_sector['기준일'] = pd.to_datetime(kor_sector['기준일'])


# sql DB에 저장
import pymysql

# Upsert the `kor_sector` DataFrame into the MySQL table of the same name.
# NOTE(review): credentials are hard-coded here; consider loading them from an
# environment variable or a config file that is kept out of version control.
con = pymysql.connect(user='root',
                      passwd='0000',
                      host='127.0.0.1',
                      db='stock_db',
                      charset='utf8')

# Plain (non-f) string: the %s markers are driver placeholders filled in by
# executemany, nothing is interpolated by Python.
# The `as new` row alias lets the update clause refer to the incoming row.
# NOTE(review): which rows count as duplicates depends on the table's
# primary/unique key, which is not visible here (presumably CMP_CD + 기준일,
# since those are the columns not overwritten — confirm against the schema).
query = """
    insert into kor_sector (IDX_CD, CMP_CD, CMP_KOR, SEC_NM_KOR, 기준일)
    values (%s, %s, %s, %s, %s) as new
    on duplicate key update
    IDX_CD = new.IDX_CD, CMP_KOR = new.CMP_KOR, SEC_NM_KOR =new.SEC_NM_KOR
    """

# One list per row, in the same column order as the insert statement.
args = kor_sector.values.tolist()

try:
    # The cursor is closed when the with-block exits, even on error.
    with con.cursor() as mycursor:
        mycursor.executemany(query, args)
    con.commit()
finally:
    # Always release the connection, including when the insert or commit
    # raises; uncommitted work is discarded when the connection closes.
    con.close()

댓글