python_apps/crwlers/naver_price_get.py
2023-11-03 14:49:12 +09:00

78 lines
2.7 KiB
Python

import re
import sys
import urllib
import urllib.parse

import bs4
import requests
# Fetch one Naver Shopping search-result page for the term given on the
# command line and pre-select the elements the reporting code below reads.

# The search term is the first command-line argument; exit with a usage
# message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <search term>")
term = sys.argv[1]

# The URL-encoded term is sent twice: as `origQuery` and as `query`.
query = urllib.parse.quote_plus(term)
targetUrl = (
    "https://search.shopping.naver.com/search/all.nhn"
    "?origQuery=" + query
    + "&pagingIndex=1&pagingSize=80&productSet=model&viewType=list"
    "&sort=rel&frm=NVSHMDL&query=" + query
)

# requests applies no timeout by default, so an unresponsive server would
# block the script forever; fail after 10 seconds instead.
resp = requests.get(targetUrl, timeout=10)
resp.raise_for_status()  # abort on HTTP 4xx/5xx rather than parsing an error page
resp.encoding = 'UTF-8'  # decode the body as UTF-8 regardless of the response headers
html = resp.text
bs = bs4.BeautifulSoup(html, 'html.parser')

# `li.snb_compare` entries inside `ul.snb_list`; an empty list is reported
# as "Not Found" by the code that follows.
res_count = bs.select('ul.snb_list li.snb_compare')
# One `div.info` and one `div.info_mall` per `li._itemSection`; the
# reporting code pairs the two lists by index.
_lists = bs.select('li._itemSection div.info')
mallInfo = bs.select('li._itemSection div.info_mall')
def _first_text(node, selector, default=''):
    """Return the stripped text of the first match of `selector` under `node`.

    `default` is returned when nothing matches, so a listing that lacks an
    element yields a placeholder instead of raising IndexError.
    """
    matches = node.select(selector)
    return matches[0].getText().strip() if matches else default


# Print one tab-separated row per product listing, or a single
# "<term>\tNot Found" row when the page has no `li.snb_compare` element.
if not res_count:
    print(term + "\t" + 'Not Found')
else:
    # Loop-invariant: text of the first `li.snb_compare` element with the
    # literal "가격비교" ("price comparison") removed.
    # NOTE(review): presumably what remains is the result count - confirm.
    compareLabel = re.sub(r"가격비교", "", res_count[0].getText().strip())

    for index, info in enumerate(_lists):
        # Product title and link come from the same `a.tit` anchor; both
        # fall back to '' when the anchor (or its href) is absent.
        titleTags = info.select('a.tit')
        productName = titleTags[0].getText().strip() if titleTags else ''
        linkStr = (titleTags[0].get('href') if titleTags else None) or ''

        # Category text: drop tabs, newlines and pairs of whitespace
        # characters (the text is intentionally not stripped first).
        depthTags = info.select('span.depth')
        if depthTags:
            categoryText = re.sub(r"\t|\n|\s\s", "", depthTags[0].getText())
        else:
            categoryText = ''

        # Seller names are read from the first `ul.mall_list` of the
        # `div.info_mall` at the same index as this listing. A missing
        # entry or list is treated as "no sellers".
        # NOTE(review): index pairing assumes every item has exactly one
        # `div.info` and one `div.info_mall` - confirm against the page.
        mallLists = mallInfo[index].select('ul.mall_list') if index < len(mallInfo) else []
        mallTags = (
            mallLists[0].select('li a._lowPriceByMall em span.mall_name')
            if mallLists else []
        )
        mallNames = [tag.getText().strip() for tag in mallTags]

        if not mallNames:
            # No seller listed: report "판매중단" (sales discontinued) as
            # the price and zero selling items.
            priceText = '판매중단'
            sellingItemCount = '0'
        else:
            priceText = _first_text(info, 'span.price em span.num')
            # Drop the "판매처 " (seller) label from the compare button text.
            sellingItemCount = re.sub(
                r"판매처\ ", "", _first_text(info, 'span.price a.btn_compare')
            )

        reviewCountsText = _first_text(info, 'span.etc a.graph em', '0')

        # Drop the leading "등록일 " (registration date) label and a
        # trailing period from the date text.
        regDateText = re.sub(
            r"^등록일\ |\.$", "", _first_text(info, 'span.etc span.date')
        )

        # Column order: term, compare label, rank (1-based), product name,
        # price, category, review count, registration date, selling item
        # count, number of malls, comma-joined mall names, product link.
        print("\t".join([
            term,
            compareLabel,
            str(index + 1),
            productName,
            priceText,
            categoryText,
            reviewCountsText,
            regDateText,
            sellingItemCount,
            str(len(mallNames)),
            ",".join(mallNames),
            linkStr,
        ]))