python_apps/crwlers/navimro_search.py
2023-11-03 14:49:12 +09:00

104 lines
3.1 KiB
Python

#!/Users/maddiekorea/Workspace/bin/python
#version 20190601
import requests, bs4, urllib, sys, re, math
from datetime import datetime
# Search endpoint, and the site root that relative product links are joined to.
SEARCH_URL = "https://www.navimro.com/s/"
SITE_ROOT = "https://www.navimro.com"

# Seconds to wait on the server. Without a timeout requests may block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    # 'br' is deliberately not advertised: requests only decodes Brotli when
    # an optional Brotli package is installed, and an undecoded body would
    # make every selector below silently match nothing.
    'Accept-Encoding': 'gzip, deflate',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Host': 'www.navimro.com',
}

# Result-count heuristics carried over unchanged from the original script.
# NOTE(review): presumably a full page is 1 non-product row + 90 products and
# the paging bar holds 4 non-page links (first/prev/next/last) - confirm
# against the live markup.
ROWS_ON_FULL_PAGE = 91
PRODUCTS_PER_PAGE = 90
NON_PAGE_PAGING_LINKS = 4

# Korean label words ("total", "kinds", "products") removed from the
# SKU-count text; compiled once because it is applied to every row.
SKU_COUNT_NOISE = re.compile(r"총|종류|상품")


def price_plus_ten_percent(price_text):
    """Return the listed price increased by 10%, rounded up to an integer.

    `price_text` is the price as shown on the page, with optional thousands
    separators (e.g. "1,000"). Raises ValueError if the text is not an
    integer once the commas are removed.

    The arithmetic is done in integers: the float form
    `math.ceil(n * 1.1)` is off by one for many prices, because
    100 * 1.1 == 110.00000000000001 and its ceiling is 111.
    NOTE(review): the 10% surcharge is presumably VAT - confirm.
    """
    net = int(price_text.replace(",", ""))
    # Ceiling division of net * 11 by 10 without going through floats.
    return -((-net * 11) // 10)


def estimate_result_count(row_count, paging_link_count):
    """Return the result-count column for a search page.

    `row_count` is the number of table rows found in the product list and
    `paging_link_count` the number of links found in the paging bar.

    Returns 'NoResult' when there are no rows, `row_count - 1` when the page
    is not full (the first row is not counted as a product), and otherwise an
    estimate derived from the number of paging links.
    """
    if row_count == 0:
        return 'NoResult'
    if row_count < ROWS_ON_FULL_PAGE:
        return row_count - 1
    return (paging_link_count - NON_PAGE_PAGING_LINKS) * PRODUCTS_PER_PAGE


def optional_text(node, selector, default=''):
    """Return the stripped text of the first match of `selector`, or `default`."""
    matches = node.select(selector)
    return matches[0].getText().strip() if matches else default


def parse_product_row(row):
    """Extract the printable fields of one product row.

    `row` is a BeautifulSoup element for one `tr` of the product table.
    Returns a dict of strings keyed by field name. Fields the original script
    treated as mandatory still raise IndexError when their element is absent,
    so unexpected markup is not silently turned into blank columns.
    """
    name = row.select('td.info p.product__description')[0].getText().strip()

    # A row without a SKU-count element is reported as a single SKU.
    sku_text = optional_text(row, 'td.info div.product-count', None)
    sku_count = '1' if sku_text is None else SKU_COUNT_NOISE.sub("", sku_text)

    brand = row.select('td.btns')[0].getText().strip()
    delivery = optional_text(row, 'td i.icon-txt')

    # Rows without a numeric price element carry their text in a <strong>
    # element instead; that text is passed through unchanged.
    price_text = optional_text(row, 'td.btns div span.price', None)
    if price_text is None:
        price = row.select('td.btns div strong')[0].getText().strip()
    else:
        price = price_plus_ten_percent(price_text)

    discount = optional_text(row, 'td.btns div p i.icon-txt')
    detail = row.select('td.info div.product-description-new')[0].getText("\t", strip=True)
    link = SITE_ROOT + row.select('td.info a')[0].get('href')

    return {
        'name': name,
        'sku_count': sku_count,
        'brand': brand,
        'delivery': delivery,
        'price': str(price),
        'discount': discount,
        'link': link,
        'detail': detail,
    }


def main():
    """Search navimro.com for the term in argv[1] and print one TSV line per product.

    Columns: term, result count, row position, product name, SKU count,
    brand cell text, delivery tag, price, discount tag, product link,
    comma-joined search suggestions, detail text, elapsed fetch+parse time.
    Nothing is printed when the search returns no product rows.
    Raises requests.HTTPError on a non-2xx response.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " SEARCH_TERM")

    started = datetime.now()
    term = sys.argv[1]

    resp = requests.get(
        SEARCH_URL,
        params={'q': term, 'disp': '1'},
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    resp.encoding = 'UTF-8'
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    suggestion_nodes = soup.select('ul#suggestion-kw li.dp-block a.dp-block p.dp-block')
    rows = soup.select('div.product-list table tr')
    # As before, the timer stops once the page is fetched and the two main
    # selections are done; per-row extraction is not included.
    elapsed = datetime.now() - started

    suggestions = ",".join(node.getText().strip() for node in suggestion_nodes)
    paging_links = soup.select('div.product-list div.paging div a')
    res_count = estimate_result_count(len(rows), len(paging_links))

    # The first row is skipped (presumably the table header), so the printed
    # position starts at 1.
    for position, row in enumerate(rows[1:], start=1):
        product = parse_product_row(row)
        print("\t".join([
            term,
            str(res_count),
            str(position),
            product['name'],
            product['sku_count'],
            product['brand'],
            product['delivery'],
            product['price'],
            product['discount'],
            product['link'],
            suggestions,
            product['detail'],
            str(elapsed),
        ]))


if __name__ == "__main__":
    main()