#!/Users/maddiekorea/Workspace/bin/python
# version 20190601
"""Scrape the first page of navimro.com search results for one search term.

Usage: <script> SEARCH_TERM

For every product row on the result page one tab-separated line is printed
with these columns, in order: search term, estimated result count, row rank,
product name, SKU count, text of the ``td.btns`` cell (the "brand" column),
delivery badge, price, discount badge, product link, comma-joined search
suggestions, tab-joined detail text, and the time spent fetching and
parsing the page.
"""
import math
import re
import sys
import urllib.parse
from datetime import datetime

BASE_URL = "https://www.navimro.com"
SEARCH_URL = BASE_URL + "/s/"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever
ROWS_PER_PAGE = 90
# Number of paging anchors that are not page numbers
# (presumably first/prev/next/last -- TODO confirm against the live markup).
NAV_LINK_COUNT = 4

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    # NOTE(review): 'br' responses are only decoded when a brotli package is
    # installed next to requests/urllib3 -- confirm for the target machine.
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Host': 'www.navimro.com',
}

# Label words removed from the SKU-count text so that only the number remains.
_SKU_LABEL_RE = re.compile(r"총|종류|상품")


def price_plus_ten_percent(net_text):
    """Return the listed price plus 10 %, rounded up to a whole unit.

    ``net_text`` is the price as shown on the page, optionally with thousands
    separators (``"3,000"``).  The 10 % is presumably VAT -- confirm.

    Integer arithmetic is used on purpose: ``math.ceil(price * 1.1)`` is off
    by one whenever the float product lands just above a whole number
    (``100 * 1.1 == 110.00000000000001`` ceils to 111).

    Raises ValueError when the text is not an integer.
    """
    net = int(net_text.strip().replace(",", ""))
    # ceil(net * 11 / 10) without going through floating point.
    return (net * 11 + 9) // 10


def estimate_result_count(row_count, paging_link_count):
    """Estimate the total number of search hits from the first page.

    ``row_count`` is the number of ``<tr>`` rows in the product table
    (including the header row); ``paging_link_count`` is the number of
    anchors in the paging bar.

    Returns ``'NoResult'`` when the table has no rows, the exact number of
    product rows when everything fits on one page, and otherwise an upper
    estimate of ``pages * ROWS_PER_PAGE``.
    """
    if row_count == 0:
        return 'NoResult'
    if row_count <= ROWS_PER_PAGE:
        return row_count - 1  # the first row is the table header
    return (paging_link_count - NAV_LINK_COUNT) * ROWS_PER_PAGE


def strip_sku_labels(text):
    """Remove the label words around the SKU count, leaving the number."""
    return _SKU_LABEL_RE.sub("", text)


def absolute_link(href):
    """Resolve a product ``href`` from the page against the site root."""
    return urllib.parse.urljoin(BASE_URL, href)


def first_text(node, selector, default=None):
    """Return the stripped text of the first element matching ``selector``.

    When nothing matches, ``default`` is returned; if no default was given an
    IndexError is raised so that unexpected markup is not silently ignored.
    """
    matches = node.select(selector)
    if matches:
        return matches[0].getText().strip()
    if default is None:
        raise IndexError("no element matches selector: " + selector)
    return default


def product_fields(row):
    """Extract the per-product output columns from one table row.

    Returns a list of strings: product name, SKU count, ``td.btns`` text,
    delivery badge, price, discount badge, product link, detail text.
    """
    name = first_text(row, 'td.info p.product__description')
    # A row without a product-count element is a single-SKU product.
    sku_count = strip_sku_labels(
        first_text(row, 'td.info div.product-count', default='1'))
    brand = first_text(row, 'td.btns')
    delivery = first_text(row, 'td i.icon-txt', default='')

    price_nodes = row.select('td.btns div span.price')
    if price_nodes:
        price = price_plus_ten_percent(price_nodes[0].getText())
    else:
        # No numeric price: print whatever text the page shows in its place.
        price = first_text(row, 'td.btns div strong')

    discount = first_text(row, 'td.btns div p i.icon-txt', default='')
    detail = row.select('td.info div.product-description-new')[0].getText(
        "\t", strip=True)
    link = absolute_link(row.select('td.info a')[0].get('href'))
    return [name, sku_count, brand, delivery, str(price), discount, link,
            detail]


def main(argv):
    """Fetch the search page for ``argv[1]`` and print one line per product."""
    if len(argv) < 2:
        sys.exit("usage: " + argv[0] + " SEARCH_TERM")

    # Third-party modules are imported here so that the pure helpers above
    # stay importable on machines that only have the standard library.
    import bs4
    import requests

    started = datetime.now()
    term = str(argv[1])

    resp = requests.get(
        SEARCH_URL,
        params={'q': term, 'disp': '1'},
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    resp.encoding = 'UTF-8'
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    suggestion_nodes = soup.select(
        'ul#suggestion-kw li.dp-block a.dp-block p.dp-block')
    rows = soup.select('div.product-list table tr')
    # Elapsed time covers the request and the HTML parse, not the printing.
    elapsed = datetime.now() - started

    suggestions = ','.join(
        node.getText().strip() for node in suggestion_nodes)
    paging_links = soup.select('div.product-list div.paging div a')
    res_count = estimate_result_count(len(rows), len(paging_links))

    for rank, row in enumerate(rows):
        if rank == 0:
            continue  # header row
        (name, sku_count, brand, delivery, price, discount, link,
         detail) = product_fields(row)
        print("\t".join([
            term, str(res_count), str(rank), name, sku_count, brand,
            delivery, price, discount, link, suggestions, detail,
            str(elapsed),
        ]))


if __name__ == "__main__":
    main(sys.argv)