152 lines
4.9 KiB
Python
152 lines
4.9 KiB
Python
import json
import re
import sys
import urllib
import urllib.parse

import bs4
import requests
|
|
|
|
# The search keyword is the first command-line argument; without it there is
# nothing to query, so stop with a usage hint instead of an IndexError.
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <search term>")
term = sys.argv[1]
# URL-encoded copy of the keyword; only referenced by the commented-out
# cookie template at the end of the file.
query = urllib.parse.quote_plus(term)
# Endpoint the search form is POSTed to.
url = "http://search.danawa.com/ajax/getProductList.ajax.php"

# Form fields sent in the POST body. Only 'query' / 'originalQuery' depend on
# the command-line keyword; everything else is a fixed value.
# NOTE(review): 'previousKeyword' is percent-encoded here and the form body is
# encoded again by requests when it is sent -- confirm the server expects that.
data = {
    'query': term,
    'originalQuery': term,
    'previousKeyword': urllib.parse.quote_plus('공구몰'),
    'volumeType': 'vmvs',
    'page': '1',
    'limit': '80',
    'sort': 'saveDESC',
    'list': 'list',
    'tab': 'main',
    'boost': 'true',
    'addDelivery': 'N'
}

# Request headers; the Referer and User-Agent are fixed strings imitating a
# desktop Chrome session on the site's search page.
headers = {
    'Host': 'search.danawa.com',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'http://search.danawa.com/dsearch.php?k1=' + urllib.parse.quote_plus('공구몰') + '&module=goods&act=dispMain',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate'
}
|
|
|
|
# POST the search form. requests waits indefinitely by default, so an explicit
# timeout (seconds) keeps a stalled server from hanging the script forever.
resp = requests.post(url, data=data, headers=headers, timeout=30)
# Abort on any 4xx/5xx answer rather than parsing an error page.
resp.raise_for_status()

# Force UTF-8 decoding of the body instead of relying on the detected charset.
resp.encoding = 'UTF-8'
html = resp.text

# Parsed document used by all the selectors further down.
bs = bs4.BeautifulSoup(html, 'html.parser')
|
|
|
|
def getLowestPrice(pricelist):
    """Return the lowest price found in ``pricelist`` as a digit string.

    pricelist: sequence of parsed elements (the caller passes the
        ``p.price_sect`` nodes of one product). The text of the first
        ``strong`` descendant of each element is read as its price.

    Returns the smallest price with thousands separators removed, or
    '단종/품절' when no element carries a numeric price.
    """
    candidates = []
    for entry in pricelist:
        strong = entry.select('strong')
        if not strong:
            # No price node at all for this entry; nothing to compare.
            continue
        digits = strong[0].getText().replace(',', '').strip()
        # Keep only purely numeric prices; empty or textual placeholders
        # must not take part in the comparison.
        if digits.isdecimal():
            candidates.append(digits)
    if not candidates:
        return '단종/품절'
    # Compare numerically: as plain strings '10000' would sort before '9000'.
    return min(candidates, key=int)
|
|
|
|
def sumMallCount(mctlist):
    """Add up the per-entry mall counts and return the total as a string.

    mctlist: sequence of parsed elements whose stripped text is an integer
        optionally followed by a trailing '몰' (e.g. '12몰').

    Raises ValueError if an element's text is not an integer once the
    trailing '몰' has been removed.
    """
    counts = (
        int(re.sub(r"몰$", "", node.getText().strip()))
        for node in mctlist
    )
    return str(sum(counts))
|
|
|
|
def checkEmpty(clist, nrText):
    """Return the stripped text of the first element, or a fallback.

    clist: sequence of parsed elements (possibly empty).
    nrText: value returned unchanged when ``clist`` has no elements.
    """
    if len(clist) == 0:
        return nrText
    first = clist[0]
    return first.getText().strip()
|
|
|
|
def displayText(plist):
    """Serialise price entries into one bracketed, comma-separated string.

    plist: sequence of parsed elements, one per price entry. From each one
        the function reads ``p.memory_sect`` (optional description, with an
        optional ``span.rank`` inside it), ``p.chk_sect`` and
        ``p.price_sect a strong``.

    Returns a string of the form
    ``[{'rank','description','price','mall text'},...]``; an empty ``plist``
    yields ``[]``.

    Raises IndexError if an entry lacks ``p.chk_sect`` or
    ``p.price_sect a strong``.
    """
    rows = []
    for entry in plist:
        rankT = ''
        descT = ''
        mem_sect = entry.select('p.memory_sect')
        if mem_sect:
            rank = entry.select('p.memory_sect span.rank')
            if rank:
                # Rank text has every whitespace character removed.
                rankT = re.sub(r"\n|\t|\s", "", rank[0].getText().strip())
            # When a rank is present, its leading "<digit>위" is also cut
            # from the description.
            desc_pattern = r"^[0-9]위|\n|\t|\s\s" if rank else r"\n|\t|\s\s"
            descT = re.sub(desc_pattern, "", mem_sect[0].getText().strip())

        mall_raw = entry.select('p.chk_sect')[0].getText().strip()
        mallct = re.sub(r"\n|\t|\s\s", "", mall_raw)
        price_raw = entry.select('p.price_sect a strong')[0].getText().strip()
        priceT = re.sub(r"\n|\t|\s\s", "", price_raw)

        rows.append(f"{{'{rankT}','{descT}','{priceT}','{mallct}'}}")

    return '[' + ','.join(rows) + ']'
|
|
|
|
# Landmarks in the parsed page: the "no results" area, the result-count badge
# of the selected tab, and the product rows themselves.
checkNR = bs.select('div#nosearchArea')
res_count = bs.select('ul.goods_type li.selected a.vmTab span.qnt')
_lists = bs.select('ul.product_list li.prod_item')

if checkNR:
    print(term + "\t" + 'Not Found')
else:
    # Result count with its surrounding parentheses removed.
    rcText = re.sub(r"\(|\)", "", res_count[0].getText().strip())

    # One tab-separated output line per product; rank is the 1-based
    # position of the product in the result list.
    for rank, item in enumerate(_lists, start=1):
        product = item.select('div.prod_main_info div.prod_info p.prod_name a')
        productName = product[0].getText().strip()
        linkStr = product[0].get('href')

        priceList = item.select('div.prod_main_info div.prod_pricelist ul li p.price_sect')
        priceText = getLowestPrice(priceList)

        category = item.select('div.prod_main_info div.prod_info div.prod_sub_info dl.prod_category_location dd a')
        categoryText = checkEmpty(category, '')

        reviewCounts = item.select('div.prod_main_info div.prod_info div.prod_sub_info dl.meta_item.mt_comment dd div.cnt_opinion a strong')
        reviewCountsText = checkEmpty(reviewCounts, '0')

        regDate = item.select('div.prod_main_info div.prod_info div.prod_sub_info div.prod_sub_meta dl.meta_item.mt_date dd')
        regDateText = checkEmpty(regDate, '정보없음')

        # Always-empty placeholder column.
        sellingItemCount = ''

        mallctList = item.select('div.prod_main_info div.prod_pricelist ul li p.chk_sect')
        mallCounts = sumMallCount(mallctList)

        # Always-empty placeholder column.
        mallsContent = ''

        priceAllinfo = item.select('div.prod_main_info div.prod_pricelist ul li')
        dnwSortCount = len(priceAllinfo)
        displayText_ = displayText(priceAllinfo)

        columns = [
            term,
            rcText,
            str(rank),
            productName,
            priceText,
            categoryText,
            reviewCountsText,
            regDateText,
            sellingItemCount,
            mallCounts,
            mallsContent,
            linkStr,
            str(dnwSortCount),
            displayText_,
        ]
        print("\t".join(columns))
|
|
|
|
|
|
#cookies = {
|
|
# 'cookieGuestId': 'b622433e7e198e0970da17411eb614d7',
|
|
# 'ADWEBCOUNTER_UUID': 'cd6b0864-1ed3-2032-9de8-b007d87fe435',
|
|
# 'ADWEBCOUNTER_KEYWORD': '',
|
|
# 'ADWEBCOUNTER_URL': '',
|
|
# 'OAX' : 'QvlSrFr0bHgABCbb',
|
|
# 'cPreviousKeyword': urllib.parse.quote_plus('공구몰'),
|
|
# 'danawa-loggingApplicationClient' : 'fcfbe04a-003e-4c21-be87-4c23efe22a94',
|
|
# 'dable_uid': '32840650.1518265907167',
|
|
# 'RMFD': '011fH1fNO103Wt',
|
|
# '_INSIGHT_CK_8203': '5c13bd0d6fdf907db32d17d2a3b73851_67992|f4f74d23e7cf80f761a7918dee374a3e_20592:1526023796000',
|
|
# 'wcs_bt': 's_3b3fb74948b1:1526021996',
|
|
# 'cookSearchKeyword': query
|
|
#} |