python_apps/crwlers/danawa_price_get.py
2023-11-03 14:49:12 +09:00

152 lines
4.9 KiB
Python

import json
import re
import sys
import urllib.parse

import bs4
import requests
# --- Request setup ---------------------------------------------------------
# Usage: <script> <search term>
# Posts the search term to the Danawa product-list AJAX endpoint and parses
# the returned markup into `bs` for the extraction code further down.
if len(sys.argv) < 2:
    # Exit with a readable usage message instead of a bare IndexError.
    sys.exit("usage: " + sys.argv[0] + " <search term>")

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

term = sys.argv[1]
# URL-encoded copy of the term; not referenced by the active request below.
query = urllib.parse.quote_plus(term)
url = "http://search.danawa.com/ajax/getProductList.ajax.php"
# Form fields sent with the POST; everything except the term is fixed.
data = {
    'query': term,
    'originalQuery': term,
    'previousKeyword': urllib.parse.quote_plus('공구몰'),
    'volumeType': 'vmvs',
    'page': '1',
    'limit': '80',
    'sort': 'saveDESC',
    'list': 'list',
    'tab': 'main',
    'boost': 'true',
    'addDelivery': 'N'
}
headers = {
    'Host': 'search.danawa.com',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'http://search.danawa.com/dsearch.php?k1=' + urllib.parse.quote_plus('공구몰') + '&module=goods&act=dispMain',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate'
}
resp = requests.post(url, data=data, headers=headers, timeout=REQUEST_TIMEOUT)
# Abort on HTTP 4xx/5xx rather than parsing an error page.
resp.raise_for_status()
# Force UTF-8 decoding of the body before reading it as text.
resp.encoding = 'UTF-8'
html = resp.text
bs = bs4.BeautifulSoup(html, 'html.parser')
def getLowestPrice(pricelist):
    """Return the lowest price among the given price elements.

    Args:
        pricelist: Sequence of elements exposing ``select('strong')``; the
            text of the first match is read as the price. Thousands
            separators are allowed (e.g. ``"12,000"``).

    Returns:
        The smallest price as a digit string without commas, or
        ``'단종/품절'`` when no element yields a numeric price.
    """
    prices = []
    for entry in pricelist:
        strong = entry.select('strong')
        if not strong:
            # No price node for this entry; nothing to compare.
            continue
        digits = strong[0].getText().replace(",", "").strip()
        # Empty or non-numeric labels cannot be ranked against real prices.
        if re.fullmatch(r"[0-9]+", digits):
            prices.append(int(digits))
    if not prices:
        return '단종/품절'
    # Compare numerically: as strings, "9000" would sort after "10000".
    return str(min(prices))
def sumMallCount(mctlist):
    """Add up the per-entry mall counts and return the total as a string.

    Each element's text is stripped, a trailing ``몰`` suffix is removed and
    the remainder is parsed with ``int``; an empty sequence yields ``"0"``.
    """
    counts = (
        int(re.sub(r"몰$", "", node.getText().strip()))
        for node in mctlist
    )
    return str(sum(counts))
def checkEmpty(clist, nrText):
    """Return the stripped text of the first element, or a fallback.

    Args:
        clist: Sequence of elements exposing ``getText()``.
        nrText: Value returned unchanged when ``clist`` is empty.
    """
    if len(clist) == 0:
        return nrText
    first = clist[0]
    return first.getText().strip()
# Return the stripped text of the first node matching `selector`, or ''.
def _firstText(node, selector):
    found = node.select(selector)
    return found[0].getText().strip() if found else ''


def displayText(plist):
    """Serialise price-list entries into one bracketed text value.

    Every element of ``plist`` becomes a ``{'rank','description','price','malls'}``
    group. Groups are comma-separated and wrapped in ``[...]``.

    Args:
        plist: Sequence of elements exposing ``select(css_selector)``.

    Returns:
        The serialised string; ``"[]"`` for an empty sequence. Sections that
        are missing from an entry are emitted as empty strings.
    """
    groups = []
    for node in plist:
        rankT = ''
        descT = ''
        memory = node.select('p.memory_sect')
        if memory:
            memoryText = memory[0].getText().strip()
            rank = node.select('p.memory_sect span.rank')
            if rank:
                rankT = re.sub(r"\n|\t|\s", "", rank[0].getText().strip())
                # Strip the leading rank label of any digit count (e.g. "10위"),
                # not just single-digit ranks.
                descT = re.sub(r"^[0-9]+위|\n|\t|\s\s", "", memoryText)
            else:
                descT = re.sub(r"\n|\t|\s\s", "", memoryText)
        mallct = re.sub(r"\n|\t|\s\s", "", _firstText(node, 'p.chk_sect'))
        priceT = re.sub(r"\n|\t|\s\s", "", _firstText(node, 'p.price_sect a strong'))
        groups.append("{'" + rankT + "','" + descT + "','" + priceT + "','" + mallct + "'}")
    return '[' + ','.join(groups) + ']'
# --- Result extraction -----------------------------------------------------
# Prints one tab-separated line per product found in the parsed response, or
# a single "<term>\tNot Found" line when the no-result marker is present.
checkNR = bs.select('div#nosearchArea')
res_count = bs.select('ul.goods_type li.selected a.vmTab span.qnt')
_lists = bs.select('ul.product_list li.prod_item')

if len(checkNR) != 0:
    print("\t".join([term, 'Not Found']))
else:
    # Result count text with its surrounding parentheses removed.
    rcText = re.sub(r"\(|\)", "", res_count[0].getText().strip())
    # Placeholder columns: always emitted empty.
    sellingItemCount = ''
    mallsContent = ''
    # `rank` is the 1-based position of the product in the result list.
    for rank, item in enumerate(_lists, start=1):
        product = item.select('div.prod_main_info div.prod_info p.prod_name a')
        productName = product[0].getText().strip()
        linkStr = product[0].get('href')

        priceList = item.select('div.prod_main_info div.prod_pricelist ul li p.price_sect')
        priceText = getLowestPrice(priceList)

        category = item.select('div.prod_main_info div.prod_info div.prod_sub_info dl.prod_category_location dd a')
        categoryText = checkEmpty(category, '')

        reviewCounts = item.select('div.prod_main_info div.prod_info div.prod_sub_info dl.meta_item.mt_comment dd div.cnt_opinion a strong')
        reviewCountsText = checkEmpty(reviewCounts, '0')

        regDate = item.select('div.prod_main_info div.prod_info div.prod_sub_info div.prod_sub_meta dl.meta_item.mt_date dd')
        regDateText = checkEmpty(regDate, '정보없음')

        mallctList = item.select('div.prod_main_info div.prod_pricelist ul li p.chk_sect')
        mallCounts = sumMallCount(mallctList)

        # Every <li> of the price list: counted and serialised as the last column.
        priceAllinfo = item.select('div.prod_main_info div.prod_pricelist ul li')
        dnwSortCount = len(priceAllinfo)
        displayText_ = displayText(priceAllinfo)

        columns = [
            term,
            rcText,
            str(rank),
            productName,
            priceText,
            categoryText,
            reviewCountsText,
            regDateText,
            sellingItemCount,
            mallCounts,
            mallsContent,
            linkStr,
            str(dnwSortCount),
            displayText_,
        ]
        print("\t".join(columns))
# NOTE: the cookie dict below is commented out; no cookies are passed to requests.post above.
#cookies = {
# 'cookieGuestId': 'b622433e7e198e0970da17411eb614d7',
# 'ADWEBCOUNTER_UUID': 'cd6b0864-1ed3-2032-9de8-b007d87fe435',
# 'ADWEBCOUNTER_KEYWORD': '',
# 'ADWEBCOUNTER_URL': '',
# 'OAX' : 'QvlSrFr0bHgABCbb',
# 'cPreviousKeyword': urllib.parse.quote_plus('공구몰'),
# 'danawa-loggingApplicationClient' : 'fcfbe04a-003e-4c21-be87-4c23efe22a94',
# 'dable_uid': '32840650.1518265907167',
# 'RMFD': '011fH1fNO103Wt',
# '_INSIGHT_CK_8203': '5c13bd0d6fdf907db32d17d2a3b73851_67992|f4f74d23e7cf80f761a7918dee374a3e_20592:1526023796000',
# 'wcs_bt': 's_3b3fb74948b1:1526021996',
# 'cookSearchKeyword': query
#}