"""Search danawa.com for a term and print one tab-separated row per product.

Usage: python <this script> SEARCH_TERM

One line is written to stdout for every product item found in the search
response.  The columns are: search term, result count, rank, product name,
lowest price, category, review count, registration date, (empty), summed
mall count, (empty), product link, number of price entries, and a bracketed
summary of every price entry.  When the response contains the "no search
result" area a single ``TERM<TAB>Not Found`` line is printed instead.
"""

import json  # noqa: F401 -- not used below; kept because the file imported it
import re
import sys
import urllib.parse

SEARCH_URL = "http://search.danawa.com/ajax/getProductList.ajax.php"

# NOTE(review): this value is percent-encoded here and then form-encoded again
# by requests when the body is built -- kept as in the original; confirm the
# server really expects the double encoding.
PREVIOUS_KEYWORD = urllib.parse.quote_plus('공구몰')

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# Text reported when an item has no usable numeric price.
NO_PRICE_TEXT = '단종/품절'

REQUEST_HEADERS = {
    'Host': 'search.danawa.com',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'http://search.danawa.com/dsearch.php?k1=' + PREVIOUS_KEYWORD + '&module=goods&act=dispMain',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
}

# Whitespace clean-up patterns shared by displayText.
_LAYOUT_WS = re.compile(r"\n|\t|\s\s")
_ALL_WS = re.compile(r"\n|\t|\s")
# Leading "<digits>위" rank marker (any number of digits) or layout whitespace.
_RANK_PREFIX_OR_WS = re.compile(r"^[0-9]+위|\n|\t|\s\s")


def getLowestPrice(pricelist):
    """Return the lowest price found in *pricelist* as a digit string.

    Each element must offer ``select('strong')``; the text of the first
    ``strong`` match is taken as the price.  Thousands separators are removed
    and prices are compared numerically.  Elements without a ``strong`` match
    or without a purely numeric price are ignored.  When nothing usable
    remains, ``'단종/품절'`` is returned.
    """
    prices = []
    for node in pricelist:
        strong = node.select('strong')
        if not strong:
            continue
        digits = strong[0].getText().replace(',', '').strip()
        if digits.isdecimal():
            # Compare as integers: as strings "10000" would sort below "9000".
            prices.append(int(digits))
    return str(min(prices)) if prices else NO_PRICE_TEXT


def sumMallCount(mctlist):
    """Return the sum of the mall counts in *mctlist* as a string.

    Each element's text is expected to look like ``"12몰"``.  The trailing
    ``몰`` and any thousands separators are removed before conversion;
    entries that still are not plain numbers are skipped instead of
    aborting the whole run.
    """
    total = 0
    for node in mctlist:
        count = re.sub(r"몰$", "", node.getText().strip())
        count = count.replace(',', '').strip()
        if count.isdecimal():
            total += int(count)
    return str(total)


def checkEmpty(clist, nrText):
    """Return the stripped text of the first element of *clist*.

    *nrText* is returned unchanged when *clist* is empty.
    """
    return clist[0].getText().strip() if clist else nrText


def displayText(plist):
    """Summarise every price entry in *plist* as one bracketed string.

    The result has the form ``[{'rank','desc','price','malls'},...]``.  For
    each entry:

    * rank  -- text of ``p.memory_sect span.rank`` with all whitespace removed
      (empty when absent);
    * desc  -- text of ``p.memory_sect`` with layout whitespace removed and,
      when a rank element exists, a leading ``<digits>위`` marker removed;
    * price -- text of ``p.price_sect a strong``;
    * malls -- text of ``p.chk_sect``.

    Missing elements yield empty fields rather than raising ``IndexError``.
    """
    entries = []
    for item in plist:
        rank_text = ''
        desc_text = ''
        memory = item.select('p.memory_sect')
        if memory:
            raw_desc = memory[0].getText().strip()
            rank = item.select('p.memory_sect span.rank')
            if rank:
                rank_text = _ALL_WS.sub("", rank[0].getText().strip())
                desc_text = _RANK_PREFIX_OR_WS.sub("", raw_desc)
            else:
                desc_text = _LAYOUT_WS.sub("", raw_desc)
        mall_text = _LAYOUT_WS.sub("", checkEmpty(item.select('p.chk_sect'), ''))
        price_text = _LAYOUT_WS.sub("", checkEmpty(item.select('p.price_sect a strong'), ''))
        fields = (rank_text, desc_text, price_text, mall_text)
        entries.append("{'" + "','".join(fields) + "'}")
    return '[' + ','.join(entries) + ']'


def _build_form(term):
    """Return the POST form fields for a search on *term*."""
    return {
        'query': term,
        'originalQuery': term,
        'previousKeyword': PREVIOUS_KEYWORD,
        'volumeType': 'vmvs',
        'page': '1',
        'limit': '80',
        'sort': 'saveDESC',
        'list': 'list',
        'tab': 'main',
        'boost': 'true',
        'addDelivery': 'N',
    }


def _fetch_soup(term):
    """POST the search for *term* and return the parsed response document.

    Raises whatever ``requests`` raises for connection problems, timeouts and
    non-success HTTP status codes (via ``raise_for_status``).
    """
    # Imported here rather than at module level so the parsing helpers above
    # stay importable (and testable) without the third-party packages.
    import bs4
    import requests

    resp = requests.post(
        SEARCH_URL,
        data=_build_form(term),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    resp.encoding = 'UTF-8'
    return bs4.BeautifulSoup(resp.text, 'html.parser')


def _format_rows(term, soup):
    """Return the tab-separated output lines for a parsed search response."""
    if soup.select('div#nosearchArea'):
        return [term + "\t" + 'Not Found']

    count_nodes = soup.select('ul.goods_type li.selected a.vmTab span.qnt')
    # An absent counter yields an empty column instead of an IndexError.
    result_count = re.sub(r"\(|\)", "", checkEmpty(count_nodes, ''))

    rows = []
    rank = 1
    for item in soup.select('ul.product_list li.prod_item'):
        product = item.select('div.prod_main_info div.prod_info p.prod_name a')
        if not product:
            # No name/link to report for this item; it does not consume a rank.
            continue
        price_entries = item.select('div.prod_main_info div.prod_pricelist ul li')
        fields = (
            term,
            result_count,
            str(rank),
            product[0].getText().strip(),
            getLowestPrice(item.select(
                'div.prod_main_info div.prod_pricelist ul li p.price_sect')),
            checkEmpty(item.select(
                'div.prod_main_info div.prod_info div.prod_sub_info'
                ' dl.prod_category_location dd a'), ''),
            checkEmpty(item.select(
                'div.prod_main_info div.prod_info div.prod_sub_info'
                ' dl.meta_item.mt_comment dd div.cnt_opinion a strong'), '0'),
            checkEmpty(item.select(
                'div.prod_main_info div.prod_info div.prod_sub_info'
                ' div.prod_sub_meta dl.meta_item.mt_date dd'), '정보없음'),
            '',  # selling item count: always empty in the output
            sumMallCount(item.select(
                'div.prod_main_info div.prod_pricelist ul li p.chk_sect')),
            '',  # malls content: always empty in the output
            product[0].get('href') or '',  # a missing href must not break the join
            str(len(price_entries)),
            displayText(price_entries),
        )
        rows.append("\t".join(fields))
        rank += 1
    return rows


def main(argv=None):
    """Run the search for the first command-line argument and print the rows.

    Returns the process exit status: 0 on success, 2 when no search term was
    given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.stderr.write("usage: " + sys.argv[0] + " SEARCH_TERM\n")
        return 2
    term = args[0]
    for row in _format_rows(term, _fetch_soup(term)):
        print(row)
    return 0


if __name__ == "__main__":
    sys.exit(main())

# Cookies once captured alongside the request; not sent.  Kept for reference.
# (`query` below stood for urllib.parse.quote_plus(term).)
#cookies = {
#    'cookieGuestId': 'b622433e7e198e0970da17411eb614d7',
#    'ADWEBCOUNTER_UUID': 'cd6b0864-1ed3-2032-9de8-b007d87fe435',
#    'ADWEBCOUNTER_KEYWORD': '',
#    'ADWEBCOUNTER_URL': '',
#    'OAX' : 'QvlSrFr0bHgABCbb',
#    'cPreviousKeyword': urllib.parse.quote_plus('공구몰'),
#    'danawa-loggingApplicationClient' : 'fcfbe04a-003e-4c21-be87-4c23efe22a94',
#    'dable_uid': '32840650.1518265907167',
#    'RMFD': '011fH1fNO103Wt',
#    '_INSIGHT_CK_8203': '5c13bd0d6fdf907db32d17d2a3b73851_67992|f4f74d23e7cf80f761a7918dee374a3e_20592:1526023796000',
#    'wcs_bt': 's_3b3fb74948b1:1526021996',
#    'cookSearchKeyword': query
#}