"""Scrape Naver Shopping price-comparison search results for one term.

Usage: pass the search term as the first command-line argument.

The script requests the first result page (80 model-type products, sorted by
relevance) and prints one tab-separated row per product to stdout with these
columns:

    term, result count, rank, product name, price, category, review count,
    registration date, seller count, listed mall count, mall names, link

If the page has no price-comparison counter, a single
``<term>\tNot Found`` row is printed instead.
"""
import re
import sys
import urllib.parse

import bs4
import requests

SEARCH_URL = "https://search.shopping.naver.com/search/all.nhn"

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Regexes are compiled once here instead of on every loop iteration.
CATEGORY_WS_RE = re.compile(r"\t|\n|\s\s")
RESULT_LABEL_RE = re.compile(r"가격비교")       # "price comparison" label
SELLER_PREFIX_RE = re.compile(r"판매처\ ")      # "sellers " prefix
REG_DATE_RE = re.compile(r"^등록일\ |\.$")      # "registered " prefix / final dot


def _build_search_url(term):
    """Return the search URL for *term* with the fixed paging/sort options."""
    params = {
        "origQuery": term,
        "pagingIndex": "1",
        "pagingSize": "80",
        "productSet": "model",
        "viewType": "list",
        "sort": "rel",
        "frm": "NVSHMDL",
        "query": term,
    }
    # urlencode() applies quote_plus() to each value, matching the escaping
    # the URL needs for non-ASCII search terms.
    return SEARCH_URL + "?" + urllib.parse.urlencode(params)


def _first(node, selector):
    """Return the first element under *node* matching *selector*, or None."""
    if node is None:
        return None
    matches = node.select(selector)
    return matches[0] if matches else None


def _first_text(node, selector, default=""):
    """Return the stripped text of the first match, or *default* if absent."""
    element = _first(node, selector)
    return default if element is None else element.getText().strip()


def main():
    """Fetch the result page for ``sys.argv[1]`` and print one row per product."""
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " SEARCH_TERM")
    term = sys.argv[1]

    resp = requests.get(_build_search_url(term), timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'UTF-8'
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    result_counter = soup.select('ul.snb_list li.snb_compare')
    if not result_counter:
        print(term + "\t" + 'Not Found')
        return

    # The total is the same for every row, so compute it once.
    result_count = RESULT_LABEL_RE.sub("", result_counter[0].getText().strip())

    items = soup.select('li._itemSection div.info')
    mall_infos = soup.select('li._itemSection div.info_mall')

    for index, info in enumerate(items):
        title = _first(info, 'a.tit')
        if title is None:
            product_name = ''
            link = ''
        else:
            product_name = title.getText().strip()
            # get() returns None when the anchor has no href attribute.
            link = title.get('href') or ''

        category = _first(info, 'span.depth')
        if category is None:
            category_text = ''
        else:
            category_text = CATEGORY_WS_RE.sub("", category.getText())

        # The mall blocks are matched to products by position; tolerate a
        # shorter list instead of raising IndexError.
        mall_block = mall_infos[index] if index < len(mall_infos) else None
        mall_list = _first(mall_block, 'ul.mall_list')
        if mall_list is None:
            mall_names = []
        else:
            mall_names = [
                mall.getText().strip()
                for mall in mall_list.select(
                    'li a._lowPriceByMall em span.mall_name')
            ]

        if not mall_names:
            # No mall lists the product: report it as discontinued
            # ('판매중단' = "sales discontinued") with zero sellers.
            price_text = '판매중단'
            seller_count = '0'
        else:
            price_text = _first_text(info, 'span.price em span.num')
            seller_count = SELLER_PREFIX_RE.sub(
                "", _first_text(info, 'span.price a.btn_compare'))

        review_count = _first_text(info, 'span.etc a.graph em', default='0')
        reg_date = REG_DATE_RE.sub(
            "", _first_text(info, 'span.etc span.date'))

        print("\t".join([
            term,
            result_count,
            str(index + 1),  # 1-based rank on the page
            product_name,
            price_text,
            category_text,
            review_count,
            reg_date,
            seller_count,
            str(len(mall_names)),
            ",".join(mall_names),
            link,
        ]))


if __name__ == "__main__":
    main()