"""Scrape the first page of Naver ad-search results for a term and print TSV rows.

Usage: python <script> <search term>

For every ad found on page 1, one tab-separated row is written to stdout:

    term, total result count, rank, title, sub title, thumbnail flag, url,
    icons, menu links, promotion text, description, priced items, ad period

followed by a trailing tab.  When the result count is zero, a single
``term<TAB>NoResult`` line is printed instead.  The elapsed run time is
reported through ``logging.warning`` in both cases.
"""
import logging
import math
import re
import sys
import urllib.parse
from datetime import datetime

import bs4
import requests

# Taken at import time so the reported duration covers the whole run.
startTime = datetime.now()

# Page size used by paging() to turn a result count into a page count.
ADS_PER_PAGE = 25

# (CSS selector, label) pairs checked inside every icon element, in output order.
_ICON_LABELS = (
    ('span.ico_npay', 'nPay'),
    ('span.ico_nlogin', 'nLogin'),
    ('span.ico_talktalk', 'nTalkTalk'),
    ('span.ico_nreserve', 'nReserve'),
)


def makeQuery(term):
    """Return *term* URL-encoded for use as a query-string value."""
    return urllib.parse.quote_plus(term)


def makeURL(num, query):
    """Return the ad-search URL for result page *num* of the encoded *query*."""
    return (
        "https://ad.search.naver.com/search.naver?where=ad&sm=svc_nrs&query="
        + query
        + "&referenceId=&pagingIndex="
        + str(num)
    )


def getHTML(url, timeout=10):
    """Download *url* and return it parsed as a ``BeautifulSoup`` document.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    resp.encoding = 'UTF-8'
    return bs4.BeautifulSoup(resp.text, 'html.parser')


def resultCount(allHTML):
    """Return the total number of results announced on the page.

    The counter text is expected to look like ``"<from>-<to> / <total>건"``;
    the leading range, the trailing "건" suffix and any thousands separators
    are stripped before conversion.

    Returns 0 when the page has no counter element.
    NOTE(review): this assumes a page without the counter means "no results";
    confirm against the live markup.

    Raises:
        ValueError: If the remaining text is not an integer.
    """
    counters = allHTML.select('div.search_result div.inner span.num_result')
    if not counters:
        return 0
    text = counters[0].getText().strip()
    text = re.sub(r"[0-9]+\-[0-9]+\ \/\s", "", text)
    # Drop the unit suffix and separators such as "1,234" so int() accepts it.
    text = text.replace("건", "").replace(",", "")
    return int(text)


def paging(resultCount):
    """Return how many pages are needed to show *resultCount* results."""
    return math.ceil(int(resultCount) / ADS_PER_PAGE)


def _required_text(node, selector):
    """Return the stripped text of the first match of *selector* under *node*.

    Raises:
        IndexError: If nothing matches; the field is treated as mandatory.
    """
    return node.select(selector)[0].getText().strip()


def _parse_ad(ad):
    """Extract the per-ad output columns from one ad list item.

    Returns a list of strings in output order: title, sub title, thumbnail
    flag, url, icons, menu links, promotion text, description, priced items
    and ad period.
    """
    title = _required_text(ad, 'div.inner a.lnk_tit')

    sub_titles = ad.select('div.inner a.sub_tit')
    sub_title = sub_titles[0].getText().strip() if sub_titles else ""

    # "Y" marks ads that carry a thumbnail; empty otherwise.
    thumb = "Y" if ad.select('div.ad_thumb') else ""

    shown_url = _required_text(ad, 'div.inner div.url_area a.url')

    menu_links = "||".join(
        link.getText().strip()
        for link in ad.select('div.inner ul.lst_link li.item a')
    )

    promotions = ad.select('div.inner p.promotion')
    if promotions:
        event = promotions[0].getText().strip()
        event = re.sub(r"\n|\t|\s\s", " ", event)
        # Whitespace runs that survive the first pass become "|" separators.
        event = re.sub(r"\s\s+", "|", event)
    else:
        event = ""

    # Cleaned for every ad so embedded newlines/tabs cannot break the TSV row.
    description = _required_text(ad, 'div.inner p.ad_dsc_inner')
    description = re.sub(r"\n|\t|\s\s", " ", description)
    description = re.sub(r"\s\s+", " ", description)

    priced_items = "||".join(
        _required_text(item, 'div.txt span')
        + " ("
        + _required_text(item, 'span.price')
        + ")"
        for item in ad.select('div.inner ul.lst_price li.item a')
    )

    period = _required_text(ad, 'div.inner div.period_area')
    # Remove the label text that precedes the dates, and any line breaks.
    period = re.sub(r"광고집행기간|\n", "", period)

    # One comma-separated entry per icon element; an element that matches none
    # of the known selectors contributes an empty entry.
    icons = ",".join(
        "".join(label for selector, label in _ICON_LABELS if icon.select(selector))
        for icon in ad.select('div.inner div.url_area span.ico_area span.ico')
    )

    return [
        title,
        sub_title,
        thumb,
        shown_url,
        icons,
        menu_links,
        event,
        description,
        priced_items,
        period,
    ]


def main(argv=None):
    """Run the scraper for the term given in *argv* (default: ``sys.argv[1:]``).

    Only result page 1 is fetched.  Exits with a usage message when no search
    term is supplied.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: " + sys.argv[0] + " <search term>")

    term = str(args[0])
    page = getHTML(makeURL(1, makeQuery(term)))
    total = resultCount(page)

    if total == 0:
        print(term + "\t" + "NoResult")
    else:
        ads = page.select('div.ad_section ol.lst_type li.lst')
        for rank, ad in enumerate(ads, start=1):
            columns = [term, str(total), str(rank)] + _parse_ad(ad)
            # Every column, including the last, is followed by a tab.
            print("\t".join(columns) + "\t")

    elapsed = datetime.now() - startTime
    logging.warning("%s\t%s", term, elapsed)


if __name__ == "__main__":
    main()