# Code-viewer header left over from the original paste: 74 lines, 2.9 KiB, Python
import logging
import math
import re
import sys
import urllib
import urllib.parse
from datetime import datetime

import bs4
import requests

# Command-line scraper: fetches a Naver search result page for one search
# term and prints one tab-separated row per entry found under
# `div#power_link_body` (labelled "파워링크" in the output), or a single
# "NoPowerlink" row when there are none.

SEARCH_URL = 'https://search.naver.com/search.naver'

# Seconds to wait for the search page; without a timeout `requests.get`
# can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# (CSS selector, label) pairs checked, in this order, inside every icon element.
ICON_LABELS = (
    ('span.ico_npay', 'nPay'),
    ('span.ico_nlogin', 'nLogin'),
    ('span.ico_talktalk', 'nTalkTalk'),
    ('span.ico_nreserve', 'nReserve'),
)


def build_search_url(term: str) -> str:
    """Return the search URL for *term*.

    The term is form-encoded with ``urllib.parse.quote_plus`` and sent as
    both the ``query`` and the ``oquery`` parameter.
    """
    query = urllib.parse.quote_plus(term)
    return (SEARCH_URL + '?sm=tab_hty.top&where=nexearch&query=' + query
            + '&oquery=' + query)


def collapse_whitespace(text: str) -> str:
    """Replace newlines/tabs/carriage returns by spaces, then squeeze runs.

    Any run of two or more whitespace characters becomes a single space.
    """
    text = re.sub(r"\n|\t|\r", " ", text)
    return re.sub(r"\s\s+", " ", text)


def first_text(node, selector: str) -> str:
    """Return the stripped text of the first match of *selector* in *node*.

    Returns ``''`` when nothing matches, so that one entry with a missing
    element does not abort the whole run with an ``IndexError``.
    """
    matches = node.select(selector)
    return matches[0].getText().strip() if matches else ''


def parse_ad(ad) -> list:
    """Extract the per-entry output columns from one ``li.lst`` element.

    Returns the columns in output order:
    ``[title, site, iconStats, thumbYN, sublinks, desc, itemlst]``.
    """
    title = first_text(ad, 'div.inner a.lnk_tit')
    site = first_text(ad, 'div.inner a.lnk_url')

    desc = first_text(ad, 'div.inner p.ad_dsc_inner')
    # Whitespace is normalised only when the description contains <span>
    # children; a span-less description is emitted as-is (stripped).
    if ad.select('div.inner p.ad_dsc_inner span'):
        desc = collapse_whitespace(desc)

    thumbYN = 'Y' if ad.select('a.lnk_thumb img.img_thumb') else ''

    sublinks = "||".join(
        link.getText().strip()
        for link in ad.select('div.inner ul.lst_link li.item a')
    )

    # Each priced item is rendered as "<name> (<price>)".
    itemlst = "||".join(
        first_text(item, 'div.txt span') + " (" + first_text(item, 'span.price') + ")"
        for item in ad.select('div.inner ul.lst_price li.item a')
    )

    # One comma-separated slot per icon element; a slot is the concatenation
    # of every label whose selector matches inside that element (possibly '').
    iconStats = ",".join(
        "".join(label for selector, label in ICON_LABELS if icon.select(selector))
        for icon in ad.select('div.inner div.url_area span.ico_area span.ico')
    )

    return [title, site, iconStats, thumbYN, sublinks, desc, itemlst]


def main() -> None:
    """Fetch the search page for ``sys.argv[1]`` and print the result rows.

    Exits with a usage message when no search term is given. HTTP errors and
    timeouts from ``requests`` propagate to the caller. The elapsed time is
    reported through ``logging.warning`` after the rows are printed.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <search term>")

    startTime = datetime.now()
    term = str(sys.argv[1])

    resp = requests.get(build_search_url(term), timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'UTF-8'
    bs = bs4.BeautifulSoup(resp.text, 'html.parser')

    # Presumably the search engine's corrected spelling of the term; empty
    # when the page has no `div.sp_keyword dl dd em` element.
    correctedKeyword = first_text(bs, 'div.sp_keyword dl dd em')

    powerlink = bs.select('div#power_link_body ul.lst_type li.lst')
    if powerlink:
        total = str(len(powerlink))
        for rank, ad in enumerate(powerlink, start=1):
            row = [term, correctedKeyword, "파워링크", total, str(rank)]
            print("\t".join(row + parse_ad(ad)))
    else:
        print(term + "\t" + correctedKeyword + "\t" + "NoPowerlink")

    consumtime = datetime.now() - startTime
    logging.warning(term + "\t" + str(consumtime))


if __name__ == "__main__":
    main()