123 lines
4.0 KiB
Python
123 lines
4.0 KiB
Python
#!/usr/bin/env python
|
|
import logging
import re
from datetime import datetime

import bs4
import requests
|
|
|
|
class NaverGet:
    """Download Naver search pages for a keyword and summarise ad positions.

    Constructing an instance performs two HTTP GET requests: one against
    ``comboUrl`` (``where=nexearch``) and one against ``pwUrl``
    (``where=ad``). Both responses are parsed with BeautifulSoup and the ad
    list items found in each page are cached on the instance.

    Attributes:
        startTime: ``datetime.now()`` taken when construction started.
        keyword: the search keyword with every space character removed.
        pwUrl: endpoint queried with the "pw" parameters.
        comboUrl: endpoint queried with the "combo" parameters.
        combobs: parsed document returned by ``comboUrl``.
        Pwbs: parsed document returned by ``pwUrl``.
        comboList: ad items selected from ``combobs``.
        PWList: ad items selected from ``Pwbs``.
    """

    # (host fragment searched for in an ad's displayed URL, result-key prefix).
    # The order here fixes the key order of the rank dictionaries.
    _TRACKED_SITES = (
        ('www.imarket.co.kr', 'Imarket'),
        ('www.navimro.com', 'Navimro'),
        ('kr.misumi-ec.com', 'Misumi'),
        ('www.speedmall.co.kr', 'Speedmall'),
    )

    def __init__(self, keyword):
        """Fetch and parse both result pages for *keyword*.

        Args:
            keyword: search term; spaces are stripped before it is sent.

        Raises:
            requests.RequestException: if either page cannot be downloaded
                (propagated from ``getHTML``).
        """
        self.startTime = datetime.now()
        self.keyword = keyword.replace(" ", "")
        self.pwUrl = "https://ad.search.naver.com/search.naver"
        self.comboUrl = "https://search.naver.com/search.naver"
        self.combobs = self.getHTML(self.comboUrl, self.urlParameters("combo"))
        self.Pwbs = self.getHTML(self.pwUrl, self.urlParameters("pw"))
        self.comboList = self.getComboPWitems()
        self.PWList = self.getAllAds()

    # NOTE: a __del__ hook that logged the time elapsed since ``startTime``
    # (via logging.warning) used to be here; it is disabled.

    def urlParameters(self, mode):
        """Return the query-string parameters for the given page *mode*.

        Args:
            mode: ``"pw"`` for the ``pwUrl`` request or ``"combo"`` for the
                ``comboUrl`` request.

        Returns:
            A new dict of query parameters built from ``self.keyword``.

        Raises:
            ValueError: if *mode* is neither ``"pw"`` nor ``"combo"``.
        """
        if mode == "pw":
            return {
                'where': 'ad',
                'query': self.keyword,
            }
        if mode == "combo":
            return {
                'sm': 'tab_hty.top',
                'where': 'nexearch',
                'query': self.keyword,
                'oquery': self.keyword,
            }
        # Previously an unknown mode fell through to an unbound local variable.
        raise ValueError("unknown mode: %r (expected 'pw' or 'combo')" % (mode,))

    def getHTML(self, url, urlparam, timeout=10):
        """GET *url* and return the response body parsed as HTML.

        Args:
            url: address to request.
            urlparam: dict of query-string parameters.
            timeout: seconds to wait for the server before giving up; without
                a timeout ``requests`` may block indefinitely.

        Returns:
            A ``bs4.BeautifulSoup`` document built with ``html.parser``.

        Raises:
            requests.HTTPError: if the response status indicates an error.
            requests.RequestException: on connection failures or timeouts.
        """
        resp = requests.get(url, params=urlparam, timeout=timeout)
        resp.raise_for_status()
        # Force UTF-8 decoding instead of relying on the guessed encoding.
        resp.encoding = 'UTF-8'
        return bs4.BeautifulSoup(resp.text, 'html.parser')

    def getComboPWitems(self):
        """Return the ad list items selected from the ``comboUrl`` page."""
        return self.combobs.select('div#power_link_body ul.lst_type li.lst')

    def getAllAds(self):
        """Return the ad list items selected from the ``pwUrl`` page."""
        return self.Pwbs.select('div.ad_section ol.lst_type li.lst')

    def correctedTerm(self):
        """Return the text of the first ``div.sp_keyword dl dd em`` element.

        Returns:
            The stripped element text, or ``''`` when the ``comboUrl`` page
            contains no such element.
        """
        matches = self.combobs.select('div.sp_keyword dl dd em')
        if not matches:
            return ''
        return matches[0].getText().strip()

    def AllAdvitiserCount(self):
        """Return the total shown in the ``pwUrl`` page's result counter.

        The counter text looks like ``"1-25 / 123건"``; the leading range and
        the trailing ``건`` are removed and the remainder is parsed as an
        integer. Thousands separators (``"1,234건"``) are accepted.

        Returns:
            The parsed count, or ``0`` when the page has no counter element.

        Raises:
            ValueError: if the counter text is present but not numeric.
        """
        counters = self.Pwbs.select(
            'div.search_result div.inner span.num_result')
        if not counters:
            # No counter element on the page: report zero instead of
            # failing with IndexError.
            return 0
        text = counters[0].getText().strip()
        text = re.sub(r"[0-9,]+\-[0-9,]+\ \/\s", "", text)
        text = text.replace("건", "").replace(",", "").strip()
        return int(text)

    def getComboSlotCount(self):
        """Return how many ad items the ``comboUrl`` page contains."""
        return len(self.getComboPWitems())

    def _rankSites(self, items, urlSelector, suffix):
        """Map each tracked site to its 1-based position within *items*.

        Args:
            items: sequence of parsed ad list items.
            urlSelector: CSS selector locating the displayed URL in an item.
            suffix: appended to each site prefix to form the result keys.

        Returns:
            Dict keyed ``<Site><suffix>`` whose values are the position as a
            string, or ``''`` when the site does not appear. If a site
            appears more than once, the last position is reported.
        """
        ranks = {prefix + suffix: '' for _, prefix in self._TRACKED_SITES}
        for position, item in enumerate(items, start=1):
            links = item.select(urlSelector)
            if not links:
                # An item without a URL element cannot match any site, but it
                # still occupies a slot, so later positions stay unchanged.
                continue
            shownUrl = links[0].getText().strip()
            for host, prefix in self._TRACKED_SITES:
                if host in shownUrl:
                    ranks[prefix + suffix] = str(position)
        return ranks

    def getComboRank(self):
        """Return tracked-site positions within ``comboList``.

        Returns:
            Dict with keys ``ImarketCombo``, ``NavimroCombo``,
            ``MisumiCombo`` and ``SpeedmallCombo``; see ``_rankSites``.
        """
        return self._rankSites(self.comboList, 'div.inner a.lnk_url', 'Combo')

    def getPWRank(self):
        """Return tracked-site positions within ``PWList``.

        Returns:
            Dict with keys ``ImarketPw``, ``NavimroPw``, ``MisumiPw`` and
            ``SpeedmallPw``; see ``_rankSites``.
        """
        return self._rankSites(
            self.PWList, 'div.inner div.url_area a.url', 'Pw')

    def getSummaryResult(self):
        """Collect every metric for the keyword into a single dict.

        Returns:
            Dict with ``nTerm``, ``correctedTerm``, ``comboSlots`` and
            ``Advertisers``, followed by the entries of ``getComboRank()``
            and ``getPWRank()``.
        """
        summary = {
            'nTerm': self.keyword,
            'correctedTerm': self.correctedTerm(),
            'comboSlots': self.getComboSlotCount(),
            'Advertisers': self.AllAdvitiserCount(),
        }
        summary.update(self.getComboRank())
        summary.update(self.getPWRank())
        return summary
|
|
|