#!/usr/bin/env python
"""Scrape Naver search pages and report where tracked sites appear in the ads."""
from datetime import datetime
import logging
import re

import bs4
import requests


class NaverGet:
    """Fetch two Naver result pages for a keyword and summarise ad placement.

    On construction the keyword is stripped of spaces and two pages are
    downloaded and parsed:

    * the "combo" page (``search.naver.com``, ``where=nexearch``), from which
      the items under ``div#power_link_body`` are collected, and
    * the "pw" page (``ad.search.naver.com``, ``where=ad``), from which the
      items under ``div.ad_section`` are collected.

    The remaining methods only read the parsed pages; they perform no
    further network access.
    """

    # Seconds to wait for Naver before giving up. Without a timeout
    # ``requests`` blocks indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    # (result-key prefix, domain substring looked for in the displayed URL).
    # Order defines the key order of the dicts built by the rank methods.
    _TRACKED_SITES = (
        ('Imarket', 'www.imarket.co.kr'),
        ('Navimro', 'www.navimro.com'),
        ('Misumi', 'kr.misumi-ec.com'),
        ('Speedmall', 'www.speedmall.co.kr'),
    )

    # Leading "<from>-<to> / " range shown before the total in the counter.
    _RANGE_PREFIX = re.compile(r"[0-9]+\-[0-9]+\ \/\s")

    def __init__(self, keyword):
        """Download and parse both result pages for ``keyword``.

        Args:
            keyword: Search term; every space character is removed before use.

        Raises:
            requests.RequestException: If either page cannot be fetched.
        """
        self.startTime = datetime.now()
        self.keyword = keyword.replace(" ", "")
        self.pwUrl = "https://ad.search.naver.com/search.naver"
        self.comboUrl = "https://search.naver.com/search.naver"
        self.combobs = self.getHTML(self.comboUrl, self.urlParameters("combo"))
        self.Pwbs = self.getHTML(self.pwUrl, self.urlParameters("pw"))
        self.comboList = self.getComboPWitems()
        self.PWList = self.getAllAds()

    # Timing hook, left disabled as in the original source:
    # def __del__(self):
    #     consumtime = datetime.now() - self.startTime
    #     logging.warning(self.keyword + "_naver : " + str(consumtime))

    def urlParameters(self, mode):
        """Return the query-string parameters for the given page ``mode``.

        Args:
            mode: ``"pw"`` for the ad page or ``"combo"`` for the main
                search page.

        Returns:
            A dict suitable for the ``params`` argument of ``requests.get``.

        Raises:
            ValueError: If ``mode`` is neither ``"pw"`` nor ``"combo"``.
        """
        if mode == "pw":
            return {
                'where': 'ad',
                'query': self.keyword,
            }
        if mode == "combo":
            return {
                'sm': 'tab_hty.top',
                'where': 'nexearch',
                'query': self.keyword,
                'oquery': self.keyword,
            }
        # Previously fell through to ``return data`` with ``data`` unbound.
        raise ValueError("unknown mode: {!r}".format(mode))

    def getHTML(self, url, urlparam):
        """GET ``url`` with ``urlparam`` and return the parsed document.

        The response is decoded as UTF-8 regardless of the declared charset.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-success HTTP status.
        """
        resp = requests.get(url, params=urlparam, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        resp.encoding = 'UTF-8'
        return bs4.BeautifulSoup(resp.text, 'html.parser')

    @staticmethod
    def _firstText(node, selector):
        """Return the stripped text of the first ``selector`` match, or None."""
        matches = node.select(selector)
        if not matches:
            return None
        return matches[0].getText().strip()

    def getComboPWitems(self):
        """Return the list items under ``div#power_link_body`` on the combo page."""
        return self.combobs.select('div#power_link_body ul.lst_type li.lst')

    def getAllAds(self):
        """Return the list items under ``div.ad_section`` on the pw page."""
        return self.Pwbs.select('div.ad_section ol.lst_type li.lst')

    def correctedTerm(self):
        """Return the text of the first ``div.sp_keyword dl dd em`` element.

        Returns an empty string when the combo page has no such element.
        """
        corrected = self._firstText(self.combobs, 'div.sp_keyword dl dd em')
        return '' if corrected is None else corrected

    def AllAdvitiserCount(self):
        """Return the total shown in the pw page's ``span.num_result`` counter.

        The counter text has a leading ``"<from>-<to> / "`` range and a
        trailing ``"건"`` unit; both are removed, as are thousands separators.

        Returns:
            The count as an int, or 0 when the page has no counter element.

        Raises:
            ValueError: If the counter is present but what remains after
                stripping is not an integer.
        """
        text = self._firstText(
            self.Pwbs, 'div.search_result div.inner span.num_result')
        if text is None:
            # No counter on the page; previously this raised IndexError.
            return 0
        text = self._RANGE_PREFIX.sub("", text)
        text = text.replace("건", "").replace(",", "")
        return int(text)

    def getComboSlotCount(self):
        """Return how many items the combo page's power-link section holds."""
        return len(self.getComboPWitems())

    def _rankSites(self, items, urlSelector, suffix):
        """Map each tracked site to its 1-based position within ``items``.

        Args:
            items: Parsed list items, in page order.
            urlSelector: CSS selector locating the displayed URL in an item.
            suffix: Appended to each site prefix to form the result key.

        Returns:
            A dict with one key per tracked site. The value is the position
            as a string, or ``''`` if the site was not found. When a site
            occurs more than once the last occurrence wins.
        """
        ranks = {prefix + suffix: '' for prefix, _ in self._TRACKED_SITES}
        for position, item in enumerate(items, start=1):
            site = self._firstText(item, urlSelector)
            if site is None:
                # Slot without a URL element: it still occupies a position
                # but cannot match any tracked site.
                continue
            for prefix, domain in self._TRACKED_SITES:
                if domain in site:
                    ranks[prefix + suffix] = str(position)
        return ranks

    def getComboRank(self):
        """Return tracked-site positions among the combo page's items.

        Keys are ``ImarketCombo``, ``NavimroCombo``, ``MisumiCombo`` and
        ``SpeedmallCombo``; see ``_rankSites`` for the value format.
        """
        return self._rankSites(self.comboList, 'div.inner a.lnk_url', 'Combo')

    def getPWRank(self):
        """Return tracked-site positions among the pw page's ad items.

        Keys are ``ImarketPw``, ``NavimroPw``, ``MisumiPw`` and
        ``SpeedmallPw``; see ``_rankSites`` for the value format.
        """
        return self._rankSites(
            self.PWList, 'div.inner div.url_area a.url', 'Pw')

    def getSummaryResult(self):
        """Collect every metric for the keyword into a single flat dict.

        Returns:
            A dict with ``nTerm``, ``correctedTerm``, ``comboSlots`` and
            ``Advertisers``, followed by the keys of ``getComboRank`` and
            ``getPWRank``.
        """
        resdic = {
            'nTerm': self.keyword,
            'correctedTerm': self.correctedTerm(),
            'comboSlots': self.getComboSlotCount(),
            'Advertisers': self.AllAdvitiserCount(),
        }
        resdic.update(self.getComboRank())
        resdic.update(self.getPWRank())
        return resdic