126 lines
4.2 KiB
Python
126 lines
4.2 KiB
Python
#!/usr/bin/env python
|
|
|
|
import os, sys, subprocess, argparse, re
|
|
|
|
# Absolute directory that contains this script, independent of the
# caller's current working directory.
ScriptLocation = os.path.dirname(os.path.abspath(__file__))

# Extend the import search path with the "lib" directory next to the script
# (presumably where imarketlib and naver live -- confirm). os.path.join is
# used instead of a hard-coded "/" separator.
sys.path.append(os.path.join(ScriptLocation, "lib"))
|
|
|
|
def getSummaryResults(inlistrow):
    """Build one merged summary dict for a single query row.

    Args:
        inlistrow: Mapping that provides the keys ``term``, ``nTerm``,
            ``mode`` and ``utm_campaign``.

    Returns:
        A new dict filled first from the imarket summary and then from the
        Naver summary, so on a key clash the Naver value wins.
    """
    # Non-stdlib modules, imported at call time.
    import imarketlib
    import naver as naver_module

    attributes = {
        'utm_campaign': inlistrow['utm_campaign'],
        'utm_keyword': inlistrow['nTerm'],
    }
    imarket_client = imarketlib.imarketGet(
        inlistrow['term'], inlistrow['mode'], attributes
    )
    naver_client = naver_module.NaverGet(inlistrow['nTerm'])

    # Fetch both summaries before merging (imarket first, then Naver).
    imarket_summary = imarket_client.getSummaryResult()
    naver_summary = naver_client.getSummaryResult()

    merged = {}
    merged.update(imarket_summary)
    merged.update(naver_summary)
    return merged
|
|
|
|
def getImarketCrawl(inlistrow):
    """Return the full imarket result for a single query row.

    Args:
        inlistrow: Mapping that provides the keys ``term`` and ``mode``.

    Returns:
        Whatever ``imarketlib.imarketGet(...).getfullResult()`` returns.
    """
    # Non-stdlib module, imported at call time.
    import imarketlib

    # An empty attribute dict is passed in this code path.
    crawler = imarketlib.imarketGet(inlistrow['term'], inlistrow['mode'], {})
    return crawler.getfullResult()
|
|
|
|
def getinlineQuery(**kwargs):
    """Expand a comma-separated query string into one parameter dict per query.

    A fragment can be written as ``term:nTerm``: the part before the first
    ``:`` becomes ``term`` (used for imarket) and the part after it becomes
    ``nTerm`` (used for the ad side); anything after a second ``:`` is
    ignored. Without a ``:`` the fragment itself is ``term`` and ``nTerm``
    is the same text with spaces removed.

    Fragments that are empty or whitespace-only (for example from a trailing
    comma or an empty query string) are skipped so they do not become rows
    with an empty search term.

    Args:
        **kwargs: Must contain ``query`` (the comma-separated string). Every
            other keyword is copied unchanged into each resulting dict.

    Returns:
        A list of dicts, one per non-empty fragment, in input order.

    Raises:
        KeyError: If ``query`` is not supplied.
    """
    # Values shared by every row; computed once instead of once per fragment.
    shared = {key: value for key, value in kwargs.items() if key != 'query'}

    rows = []
    for fragment in kwargs['query'].split(','):
        if not fragment.strip():
            continue
        pieces = fragment.split(':')
        if len(pieces) <= 1:
            row = {'term': fragment, 'nTerm': fragment.replace(" ", "")}
        else:
            row = {'term': pieces[0], 'nTerm': pieces[1]}
        # Applied last so that, as before, extra keywords override the
        # derived 'term'/'nTerm' values if they use the same names.
        row.update(shared)
        rows.append(row)
    return rows
|
|
|
|
def GoCrawl(inlist):
    """Crawl every query row and return the results in input order.

    Args:
        inlist: Sequence of row mappings; each must have a ``mode`` key.

    Returns:
        A list with one entry per row: the getSummaryResults value when the
        row's mode is ``"SEM"``, otherwise the getImarketCrawl value.
    """
    return [
        getSummaryResults(row) if row['mode'] == "SEM" else getImarketCrawl(row)
        for row in inlist
    ]
|
|
|
|
def goSearchTsv(inlist):
    """Render nested search results as tab-separated text.

    Args:
        inlist: Sequence of sequences of dicts. Keys must be strings;
            values are converted with ``str()``.

    Returns:
        A string in which every dict contributes two lines: its keys, then
        its values. Every cell is followed by a tab (so each line ends with
        a trailing tab) and every line ends with a newline. Returns ``""``
        when there are no dicts.
    """
    # Collect the pieces and join once instead of growing a string with
    # repeated "+", which is quadratic in the output size.
    lines = []
    for group in inlist:
        for record in group:
            lines.append("".join(key + "\t" for key in record))
            lines.append("".join(str(value) + "\t" for value in record.values()))
    return "".join(line + "\n" for line in lines)
|
|
|
|
def goSemTsv(inlist):
    """Render SEM summary rows as tab-separated text without a header line.

    Args:
        inlist: Sequence of dicts; values are converted with ``str()``.

    Returns:
        One line per dict, each value followed by a tab. Lines are separated
        by a newline so several rows no longer run together on a single
        line; there is no trailing newline, which keeps the output for a
        single row identical to the previous behaviour. Returns ``""`` for
        an empty input.
    """
    rendered_rows = [
        "".join(str(value) + "\t" for value in row.values())
        for row in inlist
    ]
    return "\n".join(rendered_rows)
|
|
|
|
def _build_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--mode', type=str, required=True, help="select crawling mode: SEM, search, category")
    parser.add_argument('-q', '--query', type=str, help="query array separated by ',', not with -i option")
    parser.add_argument('-u', '--utm_campaign', type=str, help="campaign name required by SEM mode")
    parser.add_argument('-o', '--output', type=str, help="output format")
    return parser


def _as_tsv(result, mode):
    """Render crawl results with the TSV formatter that matches the mode."""
    if mode == 'search':
        return goSearchTsv(result)
    return goSemTsv(result)


def main(argv=None):
    """Parse the command line, run the crawl and print the result.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 after a successful crawl, 1 when the
        arguments are unusable (missing query, SEM mode without
        ``-u``, or a mode other than ``SEM``/``search``). Previously the
        last two cases fell through to the output step and crashed with a
        NameError because no result had been produced.
    """
    parargs = _build_parser().parse_args(argv)

    # '-m' is declared required, so argparse already rejects a missing mode;
    # only the optional arguments need checking here.
    if parargs.query is None:
        print("error: plz insert queries")
        return 1

    if parargs.mode == "SEM":
        if parargs.utm_campaign is None:
            print("error: plz add utm_campaign parameter by adding -u")
            return 1
        # Keyword spelled 'resultType' to match the search branch (it used
        # to be 'resultype' here); nothing in this file reads the key.
        rows = getinlineQuery(query=parargs.query,
                              resultType='summary',
                              mode=parargs.mode,
                              utm_campaign=parargs.utm_campaign)
    elif parargs.mode == "search":
        rows = getinlineQuery(query=parargs.query,
                              resultType='full',
                              mode=parargs.mode)
    else:
        # The help text mentions "category", but only SEM and search are
        # implemented.
        print("error: unsupported mode '%s'. plz set mode. SEM or search" % parargs.mode)
        return 1

    result = GoCrawl(rows)

    if parargs.output == 'json':
        # NOTE(review): this prints the Python repr of the result, not
        # strict JSON -- confirm what consumers expect before changing it.
        print(result)
    elif parargs.output == 'db':
        import pymysql
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration or environment variables. The connection is
        # opened but nothing is written through it yet, so it is closed
        # explicitly once the output has been printed.
        conn = pymysql.connect(host='localhost', user='maddiekorea', password='mad(#lin', db='maddiekorea', unix_socket='/var/run/mysqld/mysqld.sock', charset='utf8')
        try:
            print(_as_tsv(result, parargs.mode))
        finally:
            conn.close()
    else:
        print(_as_tsv(result, parargs.mode))
    return 0


if __name__ == "__main__":
    sys.exit(main())
|