#!/usr/bin/env python
"""Command-line front end that crawls iMarket / Naver for a list of queries.

The project-local helpers ``imarketlib`` and ``naver`` are loaded from the
``lib`` directory next to this script.  Two modes are implemented:

* ``SEM``    - one merged summary dict per query (requires ``-u``).
* ``search`` - the full iMarket result per query.

Results are printed to stdout as TSV (default), or as JSON with ``-o json``.
"""
import argparse
import json
import os
import re  # not used in this file; kept because the original imports it
import subprocess  # not used in this file; kept because the original imports it
import sys

ScriptLocation = os.path.dirname(os.path.abspath(__file__))
# Make the project-local crawler modules under ./lib importable.
sys.path.append(ScriptLocation + "/lib")


def getSummaryResults(inlistrow):
    """Return the merged iMarket and Naver summary for one query row.

    Args:
        inlistrow: dict providing the keys 'term', 'nTerm', 'mode' and
            'utm_campaign' (the shape getinlineQuery builds for SEM mode).

    Returns:
        A dict filled from ``imarketGet(...).getSummaryResult()`` and then
        updated with ``NaverGet(...).getSummaryResult()``; on duplicate keys
        the Naver value wins because it is applied last.
    """
    # Project-local modules from ./lib, imported lazily so that merely
    # importing this script does not require them.
    import imarketlib
    import naver

    attrDic = {
        'utm_campaign': inlistrow['utm_campaign'],
        'utm_keyword': inlistrow['nTerm'],
    }
    imarket = imarketlib.imarketGet(inlistrow['term'], inlistrow['mode'], attrDic)
    # Use a distinct name instead of rebinding the imported ``naver`` module.
    naver_client = naver.NaverGet(inlistrow['nTerm'])

    res = {}
    res.update(imarket.getSummaryResult())
    res.update(naver_client.getSummaryResult())
    return res


def getImarketCrawl(inlistrow):
    """Return the full iMarket result for one query row.

    Args:
        inlistrow: dict providing the keys 'term' and 'mode'.

    Returns:
        Whatever ``imarketGet(...).getfullResult()`` returns.
    """
    import imarketlib  # project-local, see getSummaryResults

    attrDic = {}
    imarket = imarketlib.imarketGet(inlistrow['term'], inlistrow['mode'], attrDic)
    return imarket.getfullResult()


def getinlineQuery(**kwargs):
    """Expand a comma-separated query string into one parameter dict per query.

    Each query may be written as ``"A:B"``: the part before the colon becomes
    'term' (used for iMarket) and the part after it becomes 'nTerm' (used for
    the ad / Naver lookup).  Without a colon, 'term' is the query itself and
    'nTerm' is the query with all spaces removed.

    Args:
        **kwargs: must contain 'query' (str).  Every other keyword is copied
            unchanged into each resulting dict.

    Returns:
        A list of dicts, one per comma-separated query, in input order.

    Raises:
        KeyError: if 'query' is not supplied.
    """
    extras = {key: value for key, value in kwargs.items() if key != 'query'}

    qres = []
    for query in kwargs['query'].split(','):
        parts = query.split(':')
        if len(parts) <= 1:
            para = {'term': query, 'nTerm': query.replace(" ", "")}
        else:
            # Only the first two colon-separated parts are used.
            para = {'term': parts[0], 'nTerm': parts[1]}
        # Applied after term/nTerm, so a caller-supplied key of the same
        # name overrides the derived value (same order as before).
        para.update(extras)
        qres.append(para)
    return qres


def GoCrawl(inlist):
    """Run the crawl for every row and return the results in input order.

    Rows whose 'mode' is "SEM" go through getSummaryResults; every other
    mode goes through getImarketCrawl.
    """
    return [
        getSummaryResults(row) if row['mode'] == "SEM" else getImarketCrawl(row)
        for row in inlist
    ]


def goSearchTsv(inlist):
    """Render search results as tab-separated text.

    Args:
        inlist: a sequence of per-query result sequences, each holding dicts
            with string keys.

    Returns:
        For every dict, one line with its keys followed by one line with its
        values; every cell is followed by a tab and every line ends in "\\n".
    """
    chunks = []
    for group in inlist:
        for row in group:
            chunks.extend(key + "\t" for key in row)
            chunks.append("\n")
            chunks.extend(str(value) + "\t" for value in row.values())
            chunks.append("\n")
    return "".join(chunks)


def goSemTsv(inlist):
    """Render SEM summary dicts as tab-separated values (no header line).

    Args:
        inlist: a sequence of dicts.

    Returns:
        The values of every dict, each followed by a tab, concatenated.
    """
    # NOTE(review): no newline is emitted between rows, so several queries
    # end up on a single output line.  Kept as in the original - confirm
    # whether a row separator is wanted.
    return "".join(
        str(value) + "\t" for row in inlist for value in row.values()
    )


def _build_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--mode', type=str, required=True,
                        help="select crawling mode: SEM, search, category")
    parser.add_argument('-q', '--query', type=str,
                        help="query array separated by ',', not with -i option")
    parser.add_argument('-u', '--utm_campaign', type=str,
                        help="campaign name required by SEM mode")
    parser.add_argument('-o', '--output', type=str, help="output format")
    return parser


def _connect_db():
    """Open the MySQL connection used by the 'db' output mode."""
    import pymysql  # only needed for '-o db'

    # SECURITY: the password was hard-coded in the source.  It can now be
    # supplied through MADDIEKOREA_DB_PASSWORD; the literal fallback is kept
    # only for backward compatibility and should be removed (and rotated).
    return pymysql.connect(
        host='localhost',
        user='maddiekorea',
        password=os.environ.get('MADDIEKOREA_DB_PASSWORD', 'mad(#lin'),
        db='maddiekorea',
        unix_socket='/var/run/mysqld/mysqld.sock',
        charset='utf8',
    )


def main(argv=None):
    """Parse the command line, crawl, and print the results.

    Args:
        argv: argument list without the program name; ``None`` means
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the arguments are incomplete or the mode is not
        supported (previously those paths crashed with a NameError because
        ``result`` was never assigned).
    """
    parargs = _build_parser().parse_args(argv)

    if parargs.query is None:
        print("error: plz insert queries")
        return 1

    if parargs.mode == "SEM":
        if parargs.utm_campaign is None:
            print("error: plz add utm_campaign parameter by adding -u")
            return 1
        # NOTE(review): 'resultype' is spelled differently from the
        # 'resultType' used for search mode; nothing in this file reads
        # either key, so the original spelling is kept.
        rows = getinlineQuery(query=parargs.query, resultype='summary',
                              mode=parargs.mode,
                              utm_campaign=parargs.utm_campaign)
    elif parargs.mode == "search":
        rows = getinlineQuery(query=parargs.query, resultType='full',
                              mode=parargs.mode)
    else:
        # -m is required, so the mode is never None here; it is just not one
        # of the implemented modes (e.g. 'category').
        print("error: unsupported mode '" + parargs.mode
              + "'. plz set mode. SEM or search")
        return 1

    result = GoCrawl(rows)
    to_tsv = goSearchTsv if parargs.mode == 'search' else goSemTsv

    if parargs.output == 'json':
        # Emit real JSON rather than the Python repr.  default=str keeps
        # crawler values of non-JSON types from aborting the output.
        print(json.dumps(result, ensure_ascii=False, default=str))
    elif parargs.output == 'db':
        # NOTE(review): the connection is opened but nothing is written to
        # the database yet; the TSV is only printed.  Close it explicitly
        # instead of leaking it.
        conn = _connect_db()
        try:
            print(to_tsv(result))
        finally:
            conn.close()
    else:
        print(to_tsv(result))
    return 0


if __name__ == "__main__":
    sys.exit(main())