#!/usr/bin/env python
|
|
|
|
import os, sys, subprocess, argparse, re
|
|
|
|
# Absolute directory that contains this script; its "lib" subdirectory is
# appended to the module search path.
ScriptLocation = os.path.dirname(os.path.abspath(__file__))
sys.path.append("{}/lib".format(ScriptLocation))
|
|
|
|
def getSummaryResults(term, nTerm, mode, **attributes):
    """Merge the summary results of the imarket and Naver crawlers.

    ``term``, ``mode`` and the extra keyword ``attributes`` (copied into a
    plain dict) are passed to ``imarketlib.imarketGet``; ``nTerm`` is passed
    to ``naver.NaverGet``. Both objects' ``getSummaryResult()`` outputs are
    folded into one dict, the Naver output last, so on a key clash the Naver
    value overwrites the imarket one.
    """
    # Project-local modules, imported only when a summary is requested.
    import imarketlib
    import naver as naver_lib

    extra = dict(attributes)
    imarket_crawler = imarketlib.imarketGet(term, mode, extra)
    naver_crawler = naver_lib.NaverGet(nTerm)

    summaries = (
        imarket_crawler.getSummaryResult(),
        naver_crawler.getSummaryResult(),
    )

    merged = {}
    for summary in summaries:
        merged.update(summary)
    return merged
|
|
|
|
def getImarketCrawl(term, mode):
    """Return the full imarket result for ``term`` crawled in ``mode``.

    Builds an ``imarketlib.imarketGet`` object with an empty attribute dict
    and returns whatever its ``getfullResult()`` yields.
    """
    # Project-local module, imported only when a full crawl is requested.
    from imarketlib import imarketGet

    return imarketGet(term, mode, {}).getfullResult()
|
|
|
|
def getinlineQuery(string, mode):
    """Split a comma-separated query string into a list of query dicts.

    Each piece between commas becomes one dict with three keys:
    ``'term'`` (the piece as written), ``'nTerm'`` (the piece with every
    space character removed) and ``'mode'`` (the ``mode`` argument).
    Pieces are not trimmed, and empty pieces are kept.
    """
    return [
        {'term': piece, 'nTerm': piece.replace(" ", ""), 'mode': mode}
        for piece in string.split(',')
    ]
|
|
|
|
def GoCrawl(inlist):
    """Run the crawl that matches each query's mode and collect the results.

    Args:
        inlist: iterable of query dicts. Every dict needs ``'term'`` and
            ``'mode'``; dicts whose mode is ``"SEM"`` also need ``'nTerm'``.

    Returns:
        A list with one entry per query, in input order: the value of
        ``getSummaryResults`` for ``"SEM"`` queries and the value of
        ``getImarketCrawl`` for every other mode.
    """
    res = []
    for query in inlist:
        if query['mode'] == "SEM":
            resdic = getSummaryResults(query['term'], query['nTerm'], query['mode'])
        else:
            # Read from the query itself: this branch previously subscripted
            # the builtin ``input`` function, which raised TypeError for
            # every non-SEM query.
            resdic = getImarketCrawl(query['term'], query['mode'])
        res.append(resdic)
    return res
|
|
|
|
def _collect_queries(cli_args):
    """Build the list of query dicts from -q, or else from the -i file.

    An inline ``--query`` takes precedence when both options are given.
    Otherwise the ``--input`` file is read line by line; blank lines are
    skipped and each remaining line goes through ``getinlineQuery`` (so a
    comma inside a line still separates queries, as it does for -q).
    """
    if cli_args.query is not None:
        return getinlineQuery(cli_args.query, cli_args.mode)

    queries = []
    # NOTE(review): the file is read as UTF-8 - confirm against how input
    # files are actually produced.
    with open(cli_args.input, encoding="utf-8") as handle:
        for line in handle:
            line = line.rstrip("\r\n")
            if line:
                queries.extend(getinlineQuery(line, cli_args.mode))
    return queries


# Command-line interface: -m is mandatory, and queries come from -q or -i.
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--mode', type=str, required=True, help="select crawling mode: SEM, search, category")
parser.add_argument('-r', '--resultType', type=str, help="select full or summary")
# The backslash is escaped so the help output shows a literal "\n"; an actual
# newline character would be collapsed away by argparse's help formatter.
parser.add_argument('-i', '--input', type=str, help='input file separated by \\n not with -q option')
parser.add_argument('-q', '--query', type=str, help="query array separated by ',', not with -i option")
parser.add_argument('-u', '--utm_campaign', type=str, help="campaign name required by SEM mode")
parser.add_argument('-t', '--utm_content', type=str, help="content name optionally required by SEM mode")
parser.add_argument('-o', '--output', type=str, help="output format")
args = parser.parse_args()

# Validate the option combination for the chosen mode, then crawl and print.
if args.input is None and args.query is None:
    print("error: plz insert queries")
elif args.mode == "SEM":
    args.resultType = "summary"
    if args.utm_campaign is None:
        print("error: plz add utm_campaign parameter by adding -u")
    else:
        # NOTE(review): utm_campaign / utm_content are validated here but are
        # not forwarded to the crawl - confirm whether that is intended.
        res = GoCrawl(_collect_queries(args))
        print(res)
elif args.mode == "search":
    args.resultType = "full"
    res = GoCrawl(_collect_queries(args))
    print(res)
elif args.mode == "category":
    if args.resultType is None:
        print("error: plz add resultType parameter by adding -r")
    else:
        res = GoCrawl(_collect_queries(args))
        print(res)
else:
    print("error: unsupported parameter")
|
|
|
|
#print(args.mode)
|
|
#input = getinlineQuery("3m절단석,3m 1994,베세이 클램프","search")
|
|
#res = []
|
|
#for i in range(len(input)):
|
|
# if input[i]['mode'] == "SEM":
|
|
# resdic = getSummaryResults(input[i]['term'],input[i]['nTerm'],input[i]['mode'])
|
|
# res.append(resdic)
|
|
# else:
|
|
# resdic = getImarketCrawl(input[i]['term'],input[i]['mode'])
|
|
# res.append(resdic)
|
|
#print(res)
|
|
#print(getSummaryResults("3m 1994","3m1994","SEM",utm_campaign="mts"))
|
|
#print(getImarketCrawl("공구"))
|
|
|
|
#print(test("3m 1994","SEM",utm_campaign="mts",utm_content="category",BIZ_CD="1234456")) |