python_apps/crwlers/imarket_new.py
2023-11-03 14:49:12 +09:00

97 lines
3.4 KiB
Python

#!/usr/bin/env python
import os, sys, subprocess, argparse, re
ScriptLocation = os.path.dirname(os.path.abspath(__file__))
sys.path.append(ScriptLocation + "/lib")
def getSummaryResults(term, nTerm, mode, **attributes):
    """Return the merged summary results of the imarket and Naver crawlers.

    Args:
        term: Search term handed to ``imarketlib.imarketGet``.
        nTerm: Search term handed to ``naver.NaverGet``.
        mode: Crawling mode handed to ``imarketlib.imarketGet``.
        **attributes: Extra keyword arguments, forwarded to
            ``imarketlib.imarketGet`` as a single dict.

    Returns:
        A dict built from the imarket summary and then updated with the
        Naver summary, so Naver entries win when both share a key.
    """
    # Project-local crawler modules are resolved lazily, at call time.
    import imarketlib
    import naver as naver_module

    extra = dict(attributes)
    imarket_crawler = imarketlib.imarketGet(term, mode, extra)
    naver_crawler = naver_module.NaverGet(nTerm)

    imarket_summary = imarket_crawler.getSummaryResult()
    naver_summary = naver_crawler.getSummaryResult()

    merged = dict(imarket_summary)
    merged.update(naver_summary)
    return merged
def getImarketCrawl(term, mode):
    """Return the full imarket crawl result for a single term.

    Args:
        term: Search term handed to ``imarketlib.imarketGet``.
        mode: Crawling mode handed to ``imarketlib.imarketGet``.

    Returns:
        Whatever ``getfullResult()`` of the created crawler returns.
    """
    # Project-local crawler module is resolved lazily, at call time.
    import imarketlib

    # No extra attributes are supplied for a full crawl.
    crawler = imarketlib.imarketGet(term, mode, {})
    return crawler.getfullResult()
def getinlineQuery(string, mode):
    """Turn a comma-separated query string into a list of query dicts.

    Args:
        string: Comma-separated search terms, e.g. ``"3m 1994,abc"``.
            ``None`` or an empty string yields an empty list.
        mode: Crawling mode copied into every query dict.

    Returns:
        A list with one dict per non-blank term, each holding:
        ``'term'`` (the term exactly as written), ``'nTerm'`` (the term with
        all spaces removed) and ``'mode'``.
    """
    # No query given (e.g. the -q option was omitted): nothing to crawl.
    if not string:
        return []

    return [
        {'term': term, 'nTerm': term.replace(" ", ""), 'mode': mode}
        for term in string.split(',')
        # Skip blank entries produced by "a,,b" or a trailing comma.
        if term.strip()
    ]
def GoCrawl(inlist):
    """Run the crawl for every query dict and collect the results.

    Args:
        inlist: Iterable of query dicts with the keys ``'term'``,
            ``'nTerm'`` and ``'mode'``.

    Returns:
        A list with one result per query, in input order. Queries whose mode
        is ``"SEM"`` go through ``getSummaryResults``; every other mode goes
        through ``getImarketCrawl``.
    """
    res = []
    for query in inlist:
        if query['mode'] == "SEM":
            resdic = getSummaryResults(query['term'], query['nTerm'], query['mode'])
        else:
            # Read from the current query dict; indexing the builtin `input`
            # function here raised TypeError for every non-SEM query.
            resdic = getImarketCrawl(query['term'], query['mode'])
        res.append(resdic)
    return res
def main():
    """Parse the command line, run the requested crawl and print the result.

    Queries come from ``-q`` (comma-separated) or, when ``-q`` is absent,
    from the ``-i`` file (one term per line). Validation failures print an
    error message to stdout and return without crawling.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--mode', type=str, required=True, help="select crawling mode: SEM, search, category")
    parser.add_argument('-r', '--resultType', type=str, help="select full or summary")
    parser.add_argument('-i', '--input', type=str, help='input file separated by \n not with -q option')
    parser.add_argument('-q', '--query', type=str, help="query array separated by ',', not with -i option")
    parser.add_argument('-u', '--utm_campaign', type=str, help="campaign name required by SEM mode")
    parser.add_argument('-t', '--utm_content', type=str, help="content name optionally required by SEM mode")
    parser.add_argument('-o', '--output', type=str, help="output format")
    args = parser.parse_args()

    if args.input is None and args.query is None:
        print("error: plz insert queries")
        return

    # Per-mode validation; resultType is forced for SEM and search.
    if args.mode == "SEM":
        args.resultType = "summary"
        if args.utm_campaign is None:
            print("error: plz add utm_campaign parameter by adding -u")
            return
        # NOTE(review): utm_campaign / utm_content are validated here but
        # are not passed on to GoCrawl below - confirm whether they should be.
    elif args.mode == "search":
        args.resultType = "full"
    elif args.mode == "category":
        if args.resultType is None:
            print("error: plz add resultType parameter by adding -r")
            return
    else:
        print("error: unsupported parameter")
        return

    query = args.query
    if query is None:
        # Only -i was given: read one term per line and join them into the
        # comma-separated form getinlineQuery expects.
        # Assumes the input file is UTF-8 encoded - TODO confirm.
        with open(args.input, encoding="utf-8") as handle:
            terms = [line.strip() for line in handle]
        query = ",".join(term for term in terms if term)
        if not query:
            print("error: plz insert queries")
            return

    res = GoCrawl(getinlineQuery(query, args.mode))
    print(res)


if __name__ == "__main__":
    main()
#print(args.mode)
#input = getinlineQuery("3m절단석,3m 1994,베세이 클램프","search")
#res = []
#for i in range(len(input)):
# if input[i]['mode'] == "SEM":
# resdic = getSummaryResults(input[i]['term'],input[i]['nTerm'],input[i]['mode'])
# res.append(resdic)
# else:
# resdic = getImarketCrawl(input[i]['term'],input[i]['mode'])
# res.append(resdic)
#print(res)
#print(getSummaryResults("3m 1994","3m1994","SEM",utm_campaign="mts"))
#print(getImarketCrawl("공구"))
#print(test("3m 1994","SEM",utm_campaign="mts",utm_content="category",BIZ_CD="1234456"))