python_apps/crwlers/jason/detailcrawler.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
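# Query the simsale search API for a given keyword and print each hit
# as one tab-separated line (standalone crawler/check script).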
import json
import sys
import requests

# url = "http://192.168.0.177/search"     # development server
# url = "https://esch.simsale.kr/search"  # production server
url = "https://qaesch.simsale.kr/search"  # staging server

# Usage: python3 $PATH/jasonSrchCrwl.py {keyword}
keyword = sys.argv[1]

# Form payload: the "query" field carries the search request as a JSON string.
payload = {"query": json.dumps(
    {"service": "018", "keyword": keyword, "order": "hit",
     "page": 0, "count": 100, "dv_type": 2, "highlight": 1},
    ensure_ascii=False)}
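# Request headers mirroring a browser XHR call to the internal search host
# (192.168.0.196).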
headers = {  # manipulated HTTP headers
    'Host': '192.168.0.196',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate',
    'Referer': 'http://192.168.0.196/',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Origin': 'http://192.168.0.196',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache'
}
resp = requests.post(url, data=payload, headers=headers)
resp.raise_for_status()
resp.encoding = 'UTF-8'
data = resp.json()
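# Output format (tab-separated):
#   keyword  rank  end_flag  subject  score  po_order  po_keyword  cate_kwrd
# If there are no hits, a single "keyword<TAB>0<TAB>end_flag" line is printed.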
rc = data['end']  # 'end' flag returned by the search API
if len(data['contents']) == 0:
    print(keyword + "\t" + "0" + "\t" + str(rc))
else:
    for i, item in enumerate(data['contents'], start=1):
        print("\t".join([
            keyword, str(i), str(rc),
            item['subject'], str(item['score']),
            item['po_order'], item['po_keyword'], item['cate_kwrd'],
        ]))