python_apps/crwlers/Aff_11st_srch.py
2023-11-03 14:49:12 +09:00

48 lines
1.4 KiB
Python

#!/usr/bin/env python
# not working
import json
import math
import re
import sys
import urllib
import urllib.parse

import bs4
import requests
# Sample request URLs kept for reference (the keyword in the first one is
# percent-encoded twice):
#http://search.11st.co.kr/Search.tmall?kwd=3m%25EB%258B%2588%25ED%258A%25B8%25EB%25A6%25B4%25EC%259E%25A5%25EA%25B0%2591
#http://search.11st.co.kr/Search.tmall?method=getCatalogPrdSearch&catalogYN=Y&kwd=
SEARCH_URL = "http://search.11st.co.kr/Search.tmall"

# First column of every output row.
SOURCE_LABEL = "11st"

# Upper bound (seconds) for the HTTP request so the script cannot hang forever.
REQUEST_TIMEOUT_SECONDS = 10

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0)',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Host': 'search.11st.co.kr',
}

# JavaScript assignment in the result page whose right-hand side is the JSON
# list of related keywords.  Raw string: the pattern contains backslashes.
RELATED_KEYWORDS_ASSIGNMENT = re.compile(
    r"window\.searchDataFactory\.relatedKeywordsList\s*=\s*"
)


def build_query(term):
    """Return the value sent as the ``kwd`` request parameter for *term*.

    The term is percent-encoded here and ``requests`` percent-encodes
    parameter values once more when it builds the URL, so the keyword goes
    out double-encoded, as in the sample URL above.  The quoted text is pure
    ASCII, so the EUC-KR step only converts it to ``bytes``.
    """
    return urllib.parse.quote_plus(term).encode('euc-kr')


def fetch_search_page(term):
    """Download the search result page for *term* and return it as text.

    Raises:
        requests.RequestException: on connection problems, a timeout, or a
            non-success HTTP status.
    """
    response = requests.get(
        SEARCH_URL,
        params={'kwd': build_query(term)},
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    response.encoding = 'EUC-KR'
    # NOTE(review): the parse/serialize round-trip is kept from the original
    # script; it looks redundant for extracting inline JavaScript -- confirm
    # before removing.
    return str(bs4.BeautifulSoup(response.text, 'html.parser'))


def extract_related_keywords(page_text):
    """Return the decoded JSON value assigned to ``relatedKeywordsList``.

    The JSON value is decoded starting right after the assignment operator,
    so it may span several lines, may be followed by ``;`` or further
    script code, and semicolons inside keyword strings are preserved.

    Raises:
        ValueError: if the assignment is not present in *page_text* or its
            right-hand side is not valid JSON.
    """
    match = RELATED_KEYWORDS_ASSIGNMENT.search(page_text)
    if match is None:
        raise ValueError(
            "relatedKeywordsList assignment not found in the search page"
        )
    keywords, _end = json.JSONDecoder().raw_decode(page_text, match.end())
    return keywords


def format_rows(term, keywords):
    """Build the tab-separated output rows for *term*.

    Each row is ``11st<TAB>term<TAB>related keyword<TAB>total<TAB>rank`` with
    a 1-based rank.  When there are no related keywords a single row with an
    empty keyword column and ``0`` for both total and rank is returned.
    """
    if not keywords:
        return ["\t".join((SOURCE_LABEL, term, "", "0", "0"))]
    total = str(len(keywords))
    return [
        "\t".join((SOURCE_LABEL, term, entry['relatedKwd'], total, str(rank)))
        for rank, entry in enumerate(keywords, start=1)
    ]


def main(argv=None):
    """Print the related keywords 11st suggests for a search term.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  The first argument is the search term.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        # Exits with status 1 and a readable message instead of a traceback.
        sys.exit("usage: Aff_11st_srch.py SEARCH_TERM")
    term = args[0]
    page_text = fetch_search_page(term)
    for row in format_rows(term, extract_related_keywords(page_text)):
        print(row)


if __name__ == "__main__":
    main()