#!/usr/bin/env python
"""Print the related search keywords 11st embeds in its search result page.

Usage: pass the search term as the first command-line argument.

Output is one tab-separated row per related keyword on stdout:

    11st <TAB> term <TAB> related keyword <TAB> total count <TAB> 1-based rank

When the page lists no related keywords a single row with an empty keyword
column, a count of 0 and a rank of 0 is printed instead.

NOTE: the original author marked this script as "not working".
"""

import json
import math  # unused here; kept from the original import list
import re  # kept from the original import list
import sys
import urllib.parse

# Sample request URLs kept for reference:
# http://search.11st.co.kr/Search.tmall?kwd=3m%25EB%258B%2588%25ED%258A%25B8%25EB%25A6%25B4%25EC%259E%25A5%25EA%25B0%2591
# http://search.11st.co.kr/Search.tmall?method=getCatalogPrdSearch&catalogYN=Y&kwd=
SEARCH_URL = "http://search.11st.co.kr/Search.tmall"

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0)',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Host': 'search.11st.co.kr',
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# JavaScript assignment in the page whose right-hand side is the JSON payload.
RELATED_MARKER = "window.searchDataFactory.relatedKeywordsList = "


def fetch_search_page(term):
    """Download the 11st search result page for *term* and return it as text.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Imported here so the pure parsing/formatting helpers below can be
    # imported without the third-party packages being installed.
    import bs4
    import requests

    # The term is percent-encoded here and then handed to requests as bytes;
    # requests percent-encodes the value again, which matches the
    # double-encoded ``kwd`` in the sample URL above.
    # NOTE(review): confirm the site still expects this double encoding.
    query = urllib.parse.quote_plus(term).encode('euc-kr')

    resp = requests.get(
        SEARCH_URL,
        params={'kwd': query},
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    resp.encoding = 'EUC-KR'
    # Round-trip through BeautifulSoup as the original script did, so the
    # text handed to the extractor is the parser's serialization of the page.
    return str(bs4.BeautifulSoup(resp.text, 'html.parser'))


def extract_related_keywords(page_text):
    """Return the JSON value assigned to ``relatedKeywordsList`` in *page_text*.

    Only the remainder of the line that follows the first occurrence of the
    marker is parsed.

    Raises:
        ValueError: if the marker is absent or the payload is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    _, marker, tail = page_text.partition(RELATED_MARKER)
    if not marker:
        raise ValueError("relatedKeywordsList assignment not found in page")
    first_line = tail.split("\n", 1)[0]
    # Drop only the trailing statement terminator; semicolons inside the JSON
    # strings are part of the data and must survive.
    payload = first_line.strip().rstrip(";").rstrip()
    return json.loads(payload)


def format_rows(term, related):
    """Build the tab-separated output rows for *term*.

    *related* is the parsed keyword list; each entry must provide a
    ``'relatedKwd'`` string.
    """
    if not related:
        return ["\t".join(("11st", term, "", "0", "0"))]
    total = str(len(related))
    return [
        "\t".join(("11st", term, entry['relatedKwd'], total, str(rank)))
        for rank, entry in enumerate(related, start=1)
    ]


def main(argv=None):
    """Run the script; return the process exit status.

    *argv* defaults to ``sys.argv[1:]``; its first item is the search term.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print("usage: {} TERM".format(sys.argv[0]), file=sys.stderr)
        return 2
    term = args[0]

    page_text = fetch_search_page(term)
    try:
        related = extract_related_keywords(page_text)
    except ValueError as err:
        print("11st: {}".format(err), file=sys.stderr)
        return 1

    for row in format_rows(term, related):
        print(row)
    return 0


if __name__ == "__main__":
    sys.exit(main())