# python_apps/crwlers/Aff_coupang_srch.py

#!/usr/bin/env python

import requests, bs4, urllib, sys, re, math

# Example of the search URL this script requests:
# https://www.coupang.com/np/search?component=&q=3m%EB%8B%88%ED%8A%B8%EB%A6%B4%EC%9E%A5%EA%B0%91&channel=user

# Base endpoint only. The query string is built by `requests` from the params
# dict, so the search term is encoded exactly once and `q` is not duplicated.
SEARCH_URL = "https://www.coupang.com/np/search"

# Headers sent with every search request (a desktop Chrome user agent).
# NOTE(review): 'br' is advertised in Accept-Encoding; presumably the response
# can only be decoded if a brotli decoder is installed alongside requests --
# confirm in the deployment environment.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Host': 'www.coupang.com',
}

# Seconds to wait for the server before giving up; without a timeout
# `requests` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# First column of every output row.
SOURCE_LABEL = "coupang"


def fetch_search_html(term, timeout=REQUEST_TIMEOUT):
    """Download the search result page for *term* and return it as text.

    Args:
        term: Raw (unencoded) search term; `requests` URL-encodes it.
        timeout: Seconds passed to `requests.get` as its timeout.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    params = {'component': '', 'q': term, 'channel': 'user'}
    resp = requests.get(
        SEARCH_URL,
        params=params,
        headers=REQUEST_HEADERS,
        timeout=timeout,
    )
    resp.raise_for_status()
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    resp.encoding = 'UTF-8'
    return resp.text


def extract_suggestions(html):
    """Return the stripped text of every related-keyword link in *html*.

    Links are the elements matching ``dl.search-related-keyword dd a``;
    the result is an empty list when the page has none.
    """
    soup = bs4.BeautifulSoup(html, 'html.parser')
    links = soup.select('dl.search-related-keyword dd a')
    return [link.getText().strip() for link in links]


def format_rows(term, suggestions):
    """Build the tab-separated output rows for one search term.

    Each row has five columns: source label, search term, suggestion text,
    total number of suggestions, and the 1-based rank of the suggestion.
    When *suggestions* is empty a single placeholder row is returned with an
    empty suggestion column and both counters set to 0.

    Args:
        term: The search term as given on the command line.
        suggestions: Suggestion strings in page order.

    Returns:
        A list of row strings without trailing newlines.
    """
    if not suggestions:
        return ["\t".join((SOURCE_LABEL, term, "", "0", "0"))]

    total = str(len(suggestions))
    return [
        "\t".join((SOURCE_LABEL, term, suggestion, total, str(rank)))
        for rank, suggestion in enumerate(suggestions, start=1)
    ]


def main(argv=None):
    """Run the search for the term in ``argv[1]`` and print the rows.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the search term is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Report a usage error instead of failing with an IndexError traceback.
        print("usage: Aff_coupang_srch.py <search term>", file=sys.stderr)
        return 2

    term = str(argv[1])
    html = fetch_search_html(term)
    for row in format_rows(term, extract_suggestions(html)):
        print(row)
    return 0


if __name__ == "__main__":
    sys.exit(main())