python_apps/crwlers/Aff_coupang_srch.py
2023-11-03 14:49:12 +09:00

53 lines
1.3 KiB
Python

#!/usr/bin/env python
import requests, bs4, urllib, sys, re, math
# Example of the request this script is meant to issue:
# https://www.coupang.com/np/search?component=&q=3m%EB%8B%88%ED%8A%B8%EB%A6%B4%EC%9E%A5%EA%B0%91&channel=user
# The query string is built entirely from the params dict in
# fetch_search_html(); keeping "?component=&q=" in the URL literal as well
# would make requests send a duplicate, empty "q=" parameter.
SEARCH_URL = "https://www.coupang.com/np/search"

# Without a timeout requests may block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

# NOTE: 'Accept-Encoding' is deliberately not set here. requests advertises
# only the encodings it is able to decode; forcing 'br' yields an undecodable
# body when no brotli package is installed.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Host': 'www.coupang.com',
}


def fetch_search_html(term):
    """Return the HTML of the Coupang search result page for *term*.

    Raises whatever ``requests`` raises on connection failure or timeout,
    and ``requests.HTTPError`` (via ``raise_for_status``) on a 4xx/5xx reply.
    """
    params = {'component': '', 'q': term, 'channel': 'user'}
    resp = requests.get(
        SEARCH_URL,
        params=params,
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    resp.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response headers claim.
    resp.encoding = 'UTF-8'
    return resp.text


def extract_related_keywords(html):
    """Return the stripped text of every related-keyword link found in *html*.

    Links are located with the CSS selector
    ``dl.search-related-keyword dd a``; the result is an empty list when the
    page contains no such element.
    """
    soup = bs4.BeautifulSoup(html, 'html.parser')
    anchors = soup.select('dl.search-related-keyword dd a')
    return [anchor.getText().strip() for anchor in anchors]


def format_rows(term, keywords):
    """Build the tab-separated output lines for *term* and its *keywords*.

    Each line has five columns: the literal source tag ``coupang``, the
    search term, the related keyword, the total number of related keywords,
    and the 1-based position of the keyword. When *keywords* is empty a
    single placeholder line is returned with an empty keyword column and
    ``0`` in both numeric columns.
    """
    total = str(len(keywords))
    if not keywords:
        return ["\t".join(("coupang", term, "", total, "0"))]
    return [
        "\t".join(("coupang", term, keyword, total, str(rank)))
        for rank, keyword in enumerate(keywords, start=1)
    ]


def main(argv=None):
    """Look up the term given on the command line and print one row per hit.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the search term. Exits
    with a usage message on stderr when the term is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: " + argv[0] + " <search term>")
    term = argv[1]
    html = fetch_search_html(term)
    for row in format_rows(term, extract_related_keywords(html)):
        print(row)


if __name__ == "__main__":
    main()