#!/Users/maddiekorea/Workspace/bin/python
# version 201904
import requests, bs4, urllib, sys, re, math, logging
|
|
from urllib import parse
|
|
from datetime import datetime
|
|
# Wall-clock start; the end of the script logs the elapsed time against it.
startTime = datetime.now()

# Example input:
# url = "http://www.imarket.co.kr/display/malls.do?_method=searchGoods&sc.entrNo=500000036863&sc.viewType=list&sc.row=1000"
if len(sys.argv) < 2:
    # Exit with a usage hint instead of a bare IndexError traceback.
    sys.exit("usage: " + sys.argv[0] + " <search-url>")

# The list view is forced by appending a parameter with "&", so the URL given
# on the command line must already carry a query string.
url = str(sys.argv[1]) + "&sc.viewType=list"
# NOTE: with sc.row=1 the request serves as the "NR" check
# (presumably "no result" -- confirm).
|
|
|
|
def headers(host='www.imarket.co.kr'):
    """Build the browser-like HTTP headers used for page requests.

    Args:
        host: Host name placed in the ``Host`` header and used to build the
            ``Referer`` header. The default keeps ``headers()`` returning
            exactly the values it always returned.

    Returns:
        A new dict mapping header names to values; a fresh dict is built on
        every call, so callers may modify it freely.
    """
    # Returned directly: binding the dict to a local called ``headers`` would
    # shadow this function's own name inside its body.
    return {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Host': host,
        'Pragma': 'no-cache',
        'Referer': 'http://' + host + '/',
        'Save-Data': 'on',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }
|
|
|
|
#def pageRequest( url, parameters, headers ) :
|
|
def pageRequest(url, headers, timeout=30, encoding='EUC-KR'):
    """Fetch *url* with an HTTP GET and return the decoded response body.

    Args:
        url: Address to request.
        headers: Mapping of HTTP header names to values sent with the request.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without one, requests waits indefinitely on a stalled server.
        encoding: Character encoding forced on the response before its text
            is read. Defaults to EUC-KR, the value this function always used.

    Returns:
        The response body decoded to ``str`` using *encoding*.

    Raises:
        requests.exceptions.HTTPError: via ``raise_for_status`` when the
            server answers with an error status.
        requests.exceptions.Timeout: when the server does not respond within
            *timeout* seconds.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    # Override whatever charset requests guessed from the response headers.
    resp.encoding = encoding
    return resp.text
|
|
|
|
#term = str(sys.argv[1])
|
|
|
|
#urlterm = parse.quote(query(term))
|
|
|
|
def _joined_text(elements, sep):
    """Return the stripped text of every element in *elements*, joined by *sep*."""
    return sep.join(element.getText().strip() for element in elements)


def _first_text(node, selector):
    """Return the stripped text of the first element under *node* matching *selector*.

    Raises IndexError when nothing matches, exactly like indexing the result
    of ``select`` with ``[0]``.
    """
    return node.select(selector)[0].getText().strip()


# Fetch the listing page and parse it.
htmlHead = pageRequest(url, headers())
bs = bs4.BeautifulSoup(htmlHead, 'html.parser')

# One <li> per listed product.
_list = bs.select('ul.prd_list_type li')

if not _list:
    print(sys.argv[1] + "\t" + "NoResult")
else:
    # Number read from the result heading (presumably the total hit count),
    # with thousands separators removed.
    rc = _first_text(bs, 'div.tit_category_wrap h2.tit_result span em').replace(",", "")

    for rank, item in enumerate(_list, start=1):
        # Strip the leading "상품코드 : " ("product code : ") label.
        prdCode = re.sub(r"^상품코드\ \:\ ", "", _first_text(item, 'div.info_box span.prd_code'))

        titles = item.select('div.info_box a.tit')
        if not titles:
            # The old message used an undefined name (``term``) and raised
            # NameError. Log the input URL and item rank, then skip the
            # malformed item instead of aborting the whole page.
            logging.error(sys.argv[1] + " : " + str(rank))
            continue
        prdName = titles[0].getText().strip()

        promoMsg = _first_text(item, 'div.info_box p.prd_promo')

        price = _first_text(item, 'div.price_box span.sale_price em.num').replace(",", "")

        # The discount element is optional; emit an empty column without it.
        couponArr = item.select('div.price_box span.discount em.num')
        coupon = couponArr[0].getText().strip() if couponArr else ''

        # Value of the quantity input (presumably the minimum order quantity).
        moq = item.select('div.amount_box span.btn_wrap label input.pr-number')[0].get('value')

        imgURL = item.select('div.img_box a img')[0].get('src')

        imgTags = _joined_text(item.select('div.img_box a span'), ",")
        tagData = _joined_text(item.select('div.info_box p.info_box02 span'), ",")

        # A button labelled "장바구니" ("cart") is reported as "판매중"
        # ("on sale"); any other label is passed through unchanged.
        outofStock = _first_text(item, 'div.btns a')
        if outofStock == "장바구니":
            outofStock = "판매중"

        txtdata = _joined_text(item.select('div.info_box p.prd_info span'), "\t")

        # One tab-separated row per product. The input URL and the result
        # count are separate columns: they used to be concatenated with no
        # separator, unlike the NoResult row above.
        print("\t".join([
            str(sys.argv[1]), str(rc), str(rank),
            prdCode, prdName,
            promoMsg, price,
            coupon, moq,
            imgTags, tagData,
            imgURL,
            outofStock,
            txtdata,
        ]))

# Log the total run time (at warning level, so it shows with default logging).
consumtime = datetime.now() - startTime
logging.warning(str(consumtime))
|