"""Search public bid notices on www.ebiz4u.co.kr and print them.

Usage: python <script> <search term>

The search term is sent EUC-KR encoded, the result table
(``table.tbl_list``) is parsed, and a list with one dict per notice is
printed to stdout using Python's ``repr`` formatting.
"""
import datetime
import re
import sys

import bs4
import requests
import urllib3
from dateutil.relativedelta import relativedelta

requests.packages.urllib3.disable_warnings()

_EXTRA_CIPHERS = ':HIGH:!DH:!aNULL'

# Extend the cipher list used by urllib3.  Newer urllib3 releases no longer
# expose DEFAULT_CIPHERS, so a missing attribute must not abort the script
# before any work has been done.
try:
    requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += _EXTRA_CIPHERS
except AttributeError:
    pass
try:
    requests.packages.urllib3.contrib.pyopenssl.util.ssl_.DEFAULT_CIPHERS += _EXTRA_CIPHERS
except AttributeError:
    # no pyopenssl support used / needed / available
    pass

SEARCH_URL = "https://www.ebiz4u.co.kr/bid/bidding.do?"
DETAIL_URL_HEAD = "https://www.ebiz4u.co.kr/bid/bidding.do?cmd=viewPublic&subcmd=vwIndexOnPortal&rfqNo="
DETAIL_URL_FOOT = "&aspId="
SOURCE_NAME = "이비즈포유"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever


def _strip_prefix(text, prefix):
    """Return *text* without a leading literal *prefix*, if present."""
    if prefix and text.startswith(prefix):
        return text[len(prefix):]
    return text


def _strip_suffix(text, suffix):
    """Return *text* without a trailing literal *suffix*, if present."""
    if suffix and text.endswith(suffix):
        return text[:len(text) - len(suffix)]
    return text


def _parse_row(row, term):
    """Convert one result-table row into a notice dict.

    Returns ``None`` for rows with fewer than four ``<td>`` cells, which
    cannot hold a notice.  Rows that have the cells but lack the expected
    ``span.subject`` / ``span.time`` / ``a`` elements still raise
    ``IndexError``.
    """
    cells = row.select('td')
    if len(cells) < 4:
        return None

    org = row.select('span.subject')[0].getText().strip()
    # The first cell's text starts with the span.subject text; the scraped
    # text is compared literally so characters such as "(" or "." in it
    # are not interpreted as regex syntax.
    title = _strip_prefix(cells[0].getText().strip(), org)

    # The href is split on single quotes; pieces 1 and 3 are used as the
    # rfqNo and aspId query values of the detail page URL.
    href_parts = row.select('a')[0].get('href').split("'")
    detail_url = DETAIL_URL_HEAD + href_parts[1] + DETAIL_URL_FOOT + href_parts[3]

    times = row.select('span.time')
    start_time = times[0].getText().strip()
    end_time = times[1].getText().strip()
    # Cells 2 and 3 end with their span.time text; cut it off and append it
    # again after a single space.
    start_text = _strip_suffix(cells[2].getText().strip(), start_time)
    end_text = _strip_suffix(cells[3].getText().strip(), end_time)

    # Key order is part of the printed output; keep it stable.
    return {
        "src": SOURCE_NAME,
        "term": term,
        "type": "",
        "budget": "",
        "commitDate": "",
        "dept": org + " " + cells[1].getText().strip(),
        "name": title,
        "detailurl": detail_url,
        "applyDateFrom": start_text + " " + start_time,
        "applyDateTo": end_text + " " + end_time,
    }


def main(argv=None):
    """Run the search for ``argv[1]`` (default: ``sys.argv``) and print the result list.

    Exits with a usage message when no search term is given.  HTTP errors
    are raised by ``raise_for_status``.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.exit("usage: python <script> <search term>")
    term = args[1]

    params = {
        "page": "",
        "cmd": "listPublic",
        "subcmd": "doPortalList",
        "srchAspUrl": "",
        "srchRfqNm": term.encode('euc-kr'),
        "srchOrgNm": "",
        # Date-range filters are sent empty (a now .. now + 2 months window
        # was sketched here at one point but never enabled).
        "srchFrDt": "",
        "srchToDt": "",
    }
    resp = requests.get(SEARCH_URL, params=params, verify=False,
                        timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'EUC-KR'

    soup = bs4.BeautifulSoup(resp.text, 'html.parser')
    rows = soup.select('table.tbl_list tr')

    res = []
    for row in rows[1:]:  # the first row is skipped, as before
        entry = _parse_row(row, term)
        if entry is not None:
            res.append(entry)
    print(res)


if __name__ == "__main__":
    main()