python_apps/crwlers/conteenew/bid_naramarket.py
2023-11-03 14:49:12 +09:00

68 lines
3.5 KiB
Python

import requests, sys, bs4, urllib, re, datetime
from dateutil.relativedelta import relativedelta
# Build the search URL for the g2b.go.kr bid list (tbidList.do).
# The search term comes from the first command-line argument; fail with a
# readable usage message instead of an IndexError traceback when it is absent.
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <search term>")

# The term is sent as percent-encoded EUC-KR bytes.
# NOTE: .encode('euc-kr') raises UnicodeEncodeError for characters that
# EUC-KR cannot represent.
term = urllib.parse.quote_plus(sys.argv[1].encode('euc-kr'))

# Search window: one month back to three months ahead, formatted YYYY/MM/DD.
# "now" is sampled once so both ends are derived from the same instant.
now = datetime.datetime.now()
startdate = (now - relativedelta(months=1)).strftime('%Y/%m/%d')
enddate = (now + relativedelta(months=3)).strftime('%Y/%m/%d')
startdate = urllib.parse.quote_plus(startdate)
enddate = urllib.parse.quote_plus(enddate)

url = "http://www.g2b.go.kr:8101/ep/tbid/tbidList.do"
dataa = (
    "?searchType=1&bidSearchType=1&taskClCds=1&bidNm=" + term + "&searchDtType=1"
    + "&fromBidDt=" + startdate + "&toBidDt=" + enddate
    + "&fromOpenBidDt=&toOpenBidDt=&exceptEnd=Y&radOrgan=1&instNm=&instSearchRangeType=&refNo=&area=&areaNm=&industry=&industryCd=&budget=&budgetCompare=UP&detailPrdnmNo=&detailPrdnm=&procmntReqNo=&intbidYn=1&regYn=Y&recordCountPerPage=30"
)
url = url + dataa
# HTTP headers sent with the search request (a Firefox-on-macOS profile).
# No Cookie header is sent.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3",
    "Connection": "keep-alive",
    "Host": "www.g2b.go.kr:8101",
    "Referer": "http://www.g2b.go.kr:8101/ep/tbid/tbidFwd.do?bidSearchType=1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:92.0) Gecko/20100101 Firefox/92.0",
}
# Fetch the search result page. requests waits indefinitely unless a timeout
# is given, so set one to keep the crawler from hanging on a stalled server;
# a timeout surfaces as a requests exception, like any other failed request.
resp = requests.get(url, headers=headers, timeout=30)
resp.raise_for_status()
# Force EUC-KR decoding of the body rather than relying on the encoding
# requests would otherwise pick.
resp.encoding = 'EUC-KR'
html = resp.text
bs = bs4.BeautifulSoup(html, 'html.parser')
# All <tr> elements under the table with class "table_list_tbidTbl".
trs = bs.select("table.table_list_tbidTbl tr")
# Convert each result row into a dict and print the list of dicts.
# The first <tr> is never read (presumably the table header row).
# Columns used, by <td> index: 0 -> type, 3 -> name + detail link,
# 4 -> dept, 7 -> application period. A row with fewer cells simply yields
# a dict with fewer keys (an empty dict if it has no <td> at all).
res = []
for row in trs[1:]:
    cells = row.select('td')
    record = {}
    if len(cells) > 0:
        record["src"] = "나라장터"
        record["term"] = sys.argv[1]
        record["type"] = cells[0].getText().strip()
        record["budget"] = ""
        record["commitDate"] = ""
    if len(cells) > 3:
        record["name"] = cells[3].getText().strip()
        # A cell without a link, or a link without href, yields an empty URL
        # instead of aborting the whole run.
        links = cells[3].select('a')
        href = links[0].get('href') if links else None
        record["detailurl"] = href.strip() if href else ""
    if len(cells) > 4:
        record["dept"] = cells[4].getText().strip()
    if len(cells) > 7:
        # The cell text is split at "(": the part before it is the start of
        # the period, the part after it (with ")" removed) is the end.
        # When there is no "(", the end is left empty rather than raising.
        period = cells[7].getText().strip().split("(")
        record["applyDateFrom"] = period[0]
        record["applyDateTo"] = period[1].replace(")", "") if len(period) > 1 else ""
    res.append(record)
print(res)