42 lines
1.6 KiB
Python
42 lines
1.6 KiB
Python
import os, signal, sys, pymysql,time, logging
|
|
import urllib.parse as urlparse
|
|
from datetime import datetime
|
|
import subprocess
|
|
|
|
def startNumber(day=None, hour=None):
    """Return an offset derived from the day of the month and hour of day.

    Args:
        day: Day of the month as an int. Defaults to the current local day
            (``time.strftime("%d")``).
        hour: Hour of the day as an int. Defaults to the current local hour
            (``time.strftime("%H")``).

    Returns:
        The integer offset computed from ``day``, ``hour`` and the per-bucket
        base value ``vari``.
    """
    if hour is None:
        hour = int(time.strftime("%H"))
    if day is None:
        day = int(time.strftime("%d"))

    # These must be a single if/elif/else chain. As independent `if`
    # statements the trailing `else` paired only with the last test, so every
    # earlier assignment was overwritten and `vari` was always 25 or 30.
    if day in range(1, 5):
        vari = 1
    elif day in range(6, 10):
        vari = 5
    elif day in range(11, 15):
        vari = 10
    elif day in range(16, 20):
        vari = 15
    elif day in range(21, 25):
        vari = 20
    elif day in range(26, 30):
        vari = 25
    else:
        # NOTE(review): range() excludes its upper bound, so days 5, 10, 15,
        # 20, 25, 30 and 31 match none of the buckets above and land here.
        # Confirm whether those boundary days were meant to join a bucket.
        vari = 30

    # NOTE(review): `23000*day-vari` evaluates as (23000*day) - vari. The
    # formula is kept exactly as written; confirm the intended grouping.
    startnum = (hour * 1000) + ((23000 * day - vari) - 1) + ((day - vari) - 1) * 1000
    return startnum
|
|
|
|
# Script body: pick a batch of search terms from the `crawler` database and run
# the related-term collector once per term, each with an 80 second limit.

# NOTE(review): the database password is hard-coded in source. Consider loading
# it from the environment or a protected config file and rotating this one.
conn = pymysql.connect(host='localhost', user='root', password='dlsxjvkzmdkdlakzpt!',db='crawler', unix_socket='/var/run/mysqld/mysqld.sock', charset='utf8')

try:
    start = startNumber()
    curs = conn.cursor(pymysql.cursors.DictCursor)
    if start <= 100000:
        # Page through naverpowerlinkterms, 1000 rows starting at `start`.
        # The offset is passed as a query parameter instead of being
        # concatenated into the SQL text.
        curs.execute(
            "SELECT term FROM `naverpowerlinkterms` ORDER BY upTime ASC LIMIT %s,1000",
            (start,),
        )
    else:
        # Offset is past the threshold: take 1000 rows from relatedkeywords.
        curs.execute(
            "SELECT extTerm AS term FROM relatedkeywords WHERE imarketRC > 0 AND adcheck = \"N\" ORDER BY upTime DESC LIMIT 0,1000"
        )
    rows = curs.fetchall()
finally:
    # All rows are in memory now, so release the connection before the
    # crawl loop starts, and also when the query itself fails.
    conn.close()

for row in rows:
    term = row['term']
    if term is None:
        # A NULL term cannot be passed as a command-line argument.
        continue

    # Pass the term as its own argv entry with no shell involved, so quotes,
    # `$`, backticks and similar characters in a term are not interpreted.
    crawl_cmd = [
        "/home/maddiekorea/anaconda3/bin/python3",
        "/home/maddiekorea/py/relatedTermCollector.py",
        term,
    ]
    executeCrawl = subprocess.Popen(crawl_cmd, stdout=subprocess.PIPE)
    try:
        executeCrawl.communicate(timeout=80)
    except subprocess.TimeoutExpired:
        # Stop the child, then reap it so no zombie process or open pipe is
        # left behind. Escalate to kill() if it ignores the terminate request.
        executeCrawl.terminate()
        try:
            executeCrawl.communicate(timeout=10)
        except subprocess.TimeoutExpired:
            executeCrawl.kill()
            executeCrawl.communicate()