# Listing metadata: 23 lines, 1012 B, Python
import os
import signal
import subprocess
import sys
import urllib.parse as urlparse
from datetime import datetime

import pymysql

# Batch driver: reads up to 2100 rows from `naverpowerlinkterms` (ordered by
# upTime ascending) and runs keywordCheck_db.py once per row, appending the
# crawler's stdout to a per-day log file under /var/log/SANR/.

PYTHON_BIN = "/home/maddiekorea/anaconda3/bin/python3"
CRAWL_SCRIPT = "/home/maddiekorea/py/keywordCheck_db.py"
LOG_DIR = "/var/log/SANR/"
CRAWL_TIMEOUT = 5    # seconds a single crawler run may take before it is stopped
TERMINATE_GRACE = 2  # seconds to wait after SIGTERM before force-killing


def _campaign_from_url(url):
    """Return the first ``utm_campaign`` query value of *url*, or None if absent."""
    if not url:
        return None
    query = urlparse.parse_qs(urlparse.urlparse(url).query)
    # parse_qs omits blank values by default, so a missing or empty
    # utm_campaign both end up here as "no key".
    values = query.get('utm_campaign')
    return values[0] if values else None


def _run_crawl(term, pw_term, campaign):
    """Run the crawler for one row, appending its stdout to today's log file.

    The arguments are handed to the child as an argv list (no shell), so
    quotes or shell metacharacters in database values are passed verbatim
    instead of being interpreted.

    Returns True if the crawler exited within CRAWL_TIMEOUT seconds, False if
    it had to be stopped. Raises OSError if the log file cannot be opened or
    the interpreter cannot be started.
    """
    # The date is recomputed per run so a batch that crosses midnight rolls
    # over to the next day's log file.
    log_path = LOG_DIR + str(datetime.now().date()) + "_crawl.log"
    argv = [PYTHON_BIN, CRAWL_SCRIPT, term, pw_term, campaign]
    with open(log_path, 'ab') as log_file:
        proc = subprocess.Popen(argv, stdout=log_file)
        try:
            proc.wait(timeout=CRAWL_TIMEOUT)
            return True
        except subprocess.TimeoutExpired:
            # Ask politely first, then force-kill; always wait() so the child
            # is reaped and does not linger as a zombie.
            proc.terminate()
            try:
                proc.wait(timeout=TERMINATE_GRACE)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
            return False


# NOTE(review): database credentials are hard-coded here (root account);
# consider moving them to a config file or environment variable.
conn = pymysql.connect(host='localhost', user='root', password='dlsxjvkzmdkdlakzpt!', db='crawler', unix_socket='/var/run/mysqld/mysqld.sock', charset='utf8')
sql = "SELECT * FROM `naverpowerlinkterms` ORDER BY upTime ASC LIMIT 0,2100"
try:
    curs = conn.cursor(pymysql.cursors.DictCursor)
    curs.execute(sql)
    rows = curs.fetchall()
finally:
    # All rows are already in memory and the loop below never touches the
    # connection, so release it now rather than holding it for the whole run.
    conn.close()

for row in rows:
    campaign = _campaign_from_url(row['targetURL'])
    if campaign is None:
        # One malformed URL must not abort the remaining rows.
        print("skip: no utm_campaign in targetURL %r" % (row['targetURL'],), file=sys.stderr)
        continue
    try:
        _run_crawl(row['term'], row['PWTerm'], campaign)
    except OSError as exc:
        # Log file not writable or interpreter missing: report and move on to
        # the next row instead of stopping the batch.
        print("crawl failed for term %r: %s" % (row['term'], exc), file=sys.stderr)
|