108 lines
5.0 KiB
Python
108 lines
5.0 KiB
Python
import pandas as pd
|
|
import requests, bs4, urllib, sys, re, math, logging, os
|
|
from datetime import datetime
|
|
import subprocess
|
|
import pathlib
|
|
|
|
class imagemagickWrapper:
    """Build and run ImageMagick conversion commands for crawled images.

    The constructor reads a tab-separated crawl log (one row per downloaded
    image), checks every downloaded file and prepares one shell command per
    convertible image. Rows that cannot be converted are written to the
    ``*_convert.tsv`` result log immediately; the prepared commands are run
    one at a time through :meth:`execution`, and each command appends its own
    result line to the same log.
    """

    # Upper-cased file extensions that are handed to ImageMagick.
    _SUPPORTED_EXTENSIONS = ("JPG", "JPEG", "PNG")

    def __init__(self, logfile):
        """Load the crawl log and prepare the conversion commands.

        Args:
            logfile: Path of the tab-separated crawl log to process.
        """
        self.startTime = datetime.now()
        self.targetDirectory = '/home/maddiekorea/imgs/crawl'
        self.srcfile = logfile
        self.resfile = self.convertLogFile()
        self.df = self.dataFrame()
        self.convertCommands = self.checkImages()

    def dataFrame(self):
        """Return the crawl log parsed as a DataFrame (first line is the header)."""
        return pd.read_csv(self.srcfile, sep='\t', header=0)

    def timeLog(self):
        """Log the time elapsed since this instance was created."""
        elapsed = datetime.now() - self.startTime
        logging.warning("Converting Completed.\t%s", elapsed)

    def convertLogFile(self):
        """Return the result-log path: the source path with its extension
        replaced by ``_convert.tsv``.

        ``os.path.splitext`` only strips the final extension, so dots in
        directory components (``../data/x.tsv``) no longer corrupt the path.
        """
        return os.path.splitext(self.srcfile)[0] + "_convert.tsv"

    def getExtension(self, filename):
        """Return the text after the last ``.`` of *filename*, upper-cased.

        A name without any dot is returned upper-cased as a whole.
        """
        return filename.rsplit(".", 1)[-1].upper()

    def dirname(self, projectname, mode):
        """Return the per-project image directory name.

        The name is ``_<projectname>_<mode>_`` followed by the source file
        name with every ``imgCrawl_`` and ``.tsv`` occurrence removed.
        """
        tail = re.sub(r"imgCrawl_", "", self.srcfile)
        tail = re.sub(r"\.tsv", "", tail)
        return "_" + projectname + "_" + mode + "_" + tail

    def logging(self, errormsg):
        """Append *errormsg* as one line to the conversion result log.

        The line is written directly instead of through ``echo`` in a shell,
        so quotes, ``$`` or backticks in crawled URLs are stored verbatim.

        NOTE: inside the methods of this class the bare name ``logging``
        still refers to the standard-library module; this method is only
        reachable as ``self.logging``.
        """
        with open(self.convertLogFile(), "a", encoding="utf-8") as logfile:
            logfile.write(errormsg + "\n")

    def execution(self, commandpointer):
        """Run one prepared conversion command and log how long it took.

        Args:
            commandpointer: Zero-based index into ``self.convertCommands``.
        """
        started = datetime.now()
        # The command's stdout is discarded. Sending it to DEVNULL (rather
        # than an unread PIPE followed by wait()) cannot block the child
        # when it produces a lot of output.
        subprocess.run(
            self.convertCommands[commandpointer],
            shell=True,
            stdout=subprocess.DEVNULL,
            check=False,
        )
        elapsed = datetime.now() - started
        logging.warning(
            "Converting... : %s/%s\t%s",
            commandpointer + 1,
            len(self.convertCommands),
            elapsed,
        )

    def _imageSize(self, path):
        """Return ``(width, height)`` of the image at *path* in pixels.

        Raises:
            subprocess.CalledProcessError: ``identify`` exited with an error.
            OSError: ``identify`` could not be started.
            ValueError: the output did not contain two integers.
        """
        # Ask identify for exactly the two numbers instead of parsing its
        # default report, whose leading file name may contain any character.
        output = subprocess.check_output(
            ["identify", "-format", "%w %h\n", path]
        )
        width, height = output.decode().split()[:2]
        return int(width), int(height)

    def _convertCommand(self, options, downloadedfile, convertingfile,
                        convertedfile, successlog, errorlog, logfile):
        """Return the shell command that converts one image and logs the result.

        The steps are chained with ``&&`` so the downloaded file is removed
        and success is logged only when ``convert`` actually succeeded; any
        failing step appends *errorlog* instead. Every path and log line is
        shell-quoted because they originate from crawled data.
        """
        import shlex

        quote = shlex.quote
        append = " >> " + quote(logfile)
        return (
            "convert " + quote(downloadedfile) + " " + options + " "
            + quote(convertingfile)
            + " && rm -f " + quote(downloadedfile)
            + " && mv " + quote(convertingfile) + " " + quote(convertedfile)
            + " && printf '%s\\n' " + quote(successlog) + append
            + " || printf '%s\\n' " + quote(errorlog) + append
        )

    def checkImages(self):
        """Validate every crawled image and build its conversion command.

        Rows whose file is missing, empty, of an unsupported format, or whose
        size cannot be determined are written to the result log and skipped.

        Returns:
            list[str]: One shell command per convertible image, in row order.
        """
        commands = []
        logfile = self.convertLogFile()
        for _, row in self.df.iterrows():
            projectname = str(row['프로젝트'])
            mode = str(row['모드'])
            kind = str(row['유형'])
            prdCode = str(row['기준코드'])
            seq = str(row['시퀀스'])
            orgUrl = str(row['원URL'])
            filename = str(row['파일명'])

            # NOTE(review): the converted name keeps only the text before the
            # FIRST dot, so "a.b.jpg" and "a.c.jpg" both map to "a.JPG".
            # Left unchanged because other tooling may rely on this naming.
            stem = filename.split(".")[0]
            convertedfilename = stem + ".JPG"
            directory = self.targetDirectory + "/" + self.dirname(projectname, mode)
            downloadedfile = directory + "/" + filename
            convertingfile = directory + "/" + stem + "_c.JPG"
            convertedfile = directory + "/" + convertedfilename

            logprefix = "\t".join([projectname, mode, kind, prdCode, seq, orgUrl]) + "\t"
            errorprefix = logprefix + filename + "\t"

            # Stat first and treat "not found" as the missing-file case, so a
            # missing download is logged instead of raising.
            try:
                fileSize = os.stat(downloadedfile).st_size
            except (FileNotFoundError, NotADirectoryError):
                self.logging(errorprefix + "file does not exist, crawler error")
                continue
            if fileSize == 0:
                self.logging(errorprefix + "404 error from server")
                continue
            if self.getExtension(filename) not in self._SUPPORTED_EXTENSIONS:
                self.logging(errorprefix + "Unsupported file format.")
                continue

            if kind == "상세":
                # Detail images: only shrink to a maximum width of 918 px.
                options = "-quality 90 -resize \"918x>\""
            else:
                # Other images: pad to a white square whose side is the
                # larger dimension, then shrink to at most 1000 px wide.
                try:
                    side = max(self._imageSize(downloadedfile))
                except (subprocess.CalledProcessError, OSError, ValueError):
                    self.logging(errorprefix + "Converting Error.")
                    continue
                options = (
                    "-background white -gravity center -extent "
                    + str(side) + "x" + str(side)
                    + " -quality 90 -resize \"1000x>\""
                )

            commands.append(
                self._convertCommand(
                    options,
                    downloadedfile,
                    convertingfile,
                    convertedfile,
                    logprefix + convertedfilename + "\t" + "successfully Converted.",
                    errorprefix + "Converting Error.",
                    logfile,
                )
            )
        return commands
|
|
|