jmquintana79
8/6/2016 - 3:34 PM

Awesome recursive downloader with validations for FTP servers

Awesome recursive downloader with validations for FTP servers

import sys
import subprocess
import time


# Supervisor: runs "<python> <script> <args...>" and restarts it every time it
# exits with a non-zero status. Stops once the child exits with status 0.

# Handle of the optional 'caffeinate' helper (macOS only); None elsewhere.
proccoffe = None
if 'darwin' in sys.platform:
    print('Running \'caffeinate\' on MacOSX to prevent the system from sleeping')
    proccoffe = subprocess.Popen('caffeinate')

# Re-use the current interpreter and forward every command line argument.
to_run = [sys.executable] + sys.argv[1:]
print('Running$ %s' % ' '.join(to_run))

# No child has been started yet; the first loop iteration launches it.
proc = None

try:
    while True:
        # poll() returns None while the child is still running,
        # otherwise its exit code.
        exit_code = None if proc is None else proc.poll()

        if proc is None or (exit_code is not None and exit_code != 0):
            # Never started, or it failed: (re)launch the child.
            proc = subprocess.Popen(to_run)
        elif exit_code == 0:
            # Clean exit: nothing left to supervise.
            break

        time.sleep(10)
finally:
    # Always release the sleep-prevention helper, even on Ctrl-C or errors.
    if proccoffe is not None:
        try:
            proccoffe.kill()
        except OSError:
            # The helper is already gone; nothing to clean up.
            pass

sys.exit()

# Usage (the supervisor restarts the given script until it exits with status 0):
#   python supervisor.py my_script.py param1 param2

from leacher import Leacher
import os
import sys
from datetime import datetime


''' PARAMETERS '''

# File selection: list of name patterns passed to the FTP server listing
file_match = ['file_common_part*'] 

# Host settings
host = 'name_host' # without 'ftp://' or 'http://' . Example: 'ftp.ncdc.noaa.gov'
account = ''
passwd = ''

# Set folder / files
ftp_folder = 'folder_of_server' #Remote FTP folder
local_folder = "local_folder_where_download"

# Setting this to True will delete the remote file after download or if the file has zero bytes
delete_files = False


""" DOWNLOAD """

# build input folder
input_folder = ftp_folder

# build output folder
output_folder = local_folder

# if it does not exist, create the local output folder
if not os.path.exists(output_folder):
    try:
        os.makedirs(output_folder)
        # report on screen
        print("Created folder: %s" % output_folder)
    except OSError as e:
        # Format the exception instead of concatenating it to a str
        # (str + exception raises TypeError and hides the real error).
        sys.exit("ERROR Creating folder: %s" % e)

# Init the leacher: the constructor connects and downloads everything,
# afterwards only the summary counters are reported.
try:
    leacher = Leacher(host, account, passwd, input_folder, output_folder, delete_files, file_match)
    leacher.log('\n************************\nProccessed %s items overall, deleted %s, saved %s\n************************\n' % (leacher.all_count, leacher.del_count, leacher.down_count), True)
except Exception as e:
    # Top-level boundary: report any failure and exit with a non-zero status.
    sys.exit("ERROR Launching the leacher: %s" % e)

import os
import logging
import ftplib
from ftplib import FTP

class Leacher:
    """Download files matching name patterns from one folder of an FTP server.

    Creating an instance configures logging to ``leacher.log``, connects to
    the server, switches to ``ftp_folder`` and immediately downloads every
    file matched by ``file_match`` into ``local_folder_path``.

    Parameters:
        host: FTP host name, without any scheme prefix.
        account: login user name.
        passwd: login password.
        ftp_folder: remote folder to switch to after login.
        local_folder_path: existing local folder that receives the files.
        delete_files: when true, each remote file is deleted after it was
            downloaded or when it has zero bytes. Strings are interpreted
            textually ('true', 'yes', '1', 'y', 'on' enable it; anything
            else, including 'False', disables it).
        file_match: one pattern or a list of patterns given to ``NLST``.

    Counters available afterwards: ``all_count`` (files listed),
    ``down_count`` (files downloaded), ``del_count`` (remote files deleted)
    and ``curr_count`` (files handled in the current listing).
    """

    # When deleting, the remote folder is listed again after this many files
    # of one listing have been handled.
    RELIST_AFTER = 10000

    def __init__(self, host, account, passwd, ftp_folder='', local_folder_path='', delete_files=False, file_match=''):

        logging.basicConfig(filename='leacher.log', format='%(asctime)s - %(levelname)s: %(message)s', level=logging.DEBUG)

        self.host = host
        self.account = account
        self.passwd = passwd
        self.ftp_folder = ftp_folder
        self.local_folder_path = local_folder_path
        # A non-empty string such as 'False' is truthy; normalise it so that
        # remote files are never deleted by accident.
        self.delete_files = self._to_bool(delete_files)
        # A bare string would be iterated character by character.
        if isinstance(file_match, str):
            self.file_match = [file_match] if file_match else []
        else:
            self.file_match = file_match

        self.files_count = 0
        self.down_count = 0
        self.del_count = 0
        self.curr_count = 0
        self.all_count = 0

        # Stays None until connect() managed to open a connection.
        self.ftp = None

        self.connect()
        self.get_files()

    @staticmethod
    def _to_bool(value):
        """Return *value* as a bool, reading strings like 'False' textually."""
        if isinstance(value, str):
            return value.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        return bool(value)

    def connect(self):
        """Open a fresh connection, log in and switch to the remote folder.

        Connection problems are logged and echoed, not raised. If the server
        cannot be reached at all, ``self.ftp`` is left as None.
        """
        # Drop a previous (possibly broken) connection before reconnecting.
        previous = getattr(self, 'ftp', None)
        self.ftp = None
        if previous is not None:
            try:
                previous.close()
            except ftplib.all_errors:
                pass

        try:
            self.ftp = FTP(self.host)
            self.log('\n\n')
            self.log('Connecting to %s' % self.host, True)
            self.ftp.login(self.account, self.passwd)
            self.ftp.cwd(self.ftp_folder)
            self.log('Switched to folder %s' % self.ftp_folder, True)

        except ftplib.all_errors as ex:
            self.log('Cant connect, ex: %s' % ex, True, logging.ERROR)

    def get_files(self):
        """List the remote files for every pattern and process each listing.

        A pattern whose listing or processing fails with an FTP/OS error is
        logged and skipped so the remaining patterns are still handled.
        """
        if getattr(self, 'ftp', None) is None:
            self.log('Not connected, nothing to download', True, logging.ERROR)
            return

        for pattern in self.file_match:
            try:
                filenames = self.ftp.nlst(pattern)
                files_count = len(filenames)
                self.log('Found %s files...' % files_count, True)
                self.all_count += files_count
                self.process_files(filenames)
            except ftplib.all_errors as ex:
                self.log("No possible find %s" % pattern)
                self.log('Ex: %s' % ex, False, logging.ERROR)

    def process_files(self, filenames):
        """Download (and optionally delete) each of the given remote files.

        Files of size zero are not downloaded. With ``delete_files`` enabled
        the remote file is deleted whether it was downloaded or empty.
        A permission error skips the entry; any other FTP/OS error triggers
        a reconnect and abandons the rest of this listing.
        """
        self.curr_count = 0

        for filename in filenames:
            try:
                size = self.ftp.size(filename)
                # size() yields None when the server gives no usable answer;
                # in that case try the download instead of failing on None > 0.
                if size is None or size > 0:
                    target = os.path.join(self.local_folder_path, filename)
                    # 'with' closes the local file even if the transfer fails.
                    with open(target, 'wb') as local_file:
                        self.ftp.retrbinary('RETR ' + filename, local_file.write)
                    self.down_count += 1
                    self.log('Downloaded "%s"' % filename, True)

                if self.delete_files:
                    self.ftp.delete(filename)
                    self.del_count += 1
                    self.log('Deleted "%s"' % filename)

            except ftplib.error_perm as ex:
                #Ex: 550 I can only retrieve regular files
                self.log('"%s" is not a file.\nEx: %s' % (filename, ex))

            except ftplib.all_errors as ex:
                #IOError ex: [Errno 60] Operation timed out, [Errno 50] Network is down, [Errno 32] Broken pipe, [Errno 54] Connection reset by peer
                self.log('Ex: %s' % ex, True, logging.ERROR)
                self.log('...trying to recoonect', True)
                self.connect()
                break

            self.curr_count += 1

            # NOTE(review): after this nested re-listing returns, the loop
            # continues over the old (now stale) listing - confirm whether it
            # should stop here instead.
            if self.curr_count == self.RELIST_AFTER and self.delete_files:
                self.log('...get the next %s files' % self.RELIST_AFTER, True)
                self.get_files()

    def log(self, msg, echo_it=False, lvl=logging.DEBUG):
        """Write *msg* to the log at level *lvl*; also print it if *echo_it*."""
        logging.log(lvl, msg)
        if echo_it:
            print(msg)

    #This is not used as it's not needed
    def is_file(self, filename):
        """Return True if the server reports a size for *filename*.

        A permission error from the SIZE request yields False.
        """
        try:
            self.ftp.size(filename)
            return True
        except ftplib.error_perm:
            return False