add filters for irrelevant files in ISISDATA (#5109) (d0a8d9aa) · Commits · aflab / astrogeology / ISIS3

isis/pytests/test_IsisDownload.py

+23 −8

Original line number	Original line	Diff line number	Diff line
	@@ -7,6 +7,7 @@ import pytest
	from unittest import mock		from unittest import mock
	from tempfile import TemporaryDirectory		from tempfile import TemporaryDirectory
	from pathlib import Path		from pathlib import Path
			import tempfile

	from importlib.util import spec_from_loader, module_from_spec		from importlib.util import spec_from_loader, module_from_spec
	from importlib.machinery import SourceFileLoader		from importlib.machinery import SourceFileLoader
	@@ -16,6 +17,7 @@ downloadIsisData = module_from_spec(spec)
	spec.loader.exec_module(downloadIsisData)		spec.loader.exec_module(downloadIsisData)
	did = downloadIsisData		did = downloadIsisData


	class MockedPopen:		class MockedPopen:
	def __init__(self, args, **kwargs):		def __init__(self, args, **kwargs):
	self.args = args		self.args = args
	@@ -35,7 +37,7 @@ class MockedPopen:
	else:		else:
	raise Exception()		raise Exception()

	return stdout, stderr		return {'out': stdout, 'stderr': stderr, 'args': self.args, 'returncode': self.returncode}


	class MockedBustedPopen:		class MockedBustedPopen:
	@@ -54,8 +56,21 @@ def test_rclone_unknown_exception():
	res = did.rclone("lsf", "test", extra_args=["-l", "-R", "--format", "p", "--files-only"], redirect_stdout=True, redirect_stderr=True)		res = did.rclone("lsf", "test", extra_args=["-l", "-R", "--format", "p", "--files-only"], redirect_stdout=True, redirect_stderr=True)


	def test_create_rclone_args():		def test_rclone():
	with TemporaryDirectory() as tdir:		with mock.patch("subprocess.Popen", MockedPopen):
	dest = Path(tdir)		res = did.rclone("lsf", "test", extra_args=["-l", "-R", "--format", "p", "--files-only"], redirect_stdout=True, redirect_stderr=True)
	args = did.create_rclone_arguments(str(dest), "lro_naifKernels:", ntransfers=100, rclone_kwargs=["--dry_run"])		assert res["out"].decode() == "Success"
	assert args == ['lro_naifKernels:', str(dest/"lro"/"kernels"), '--progress', '--checkers=100', '--transfers=100', '--track-renames', '--log-level=WARNING', '--dry_run']

			def test_rclone_unknown_exception():
			with mock.patch("subprocess.Popen", MockedBustedPopen):
			with pytest.raises(Exception, match="idk"):
			did.rclone("lsf", "test", extra_args=["-l", "-R", "--format", "p", "--files-only"], redirect_stdout=True, redirect_stderr=True)


			def test_rclone_with_auth():
			# Test the rclone function when auth is required
			with mock.patch("subprocess.Popen", MockedPopen):
			res = did.rclone("lsf", "test", extra_args=["-l", "-R", "--format", "p", "--files-only", "--rc-web-gui", "user:pass"], redirect_stdout=True, redirect_stderr=True)
			assert res["out"].decode() == "Success"
			assert '--rc-web-gui' in res['args']

isis/scripts/downloadIsisData

+95 −66

Original line number	Original line	Diff line number	Diff line
	@@ -11,21 +11,39 @@ import tempfile
	from shutil import which		from shutil import which
	from os import path		from os import path
	from collections import OrderedDict		from collections import OrderedDict
			from pathlib import Path
	SOURCE_PATH = {		import sys
	"naifKernels": "kernels",		import re
	"pck": "kernels/pck",
	"ck":"kernels/ck",		# priority is: lowest index is highest priority
	"spk":"kernels/spk",		filter_list = [
	"fk": "kernels/fk",		'+ calibration/**' # we generally want everything in calibration
	"iak":"kernels/iak",		'- source/',
	"sclk":"kernels/sclk",		'- /a_older_versions/',
	"tspk":"kernels/tspk",		'- /former_versions/',
	"usgs":""		'- corrupt_files/',
	}		'- zzarchive/',
			'- /original/',
			'- ck/prime_mission/',
			'- extended_mission/',
			'- /prime_mission/',
			'- ck/GEM/',
			'- ck/save/',
			'- spk/SAVE_SCS_2017-11-22/',
			'- spk/spk_psp_rec*',
			'- release*/',
			'- Archive/',
			'- ek/',
			'- *.lbl',
			'- *.txt',
			'- misc/',
			'- document/',
			'- *.csv',
			'- toolkit/',
			'- kernels_ORG/'
			]

	def find_conf():		def find_conf():
	from pathlib import Path
	local_path = Path("rclone.conf")		local_path = Path("rclone.conf")
	# this should be installed in scripts folder, so config is one directory up in etc		# this should be installed in scripts folder, so config is one directory up in etc
	install_path = Path(os.path.dirname(__file__)) / '..' / "etc" / "isis" / 'rclone.conf'		install_path = Path(os.path.dirname(__file__)) / '..' / "etc" / "isis" / 'rclone.conf'
	@@ -54,19 +72,7 @@ def call_subprocess(command, redirect_stdout=True, redirect_stderr=False):
	command,		command,
	stdout=stdout,		stdout=stdout,
	stderr=stderr) as proc:		stderr=stderr) as proc:
	(out, err) = proc.communicate()		return proc.communicate()

	if out:
	log.debug("Process output: ")
	log.debug(out.decode())
	if err:
	log.warning(err.decode("utf-8").replace("\\n", "\n"))

	return {
	"code": proc.returncode,
	"out": out,
	"error": err
	}


	def rclone(command, config=None, extra_args=[], redirect_stdout=True, redirect_stderr=False):		def rclone(command, config=None, extra_args=[], redirect_stdout=True, redirect_stderr=False):
	@@ -76,6 +82,9 @@ def rclone(command, config=None, extra_args=[], redirect_stdout=True, redirect_s

	# this is probably a config file on disk so pass it through		# this is probably a config file on disk so pass it through
	config_path = config		config_path = config

			for arg in extra_args:
			arg = re.sub(r'--filter=- ([^ ]+)', r'--filter="- \1"', arg)
	command_with_args = ["rclone", command, f'--config={config_path}', *extra_args]		command_with_args = ["rclone", command, f'--config={config_path}', *extra_args]
	log.debug("Invoking : %s", " ".join(command_with_args))		log.debug("Invoking : %s", " ".join(command_with_args))
	return call_subprocess(command_with_args, redirect_stdout, redirect_stderr)		return call_subprocess(command_with_args, redirect_stdout, redirect_stderr)
	@@ -89,6 +98,9 @@ def rclone(command, config=None, extra_args=[], redirect_stdout=True, redirect_s
	log.debug(f"USING CONFIG:\n{config}")		log.debug(f"USING CONFIG:\n{config}")

	f.write(config.encode())		f.write(config.encode())

			for arg in extra_args:
			arg = re.sub(r'--filter=- ([^ ]+)', r'--filter="- \1"', arg)
	command_with_args = ["rclone", command, f"--config={config_path}", *extra_args]		command_with_args = ["rclone", command, f"--config={config_path}", *extra_args]
	return call_subprocess(command_with_args, redirect_stdout, redirect_stderr)		return call_subprocess(command_with_args, redirect_stdout, redirect_stderr)
	except ProcessLookupError as not_found_e:		except ProcessLookupError as not_found_e:
	@@ -99,25 +111,12 @@ def rclone(command, config=None, extra_args=[], redirect_stdout=True, redirect_s
	log.exception(message)		log.exception(message)
	raise Exception(message)		raise Exception(message)

			def create_rclone_arguments(destination, mission_name, parsedArgs, rclone_kwargs=[]):
	def get_kernel_destination_path(source_type):

	try:
	source_path = SOURCE_PATH.get(source_type)
	except KeyError as e:

	raise KeyError(f"kernel path not found. Source type {source_type} is invalid")

	log.debug(f"source path for {source_type} is {source_path}" )
	return source_path


	def create_rclone_arguments(destination, mission_name, ntransfers=10, rclone_kwargs=[]):
	"""		"""
	Parameters		Parameters
	----------		----------

	destination str		destination : str
	path to location where files will be copied/downloaded too		path to location where files will be copied/downloaded too

	set_of_pub : set(str)		set_of_pub : set(str)
	@@ -125,22 +124,47 @@ def create_rclone_arguments(destination, mission_name, ntransfers=10, rclone_kwa
	"""		"""
	log.debug(f"Creating RClone command for {mission_name}")		log.debug(f"Creating RClone command for {mission_name}")
	mission_dir_name, source_type = mission_name.replace(":", "").split("_")		mission_dir_name, source_type = mission_name.replace(":", "").split("_")

	if (mission_dir_name == "legacybase"):		if (mission_dir_name == "legacybase"):
	# We still want things to go into base		# We still want things to go into base
	mission_dir_name = "base"		mission_dir_name = "base"

	log.debug(f"Mission_dir_name: {mission_dir_name}, source_type: {source_type}")		log.debug(f"Mission_dir_name: {mission_dir_name}, source_type: {source_type}")

	destination = os.path.join(destination, str(mission_dir_name).replace(":",""))		destination = os.path.join(destination, str(mission_dir_name).replace(":",""))
	#add kernel directory path if needed		if source_type == "naifKernels":
	destination = os.path.join(destination, get_kernel_destination_path(source_type))		destination = os.path.join(destination, "kernels")
	extra_args=[f"{mission_name}",f"{destination}", "--progress", f"--checkers={ntransfers}", f"--transfers={ntransfers}", "--track-renames", f"--log-level={log.getLevelName(log.getLogger().getEffectiveLevel())}"]
			if args.filter:
	extra_args.extend(rclone_kwargs)		filters = [f"- {arg}" for arg in args.filters]
			filter_list.extend(filters)

			# Check for additional include and exclude flags
			if args.include:
			includes = [f"+ {arg}" for arg in args.include]
			filter_list.extend(includes)

			if args.exclude:
			excludes = [f"- {arg}" for arg in args.exclude]
			filter_list.extend(excludes)

			# we need to add this to the end
			if args.include:
			filter_list.append("- *")

			filter_args = [f'--filter={item}' for item in filter_list]
			extra_args = [f"{mission_name}",
			f"{destination}",
			"--progress",
			f"--checkers={parsedArgs.num_transfers}",
			f"--transfers={parsedArgs.num_transfers}",
			"--track-renames",
			f"--log-level={log.getLevelName(log.getLogger().getEffectiveLevel())}"]
			extra_args.extend(filter_args)
	log.debug(f"Args created: {extra_args}")		log.debug(f"Args created: {extra_args}")
	return extra_args		return extra_args

			def main(mission, dest, cfg_path, parsedArgs, kwargs):
	def main(mission, dest, cfg_path, ntransfers, kwargs):
	"""		"""
	Parameters		Parameters
	----------		----------
	@@ -159,7 +183,7 @@ def main(mission, dest, cfg_path, ntransfers, kwargs):

	log.debug(f"Using config: {cfg_path}")		log.debug(f"Using config: {cfg_path}")
	result = rclone("listremotes", config=cfg_path)		result = rclone("listremotes", config=cfg_path)
	config_sources = result.get('out').decode("utf-8").split("\n")		config_sources = result[0].decode("utf-8").split("\n")
	if config_sources == ['']:		if config_sources == ['']:
	log.error("Remote sources came back empty. Get more info by re-running with verbose flag.")		log.error("Remote sources came back empty. Get more info by re-running with verbose flag.")
	quit(-1)		quit(-1)
	@@ -169,7 +193,7 @@ def main(mission, dest, cfg_path, ntransfers, kwargs):
	for source in sorted(config_sources, key=lambda x: x.split("_")[-1]):		for source in sorted(config_sources, key=lambda x: x.split("_")[-1]):
	parsed_name = source.split("_")		parsed_name = source.split("_")
	# If it is a mission, it should be in the format <mission_nam>_<source_type>		# If it is a mission, it should be in the format <mission_nam>_<source_type>
	if len(parsed_name) == 2 and parsed_name[1].replace(":","") in SOURCE_PATH.keys():		if len(parsed_name) == 2 and parsed_name[1] in ["usgs:", "naifKernels:"]:
	remotes_mission_name = parsed_name[0]		remotes_mission_name = parsed_name[0]
	supported_missions[remotes_mission_name] = supported_missions.get(remotes_mission_name, []) + [source]		supported_missions[remotes_mission_name] = supported_missions.get(remotes_mission_name, []) + [source]

	@@ -180,18 +204,18 @@ def main(mission, dest, cfg_path, ntransfers, kwargs):
	raise LookupError(f"{mission} is not in the list of supported missions: {supported_missions.keys()}")		raise LookupError(f"{mission} is not in the list of supported missions: {supported_missions.keys()}")

	if mission == "legacybase":		if mission == "legacybase":
	args = create_rclone_arguments(dest, "legacybase_usgs:", ntransfers, kwargs)		args = create_rclone_arguments(dest, "legacybase_usgs:", parsedArgs, kwargs)
	rclone(command="copy", extra_args=args, config=cfg_path, redirect_stdout=False, redirect_stderr=False)		rclone(command=parsedArgs.command, extra_args=args, config=cfg_path, redirect_stdout=False, redirect_stderr=False)
	elif(mission.upper() == "ALL"):		elif(mission.upper() == "ALL"):
	supported_missions.pop("legacybase")		supported_missions.pop("legacybase")
	for mission, remotes in supported_missions.items():		for mission, remotes in supported_missions.items():
	for remote in remotes:		for remote in remotes:
	args = create_rclone_arguments(dest, remote, ntransfers, kwargs)		args = create_rclone_arguments(dest, remote, parsedArgs, kwargs)
	rclone(command="copy", extra_args=args, config=cfg_path, redirect_stdout=False, redirect_stderr=False)		rclone(command=parsedArgs.command, extra_args=args, config=cfg_path, redirect_stdout=False, redirect_stderr=False)
	else:		else:
	for remote in supported_missions[mission]:		for remote in supported_missions[mission]:
	args = create_rclone_arguments(dest, remote, ntransfers, kwargs)		args = create_rclone_arguments(dest, remote, parsedArgs, kwargs)
	rclone(command="copy", extra_args=args, config=cfg_path, redirect_stdout=False, redirect_stderr=False)		rclone(command=parsedArgs.command, extra_args=args, config=cfg_path, redirect_stdout=False, redirect_stderr=False)


	if __name__ == '__main__':		if __name__ == '__main__':
	@@ -223,6 +247,11 @@ if __name__ == '__main__':
	parser.add_argument('-v', '--verbose', action='count', default=0)		parser.add_argument('-v', '--verbose', action='count', default=0)
	parser.add_argument('-n', '--num-transfers', action='store', default=10)		parser.add_argument('-n', '--num-transfers', action='store', default=10)
	parser.add_argument('--config', action='store', default=find_conf())		parser.add_argument('--config', action='store', default=find_conf())
			parser.add_argument('--filter', help='Additional filters for files', nargs='*')
			parser.add_argument('--include', help='files and patterns to include while downloading', nargs='*')
			parser.add_argument('--exclude', help='files and patterns to ignore while downloading', nargs='*')
			parser.add_argument('-c', '--command', choices=["copy", "sync", "ls", "lsd", "size"], help='files and patterns to ignore while downloading', default="copy")

	args, kwargs = parser.parse_known_args()		args, kwargs = parser.parse_known_args()

	log_kwargs = {		log_kwargs = {
	@@ -239,7 +268,7 @@ if __name__ == '__main__':
	log_kwargs['level'] = log.DEBUG		log_kwargs['level'] = log.DEBUG

	log.basicConfig(**log_kwargs)		log.basicConfig(**log_kwargs)
	log.debug("Additional Args:", kwargs)		log.debug("args: ", args)
			log.debug("Additional Args:", *kwargs)
	main(args.mission, args.dest, os.path.expanduser(args.config), args.num_transfers, kwargs)

			main(args.mission, args.dest, os.path.expanduser(args.config), args, kwargs)