Commit 6da4c125c8432fe4d2bf73a631f5f8b39ffb5985 - ctdconverter

Made -m/--macros optional, providing a default value; moved around macros.xml; updated README.md Luis de la Garza 7 years ago

5 changed file(s) with 1362 addition(s) and 1318 deletion(s). Raw diff Collapse all Expand all

-1297

~~CTD2Galaxy/generator.py~~ less more

0		#!/usr/bin/env python
1		# encoding: utf-8
2
3		"""
4		@author: delagarza
5		"""
6
7
8		import sys
9		import os
10		import traceback
11		import ntpath
12		import string
13
14		from argparse import ArgumentParser
15		from argparse import RawDescriptionHelpFormatter
16		from CTDopts.CTDopts import CTDModel, _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, \
17		_FileFormat, ModelError
18		from collections import OrderedDict
19		from string import strip
20		from lxml import etree
21		from lxml.etree import SubElement, Element, ElementTree, ParseError, parse
22
# no names are exported through "from generator import *"
__all__ = []
__version__ = 1.0
__date__ = '2014-09-17'
__updated__ = '2016-05-09'

MESSAGE_INDENTATION_INCREMENT = 2

# maps the type of a CTD parameter to the name of the matching Galaxy parameter type
TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'data',
                       _OutFile: 'data', _Choices: 'select'}

# names of the macros that must be defined in the macros files given by the user
STDIO_MACRO_NAME = "stdio"
REQUIREMENTS_MACRO_NAME = "requirements"
ADVANCED_OPTIONS_MACRO_NAME = "advanced_options"

# parse_macros_files() raises an ApplicationException if any of these macros is missing
REQUIRED_MACROS = [STDIO_MACRO_NAME, REQUIREMENTS_MACRO_NAME, ADVANCED_OPTIONS_MACRO_NAME]
38
39
class CLIError(Exception):
    """Generic exception to raise and log different fatal errors.

    The text handed to the constructor is stored in ``msg`` with an ``"E: "`` prefix;
    ``str()`` of the exception returns that prefixed text.
    """

    def __init__(self, msg):
        # the two-argument form of super() is needed here: super(CLIError) alone yields an
        # unbound proxy, so Exception.__init__ would never be invoked
        super(CLIError, self).__init__(msg)
        self.msg = "E: %s" % msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
51
52
class InvalidModelException(ModelError):
    """A ModelError that carries a human-readable message in ``message``."""

    def __init__(self, message):
        super(InvalidModelException, self).__init__()
        self.message = message

    def __str__(self):
        return self.message

    # repr() yields exactly the same text as str()
    __repr__ = __str__
63
64
class ApplicationException(Exception):
    """Raised when the converter cannot complete the requested operation.

    The text handed to the constructor is stored unchanged in ``msg``, which is also
    what ``str()`` of the exception returns.
    """

    def __init__(self, msg):
        # the two-argument form of super() is needed here: super(ApplicationException) alone
        # yields an unbound proxy, so Exception.__init__ would never be invoked
        super(ApplicationException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
75
76
class ExitCode:
    """Plain record with three attributes: ``range``, ``level`` and ``description``."""

    def __init__(self, code_range="", level="", description=None):
        # code_range is exposed as the attribute "range"
        self.range, self.level, self.description = code_range, level, description
82
83
class DataType:
    """Plain record describing one file format.

    Attributes: ``extension``, ``galaxy_extension``, ``galaxy_type`` and ``mimetype``;
    all but ``extension`` default to None.
    """

    def __init__(self, extension, galaxy_extension=None, galaxy_type=None, mimetype=None):
        self.extension, self.galaxy_extension = extension, galaxy_extension
        self.galaxy_type, self.mimetype = galaxy_type, mimetype
90
91
class ParameterHardcoder:
    """Registry of parameter values that are hardcoded.

    Values are kept in ``parameter_map``. Its keys are produced by build_key():

        [ParameterName][separator][ToolName] -> HardcodedValue   (value for a single tool)
        [ParameterName] -> HardcodedValue                        (value for all tools)

    Examples (the separator is '!'):

        threads -> 24
        adapter!XTandemAdapter -> xtandem.exe
        adapter -> adapter.exe
    """

    def __init__(self):
        self.parameter_map = {}
        self.separator = "!"

    def get_hardcoded_value(self, parameter_name, tool_name):
        """Return the value registered for the parameter, or None if there is none.

        A value registered for the given tool wins over a value registered for all tools.
        """
        tool_specific_value = self.parameter_map.get(self.build_key(parameter_name, tool_name))
        if tool_specific_value is None:
            # nothing registered for this tool, fall back to the value that applies to all tools
            return self.parameter_map.get(parameter_name)
        return tool_specific_value

    def register_parameter(self, parameter_name, parameter_value, tool_name=None):
        """Store a value for the parameter; without a tool name it applies to all tools."""
        key = self.build_key(parameter_name, tool_name)
        self.parameter_map[key] = parameter_value

    def build_key(self, parameter_name, tool_name):
        """Return the map key: the bare parameter name, or name + separator + tool name."""
        if tool_name is not None:
            return "%s%s%s" % (parameter_name, self.separator, tool_name)
        return parameter_name
123
124
def main(argv=None):  # IGNORE:C0111
    """Command line entry point: parse the options and convert the given CTD files.

    :param argv: optional list of additional command line tokens. They are parsed after the
                 tokens already present in ``sys.argv[1:]``. None means that only ``sys.argv``
                 is used.
    :return: 0 on success or when interrupted from the keyboard, 1 if an ApplicationException
             or a ModelError was reported, 2 on any other exception (its traceback is printed).
    """
    # Command line options.
    # The tokens are collected in a local list instead of extending sys.argv in place, so
    # calling main() several times does not accumulate arguments in the process-wide list.
    if argv is None:
        cli_arguments = sys.argv[1:]
    else:
        cli_arguments = sys.argv[1:] + list(argv)

    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_short_description = "CTD2Galaxy - A project from the GenericWorkflowNodes family " \
                                "(https://github.com/orgs/genericworkflownodes)"
    program_usage = '''
USAGE:

I - Parsing a single CTD file and generate a Galaxy wrapper:

$ python generator.py -i input.ctd -o output.xml


II - Parsing all found CTD files (files with .ctd and .xml extension) in a given folder and
output converted Galaxy wrappers in a given folder:

$ python generator.py -i /home/user/*.ctd -o /home/user/galaxywrappers


III - Providing file formats, mimetypes

Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a certain
data format will be able to receive data from a port from the same format. This converter allows you to provide
a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. The layout of
this file consists of lines, each of either one, three or four columns separated by any amount of whitespace. The content
of each column is as follows:

* 1st column: file extension
* 2nd column: data type, as listed in Galaxy
* 3rd column: full-named Galaxy data type, as it will appear on datatypes_conf.xml
* 4th column: mimetype (optional)

The following is an example of a valid "file formats" file:

########################################## FILE FORMATS example ##########################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the file format as given in the CTD and second column is the Galaxy data format.
# The second, third and fourth columns can be left empty if the data type has already been registered
# in Galaxy, otherwise, all but the mimetype must be provided.

# CTD type # Galaxy type # Long Galaxy data type # Mimetype
csv tabular galaxy.datatypes.data:Text
fasta
ini txt galaxy.datatypes.data:Text
txt
idxml txt galaxy.datatypes.xml:GenericXml application/xml
options txt galaxy.datatypes.data:Text
grid grid galaxy.datatypes.data:Grid

##########################################################################################################

Note that each line consists precisely of either one, three or four columns. In the case of data types already
registered in Galaxy (such as fasta and txt in the above example), only the first column is needed. In the case of
data types that haven't been yet registered in Galaxy, the first three columns are needed (mimetype is optional).

For information about Galaxy data types and subclasses, see the following page:
https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes


IV - Hardcoding parameters

It is possible to hardcode parameters. This makes sense if you want to set a tool in Galaxy in 'quiet' mode or if
your tools support multi-threading and accept the number of threads via a parameter, without giving the end user the
chance to change the values for these parameters.

In order to generate hardcoded parameters, you need to provide a simple file. Each line of this file contains two
or three columns separated by whitespace. Any line starting with a '#' will be ignored. The first column contains
the name of the parameter, the second column contains the value that will always be set for this parameter. The
first two columns are mandatory.

If the parameter is to be hardcoded only for a set of tools, then a third column can be added. This column includes
a comma-separated list of tool names for which the parameter will be hardcoded. If a third column is not included,
then all processed tools containing the given parameter will get a hardcoded value for it.

The following is an example of a valid file:

##################################### HARDCODED PARAMETERS example #####################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the name of the parameter and the second column is the value that will be used.

# Parameter name # Value # Tool(s)
threads \\${GALAXY_SLOTS:-24}
mode quiet
xtandem_executable xtandem XTandemAdapter
verbosity high Foo, Bar

#########################################################################################################

Using the above file will produce a <command> similar to:

[tool_name] ... -threads \\${GALAXY_SLOTS:-24} -mode quiet ...

For all tools. For XTandemAdapter, the <command> will be similar to:

XtandemAdapter ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -xtandem_executable xtandem ...

And for tools Foo and Bar, the <command> will be similar to:

Foo ... ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -verbosity high ...


V - Control which tools will be converted

Sometimes only a subset of CTDs needs to be converted. It is possible to either explicitly specify which tools will
be converted or which tools will not be converted.

The value of the -s/--skip-tools parameter is a file in which each line will be interpreted as the name of a tool
that will not be converted. Conversely, the value of the -r/--required-tools is a file in which each line will be
interpreted as a tool that is required. Only one of these parameters can be specified at a given time.

The format of both files is exactly the same. As stated before, each line will be interpreted as the name of a tool;
any line starting with a '#' will be ignored.

'''
    program_license = '''%(short_description)s
Copyright 2015, Luis de la Garza

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

%(usage)s
''' % {'short_description': program_short_description, 'usage': program_usage}

    try:
        # Setup argument parser
        parser = ArgumentParser(prog="CTD2Galaxy", description=program_license,
                                formatter_class=RawDescriptionHelpFormatter, add_help=True)
        # note: options using nargs="+" together with action="append" produce lists of lists,
        # which are flattened later by validate_and_prepare_args()
        parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+",
                            action="append", help="List of CTD files to convert.")
        parser.add_argument("-o", "--output-destination", dest="output_destination", required=True,
                            help="If multiple input files are given, then a folder in which all generated "
                                 "XMLs will be generated is expected; "
                                 "if a single input file is given, then a destination file is expected.")
        parser.add_argument("-f", "--formats-file", dest="formats_file",
                            help="File containing the supported file formats. Run with '-h' or '--help' to see a "
                                 "brief example on the layout of this file.", default=None, required=False)
        parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line",
                            help="Adds content to the command line", default="", required=False)
        parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination",
                            help="Specify the location of a datatypes_conf.xml to modify and add the registered "
                                 "data types. If the provided destination does not exist, a new file will be "
                                 "created.",
                            default=None, required=False)
        parser.add_argument("-x", "--default-executable-path", dest="default_executable_path",
                            help="Use this executable path when <executablePath> is not present in the CTD",
                            default=None, required=False)
        parser.add_argument("-b", "--blacklist-parameters", dest="blacklisted_parameters", default=[], nargs="+",
                            action="append",
                            help="List of parameters that will be ignored and won't appear on the galaxy stub",
                            required=False)
        parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False,
                            help="Default category to use for tools lacking a category when generating "
                                 "tool_conf.xml")
        parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None,
                            required=False,
                            help="Specify the location of an existing tool_conf.xml that will be modified to "
                                 "include the converted tools. If the provided destination does not exist, a new "
                                 "file will be created.")
        parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False,
                            help="The path that will be prepended to the file names when generating tool_conf.xml")
        parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False,
                            help="Each line of the file will be interpreted as a tool name that needs translation. "
                                 "Run with '-h' or '--help' to see a brief example on the format of this file.")
        parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False,
                            help="File containing a list of tools for which a Galaxy stub will not be generated. "
                                 "Run with '-h' or '--help' to see a brief example on the format of this file.")
        parser.add_argument("-m", "--macros", dest="macros_files", default=[], nargs="+", action="append",
                            help="Import the additional given file(s) as macros. The macros stdio, requirements "
                                 "and advanced_options are required. Please see sample_files/macros.xml for an "
                                 "example of a valid macros file. All defined macros will be imported.",
                            required=True)
        parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None,
                            required=False,
                            help="File containing hardcoded values for the given parameters. Run with '-h' or "
                                 "'--help' to see a brief example on the format of this file.")
        # TODO: add verbosity, maybe?
        parser.add_argument("-V", "--version", action='version', version=program_version_message)

        # Process arguments
        args = parser.parse_args(cli_arguments)

        # validate and prepare the passed arguments
        validate_and_prepare_args(args)

        # extract the names of the macros and check that we have found the ones we need
        macros_file_names = args.macros_files
        macros_to_expand = parse_macros_files(macros_file_names)

        # parse the given supported file-formats file
        supported_file_formats = parse_file_formats(args.formats_file)

        # parse the hardcoded parameters file
        parameter_hardcoder = parse_hardcoded_parameters(args.hardcoded_parameters)

        # parse the skip/required tools files
        skip_tools = parse_tools_list_file(args.skip_tools_file)
        required_tools = parse_tools_list_file(args.required_tools_file)

        parsed_models = convert(args.input_files,
                                args.output_destination,
                                supported_file_formats=supported_file_formats,
                                default_executable_path=args.default_executable_path,
                                add_to_command_line=args.add_to_command_line,
                                blacklisted_parameters=args.blacklisted_parameters,
                                required_tools=required_tools,
                                skip_tools=skip_tools,
                                macros_file_names=macros_file_names,
                                macros_to_expand=macros_to_expand,
                                parameter_hardcoder=parameter_hardcoder)

        # TODO: add some sort of warning if a macro that doesn't exist is to be expanded

        # it is not needed to copy the macros files, since the user has provided them

        # generation of galaxy stubs is ready... now, let's see if we need to generate a tool_conf.xml
        if args.tool_conf_destination is not None:
            generate_tool_conf(parsed_models, args.tool_conf_destination,
                               args.galaxy_tool_path, args.default_category)

        # now datatypes_conf.xml
        if args.data_types_destination is not None:
            generate_data_type_conf(supported_file_formats, args.data_types_destination)

        return 0

    except KeyboardInterrupt:
        # handle keyboard interrupt
        return 0
    except ApplicationException as e:
        error("CTD2Galaxy could not complete the requested operation.", 0)
        error("Reason: " + e.msg, 0)
        return 1
    except ModelError as e:
        error("There seems to be a problem with one of your input CTDs.", 0)
        # not every ModelError carries a "msg" attribute (InvalidModelException stores its text
        # in "message"), so fall back to the string form of the exception
        error("Reason: " + str(getattr(e, "msg", e)), 0)
        return 1
    except Exception:
        traceback.print_exc()
        return 2
377
378
def parse_tools_list_file(tools_list_file):
    """Read a list of tool names from a file, one name per line.

    Blank lines and lines starting with '#' are ignored; surrounding whitespace is removed
    from every name.

    :param tools_list_file: path of the file to read, or None.
    :return: None if no file was given, otherwise the list of tool names in file order.
    """
    if tools_list_file is None:
        return None

    with open(tools_list_file) as tools_file:
        candidates = (raw_line.strip() for raw_line in tools_file)
        return [tool_name for tool_name in candidates if tool_name and not tool_name.startswith("#")]
391
392
def parse_macros_files(macros_file_names):
    """Collect the names of all macros defined in the given macros files.

    Every ``xml`` element found directly under the root of each file contributes the value of
    its ``name`` attribute. A warning is issued for names that are found more than once.

    :param macros_file_names: iterable with the paths of the macros files.
    :return: set with the names of the found macros, without the advanced_options macro.
    :raises ApplicationException: if a file cannot be opened or parsed, or if any of the
                                  macros listed in REQUIRED_MACROS was not found.
    """
    macros_to_expand = set()

    for macros_file_name in macros_file_names:
        try:
            # the file is closed as soon as it has been parsed, even if parsing fails
            with open(macros_file_name) as macros_file:
                root = parse(macros_file).getroot()
        except ParseError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " +
                                       str(e))
        except IOError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " +
                                       str(e))

        for xml_element in root.findall("xml"):
            name = xml_element.attrib["name"]
            if name in macros_to_expand:
                warning("Macro %s has already been found. Duplicate found in file %s." %
                        (name, macros_file_name), 0)
            else:
                macros_to_expand.add(name)

    # we depend on "stdio", "requirements" and "advanced_options" to exist in at least one of the given files
    missing_needed_macros = [required_macro for required_macro in REQUIRED_MACROS
                             if required_macro not in macros_to_expand]

    if missing_needed_macros:
        raise ApplicationException(
            "The following required macro(s) were not found in any of the given macros files: %s, "
            "see sample_files/macros.xml for an example of a valid macros file."
            % ", ".join(missing_needed_macros))

    # we do not need to "expand" the advanced_options macro
    macros_to_expand.remove(ADVANCED_OPTIONS_MACRO_NAME)
    return macros_to_expand
429
def parse_hardcoded_parameters(hardcoded_parameters_file):
    """Build a ParameterHardcoder from the given hardcoded parameters file.

    Each line that is neither blank nor starts with '#' holds two or three columns separated
    by whitespace: parameter name, hardcoded value and, optionally, a comma-separated list of
    tool names. Without the third column the value is registered for all tools. Lines with
    fewer than two columns are reported with a warning and ignored.

    :param hardcoded_parameters_file: path of the file to read, or None.
    :return: a ParameterHardcoder; it is empty if no file was given.
    """
    parameter_hardcoder = ParameterHardcoder()
    if hardcoded_parameters_file is None:
        return parameter_hardcoder

    with open(hardcoded_parameters_file) as f:
        for line_number, line in enumerate(f, 1):
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith("#"):
                continue

            # split at most twice: the third column (the list of tools) must be obtained as a
            # whole, since it may contain whitespace after the commas
            parsed_hardcoded_parameter = stripped_line.split(None, 2)
            # valid lines contain two or three columns
            if len(parsed_hardcoded_parameter) not in (2, 3):
                warning("Invalid line at line number %d of the given hardcoded parameters file. Line will be "
                        "ignored:\n%s" % (line_number, line), 0)
                continue

            parameter_name = parsed_hardcoded_parameter[0]
            hardcoded_value = parsed_hardcoded_parameter[1]
            tool_names = None
            if len(parsed_hardcoded_parameter) == 3:
                tool_names = parsed_hardcoded_parameter[2].split(',')
            if tool_names:
                for tool_name in tool_names:
                    parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name.strip())
            else:
                parameter_hardcoder.register_parameter(parameter_name, hardcoded_value)

    return parameter_hardcoder
460
461
def parse_file_formats(formats_file):
    """Read the supported file formats from the given file.

    Each line that is neither blank nor starts with '#' is split by whitespace and must hold
    one, three or four columns: file extension, Galaxy extension, Galaxy data type and
    mimetype. Any other number of columns is reported with a warning and the line is ignored.

    :param formats_file: path of the file to read, or None.
    :return: dictionary mapping the first column of each valid line to a DataType; it is
             empty if no file was given.
    """
    supported_formats = {}
    if formats_file is None:
        return supported_formats

    with open(formats_file) as f:
        line_number = 0
        for line in f:
            line_number += 1
            content = line.strip()
            # skip empty lines and comments
            if not content or content.startswith("#"):
                continue

            columns = content.split()
            column_count = len(columns)
            # valid lines contain either one, three or four columns
            if column_count not in (1, 3, 4):
                warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" %
                        (line_number, line), 0)
                continue

            extension = columns[0]
            if column_count == 1:
                # only the extension was given, it is also used as the Galaxy extension
                supported_formats[extension] = DataType(extension, extension)
            else:
                # the mimetype is the optional fourth column
                mimetype = columns[3] if column_count == 4 else None
                supported_formats[extension] = DataType(extension, columns[1], columns[2], mimetype)

    return supported_formats
493
494
def validate_and_prepare_args(args):
    """Validate the parsed command line arguments and flatten the list-valued ones in place.

    The attributes input_files, blacklisted_parameters and macros_files are expected to be
    lists of lists and are replaced by flat lists.

    :param args: namespace with the parsed command line arguments.
    :raises ApplicationException: if both a skip-tools and a required-tools file were given,
        if the output destination does not match the number of input files (a single input
        must not point to an existing folder, several inputs need an existing folder), if an
        input file is not an existing file, or if an output file name points to a directory.
    """
    # check that only one of skip_tools_file and required_tools_file has been provided
    if args.skip_tools_file is not None and args.required_tools_file is not None:
        raise ApplicationException(
            "You have provided both a file with tools to ignore and a file with required tools.\n"
            "Only one of -s/--skip-tools, -r/--required-tools can be provided.")

    # first, we convert all list of lists in args to flat lists
    for list_to_flatten in ("input_files", "blacklisted_parameters", "macros_files"):
        nested = getattr(args, list_to_flatten)
        setattr(args, list_to_flatten, [item for sub_list in nested for item in sub_list])

    # if input is a single file, we expect output to be a file (and not a dir that already exists)
    if len(args.input_files) == 1 and os.path.isdir(args.output_destination):
        raise ApplicationException("If a single input file is provided, output (%s) is expected to be a file "
                                   "and not a folder.\n" % args.output_destination)

    # if input is a list of files, we expect output to be a folder
    if len(args.input_files) > 1 and not os.path.isdir(args.output_destination):
        raise ApplicationException("If several input files are provided, output (%s) is expected to be an "
                                   "existing directory.\n" % args.output_destination)

    # check that the provided input files, if provided, contain a valid file path
    input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files",
                                "input_files", "formats_file", "hardcoded_parameters"]

    for variable_name in input_variables_to_check:
        member_value = getattr(args, variable_name)
        if member_value is None:
            continue
        # we might be handling a single file or a list of files
        candidates = member_value if isinstance(member_value, list) else [member_value]

        for candidate in candidates:
            # str.strip() is used instead of string.strip(), which only exists in Python 2
            path_to_check = str(candidate).strip()
            # isfile() is already False for paths that do not exist
            if not os.path.isfile(path_to_check):
                raise ApplicationException(
                    "The provided input file (%s) does not exist or is not a valid file path."
                    % path_to_check)

    # check that the provided output files, if provided, contain a valid file path (i.e., not a folder)
    for variable_name in ("data_types_destination", "tool_conf_destination"):
        file_name = getattr(args, variable_name)
        if file_name is not None and os.path.isdir(file_name):
            raise ApplicationException("The provided output file name (%s) points to a directory." % file_name)
547
548
def convert(input_files, output_destination, **kwargs):
    """Convert each of the given CTD files into a Galaxy tool file.

    :param input_files: list with the paths of the CTD files to convert.
    :param output_destination: the output file if a single input file is given; otherwise the
                               folder in which one .xml file per input file is written.
    :param kwargs: conversion settings. "skip_tools" and "required_tools" (each a collection
                   of tool names or None) are read here; all settings are also handed over to
                   expand_macros, create_command, create_inputs and create_outputs.
    :return: list of [model, output file name] pairs, one for each converted tool.
    """
    # first, generate a model
    is_converting_multiple_ctds = len(input_files) > 1
    parsed_models = []
    for input_file in input_files:
        try:
            model = CTDModel(from_file=input_file)
        except Exception as e:
            # a CTD that cannot be loaded is reported and skipped, the remaining files are still converted
            error(str(e), 1)
            continue

        if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]:
            info("Skipping tool %s" % model.name, 0)
            continue
        if kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]:
            info("Tool %s is not required, skipping it" % model.name, 0)
            continue

        info("Converting from %s " % input_file, 0)
        tool = create_tool(model)
        write_header(tool, model)
        create_description(tool, model)
        expand_macros(tool, model, **kwargs)
        create_command(tool, model, **kwargs)
        create_inputs(tool, model, **kwargs)
        create_outputs(tool, model, **kwargs)
        create_help(tool, model)

        # finally, serialize the tool
        output_file = output_destination
        # if multiple inputs are being converted,
        # then we need to generate a different output_file for each input
        if is_converting_multiple_ctds:
            output_file = os.path.join(output_file, get_filename_without_suffix(input_file) + ".xml")
        # wrap our tool element into a tree to be able to serialize it
        tree = ElementTree(tool)
        # the with-statement makes sure that the output file is flushed and closed
        with open(output_file, 'w') as output_handle:
            tree.write(output_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
        # let's use model to hold the name of the output file
        parsed_models.append([model, get_filename(output_file)])

    return parsed_models
590
591
def write_header(tool, model):
    """Insert two XML comments before the tool element.

    The first comment states that the file was generated, the second one proposes a tool
    section using the "category" entry of model.opt_attribs (empty if there is none).
    """
    generated_notice = etree.Comment(
        "This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). "
        "This file was automatically generated using CTD2Galaxy.")
    section_proposal = etree.Comment('Proposed Tool Section: [%s]' % model.opt_attribs.get("category", ""))
    for header_comment in (generated_notice, section_proposal):
        tool.addprevious(header_comment)
597
598
def generate_tool_conf(parsed_models, tool_conf_destination, galaxy_tool_path, default_category):
    """Write a tool_conf.xml that lists the converted tools grouped by category.

    :param parsed_models: list of [model, file name] pairs, as returned by convert().
    :param tool_conf_destination: path of the file to write.
    :param galaxy_tool_path: path prepended to every file name, or None for no prefix. A
                             trailing "/" is added if it is missing.
    :param default_category: category used for tools whose category is missing or blank.
    """
    # for each category, we keep a list of models corresponding to it
    categories_to_tools = dict()
    for model in parsed_models:
        # str.strip() instead of the Python 2 only string.strip(); a None category counts as blank
        category = (model[0].opt_attribs.get("category", "") or "").strip()
        if not category:
            category = default_category
        categories_to_tools.setdefault(category, []).append(model[1])

    # at this point, we should have a map for all categories->tools
    toolbox_node = Element("toolbox")

    if galaxy_tool_path is None:
        galaxy_tool_path = ""
    elif not galaxy_tool_path.strip().endswith("/"):
        galaxy_tool_path = galaxy_tool_path.strip() + "/"

    # items() is available in both Python 2 and Python 3, iteritems() only in Python 2
    for category, file_names in categories_to_tools.items():
        section_node = add_child_node(toolbox_node, "section")
        # the section id is the category without any whitespace
        section_node.attrib["id"] = "section-id-" + "".join(category.split())
        section_node.attrib["name"] = category

        for filename in file_names:
            tool_node = add_child_node(section_node, "tool")
            tool_node.attrib["file"] = galaxy_tool_path + filename

    toolconf_tree = ElementTree(toolbox_node)
    # the with-statement makes sure that the output file is flushed and closed
    with open(tool_conf_destination, 'w') as tool_conf_file:
        toolconf_tree.write(tool_conf_file, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy tool_conf.xml in %s" % tool_conf_destination, 0)
630
631
def generate_data_type_conf(supported_file_formats, data_types_destination):
    """Write a datatypes_conf.xml registering the given file formats.

    Only formats whose galaxy_type is set get a <datatype> entry.

    :param supported_file_formats: dictionary of format name -> DataType.
    :param data_types_destination: path of the file to write.
    """
    data_types_node = Element("datatypes")
    registration_node = add_child_node(data_types_node, "registration")
    registration_node.attrib["converters_path"] = "lib/galaxy/datatypes/converters"
    registration_node.attrib["display_path"] = "display_applications"

    for format_name in supported_file_formats:
        data_type = supported_file_formats[format_name]
        # add only if it's a data type that does not exist in Galaxy
        if data_type.galaxy_type is None:
            continue
        data_type_node = add_child_node(registration_node, "datatype")
        # we know galaxy_extension is not None
        data_type_node.attrib["extension"] = data_type.galaxy_extension
        data_type_node.attrib["type"] = data_type.galaxy_type
        if data_type.mimetype is not None:
            data_type_node.attrib["mimetype"] = data_type.mimetype

    data_types_tree = ElementTree(data_types_node)
    # the with-statement makes sure that the output file is flushed and closed
    with open(data_types_destination, 'w') as data_types_file:
        data_types_tree.write(data_types_file, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy datatypes_conf.xml in %s" % data_types_destination, 0)
652
653
# taken from
# http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format
def get_filename(path):
    """Return the last component of the path, accepting both '/' and '\\' as separators.

    If the path ends with a separator, the last component before that separator is returned.
    """
    directory, last_component = ntpath.split(path)
    if last_component:
        return last_component
    # the path ended with a separator, so the name is the last part of what is left
    return ntpath.basename(directory)
659
660
def get_filename_without_suffix(path):
    """Return the base name of the path without its last extension."""
    base_name = os.path.basename(path)
    return os.path.splitext(base_name)[0]
664
665
def create_tool(model):
    """Create the <tool> element; id and name are the model name, version is the model version."""
    # an ordered dictionary is used so the attributes are handed over as id, name, version
    attributes = OrderedDict()
    attributes["id"] = model.name
    attributes["name"] = model.name
    attributes["version"] = model.version
    return Element("tool", attributes)
668
669
def create_description(tool, model):
    """Add a <description> child to the tool if the model has a description that is not None."""
    description_text = model.opt_attribs.get("description")
    if description_text is None:
        # missing or empty description: nothing is added
        return
    SubElement(tool, "description").text = description_text
674
675
def get_param_name(param):
    """Return the name of the parameter, prefixed by the names of its groups joined with ':'.

    The topmost parent is not part of the name (OpenMS legacy).
    """
    group = param.parent
    belongs_to_subgroup = type(group) == ParameterGroup and group.parent != None
    if not belongs_to_subgroup:
        return resolve_param_mapping(param)
    # we generate parameters with colons for subgroups
    return get_param_name(group) + ":" + resolve_param_mapping(param)
682
683
# some parameters are mapped to command line options, this method helps resolve those mappings, if any
# TODO: implement mapping of parameters!!!
def resolve_param_mapping(param):
    """Return the name to use for the parameter; for now this is always param.name."""
    resolved_name = param.name
    return resolved_name
688
689
def create_command(tool, model, **kwargs):
    """Build the Cheetah command template and attach it to ``tool`` as <command>.

    Expected keyword arguments: ``default_executable_path``,
    ``add_to_command_line``, ``parameter_hardcoder`` and
    ``blacklisted_parameters``.

    Blacklisted parameters are skipped, hardcoded parameters are emitted with
    their fixed value, and every other parameter gets a template snippet that
    depends on its kind (list, choice, boolean, text, other).  Snippets of
    advanced, non-output parameters are collected inside a single
    ``#if $adv_opts...`` section appended at the end.  If the model has no
    output-file parameter, stdout is redirected to ``$param_stdout``.
    """
    final_command = get_tool_executable_path(model, kwargs["default_executable_path"]) + '\n'
    final_command += kwargs["add_to_command_line"] + '\n'
    advanced_command_start = "#if $adv_opts.adv_opts_selector=='advanced':\n"
    advanced_command_end = '#end if'
    advanced_command = ''
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    found_output_parameter = False
    for param in extract_parameters(model):
        # an output parameter counts even when it is blacklisted below
        if param.type is _OutFile:
            found_output_parameter = True
        param_name = get_param_name(param)

        if param.name in kwargs["blacklisted_parameters"]:
            continue

        # advanced output files are still treated as regular outputs
        is_advanced = param.advanced and param.type is not _OutFile

        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param_name, model.name)
        if hardcoded_value:
            command = '-%s %s\n' % (param_name, hardcoded_value)
        else:
            # parameter is neither blacklisted nor hardcoded...
            galaxy_parameter_name = get_galaxy_parameter_name(param)
            repeat_galaxy_parameter_name = get_repeat_galaxy_parameter_name(param)

            if param.is_list:
                # logic for ITEMLISTs
                if param.type is _InFile:
                    command = "".join([
                        "-" + str(param_name) + "\n",
                        " #for token in $" + galaxy_parameter_name + ":\n",
                        " $token\n",
                        " #end for\n",
                    ])
                else:
                    command = "".join([
                        "\n#if $" + repeat_galaxy_parameter_name + ":\n",
                        "-" + str(param_name) + "\n",
                        " #for token in $" + repeat_galaxy_parameter_name + ":\n",
                        " #if \" \" in str(token):\n",
                        " \"$token." + galaxy_parameter_name + "\"\n",
                        " #else\n",
                        " $token." + galaxy_parameter_name + "\n",
                        " #end if\n",
                        " #end for\n",
                        "#end if\n",
                    ])
            else:
                # logic for other ITEMs
                if is_advanced:
                    actual_parameter = "$adv_opts.%s" % galaxy_parameter_name
                else:
                    actual_parameter = "$%s" % galaxy_parameter_name
                # TODO: whitespace validation would only be useful for text fields,
                # integers or floats; not for choices, input fields ...

                is_boolean = is_boolean_parameter(param)
                if not is_boolean and type(param.restrictions) is _Choices:
                    command = "".join([
                        "#if " + actual_parameter + ":\n",
                        ' -%s\n' % param_name,
                        " #if \" \" in str(" + actual_parameter + "):\n",
                        " \"" + actual_parameter + "\"\n",
                        " #else\n",
                        " " + actual_parameter + "\n",
                        " #end if\n",
                        "#end if\n",
                    ])
                elif is_boolean:
                    command = "".join([
                        "#if " + actual_parameter + ":\n",
                        ' -%s\n' % param_name,
                        "#end if\n",
                    ])
                # compare by value: identity of string literals is an implementation detail
                elif TYPE_TO_GALAXY_TYPE[param.type] == 'text':
                    command = "".join([
                        "#if " + actual_parameter + ":\n",
                        " -%s " % param_name,
                        " \"" + actual_parameter + "\"\n",
                        "#end if\n",
                    ])
                else:
                    command = "".join([
                        "#if " + actual_parameter + ":\n",
                        ' -%s ' % param_name,
                        actual_parameter + "\n",
                        "#end if\n",
                    ])

        if is_advanced:
            advanced_command += " %s" % command
        else:
            final_command += command

    if advanced_command:
        final_command += "%s%s%s\n" % (advanced_command_start, advanced_command, advanced_command_end)

    if not found_output_parameter:
        final_command += "> $param_stdout\n"

    command_node = add_child_node(tool, "command")
    command_node.text = final_command
784
785
786		# creates the xml elements needed to import the needed macros files
787		# and to "expand" the macros
def expand_macros(tool, model, **kwargs):
    """Add the <macros> section and the <expand> nodes to ``tool``.

    Expected keyword arguments: ``default_executable_path``,
    ``macros_file_names`` (paths of macros files to import) and
    ``macros_to_expand`` (names of the macros to expand).

    Raises whatever ``open`` raises if a macros file cannot be opened.
    """
    macros_node = add_child_node(tool, "macros")
    token_node = add_child_node(macros_node, "token")
    token_node.attrib["name"] = "@EXECUTABLE@"
    token_node.text = get_tool_executable_path(model, kwargs["default_executable_path"])

    # add <import> nodes
    for macro_file_name in kwargs["macros_file_names"]:
        # opening the file verifies that it is readable; the context manager
        # makes sure the handle is released again
        with open(macro_file_name) as macro_file:
            import_node = add_child_node(macros_node, "import")
            # do not add the path of the file, rather, just its basename
            import_node.text = os.path.basename(macro_file.name)

    # add <expand> nodes
    for expand_macro in kwargs["macros_to_expand"]:
        expand_node = add_child_node(tool, "expand")
        expand_node.attrib["macro"] = expand_macro
805
806
def get_tool_executable_path(model, default_executable_path):
    """Return the executable (optionally prefixed by a path) used to call the tool.

    The model's optional ``executablePath`` attribute wins over
    ``default_executable_path``; the optional ``executableName`` attribute wins
    over the model's name.  When a path is available it is stripped and a
    trailing '/' is appended if it is missing.
    """
    path = model.opt_attribs.get("executablePath", None)
    name = model.opt_attribs.get("executableName", None)

    # fall back to the default location when the model does not define one
    if path is None:
        path = default_executable_path

    # no path at all: only the executable (or tool) name is used
    if path is None:
        return model.name if name is None else name

    # make sure that the path ends with a '/'
    path = path.strip()
    if not path.endswith('/'):
        path += '/'

    if name is None:
        return path + model.name
    return str(path) + str(name)
841
842
def get_galaxy_parameter_name(param):
    """Return the Galaxy name of ``param``: 'param_' plus its name with ':' and '-' replaced by '_'."""
    sanitized = get_param_name(param)
    for forbidden in (':', '-'):
        sanitized = sanitized.replace(forbidden, '_')
    return "param_%s" % sanitized
845
846
def get_input_with_same_restrictions(out_param, model, supported_file_formats):
    """Return the first restricted input-file parameter whose supported formats equal those of ``out_param``.

    Returns None when no such input parameter exists.
    """
    for candidate in extract_parameters(model):
        # only input files that carry restrictions can be compared
        if candidate.type is not _InFile or candidate.restrictions is None:
            continue
        candidate_formats = get_supported_file_types(candidate.restrictions.formats, supported_file_formats)
        wanted_formats = get_supported_file_types(out_param.restrictions.formats, supported_file_formats)
        if candidate_formats == wanted_formats:
            return candidate
    return None
855
856
def create_inputs(tool, model, **kwargs):
    """Create the <inputs> section of ``tool`` from the model's parameters.

    Expected keyword arguments: ``parameter_hardcoder``,
    ``blacklisted_parameters`` and ``supported_file_formats``.

    Every parameter that is not an output file, not blacklisted and not
    hardcoded becomes a <param> node.  Advanced parameters are collected under
    an <expand macro="advanced_options"> node, which is appended to <inputs>
    only when it has at least one child.
    """
    inputs_node = SubElement(tool, "inputs")

    # some suites (such as OpenMS) need some advanced options when handling inputs;
    # the node is created detached so that an empty <expand> is never left
    # behind as a child of <tool> when there are no advanced parameters
    expand_advanced_node = Element("expand", OrderedDict([("macro", ADVANCED_OPTIONS_MACRO_NAME)]))
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    # treat all non output-file parameters as inputs
    for param in extract_parameters(model):
        # no need to show hardcoded parameters
        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
        if param.name in kwargs["blacklisted_parameters"] or hardcoded_value:
            continue
        if param.type is _OutFile:
            continue

        parent_node = expand_advanced_node if param.advanced else inputs_node

        # for lists we need a repeat tag
        if param.is_list and param.type is not _InFile:
            rep_node = add_child_node(parent_node, "repeat")
            create_repeat_attribute_list(rep_node, param)
            parent_node = rep_node

        param_node = add_child_node(parent_node, "param")
        create_param_attribute_list(param_node, param, kwargs["supported_file_formats"])

    # advanced parameter selection should be at the end
    # and only available if an advanced parameter exists
    if len(expand_advanced_node) > 0:
        inputs_node.append(expand_advanced_node)
898
899
def get_repeat_galaxy_parameter_name(param):
    """Return the name of the <repeat> element wrapping ``param``: 'rep_' plus its Galaxy name."""
    return "rep_%s" % get_galaxy_parameter_name(param)
902
903
def create_repeat_attribute_list(rep_node, param):
    """Set the name, min, (optional) max and title attributes of a <repeat> node."""
    attributes = rep_node.attrib
    attributes["name"] = get_repeat_galaxy_parameter_name(param)
    # required lists need at least one entry
    attributes["min"] = "1" if param.required else "0"
    # for the ITEMLISTs which have LISTITEM children we only
    # need one parameter as it is given as a string
    if param.default is not None:
        attributes["max"] = "1"
    attributes["title"] = get_galaxy_parameter_name(param)
915
916
def create_param_attribute_list(param_node, param, supported_file_formats):
    """Fill the attributes (and child nodes) of a Galaxy <param> node for ``param``.

    Sets name, type, format, restrictions (options or min/max), optional,
    text sanitizer, default value, label and help.

    Raises InvalidModelException (a ModelError) when the parameter type or its
    restrictions cannot be translated.
    """
    param_node.attrib["name"] = get_galaxy_parameter_name(param)

    # .get() instead of [] so that an unknown type reaches the explicit error
    # below instead of surfacing as a bare KeyError
    param_type = TYPE_TO_GALAXY_TYPE.get(param.type)
    if param_type is None:
        raise InvalidModelException("Unrecognized parameter type %(type)s for parameter %(name)s"
                                    % {"type": param.type, "name": param.name})

    if param.is_list:
        param_type = "text"

    if is_selection_parameter(param):
        param_type = "select"

    if is_boolean_parameter(param):
        param_type = "boolean"

    if param.type is _InFile:
        # assume it's just text unless restrictions are provided
        param_format = "text"
        if param.restrictions is not None:
            # join all supported_formats for the file... this MUST be a _FileFormat
            if type(param.restrictions) is _FileFormat:
                param_format = ','.join(get_supported_file_types(param.restrictions.formats, supported_file_formats))
            else:
                raise InvalidModelException("Expected 'file type' restrictions for input file [%(name)s], "
                                            "but instead got [%(type)s]"
                                            % {"name": param.name, "type": type(param.restrictions)})
        param_node.attrib["type"] = "data"
        param_node.attrib["format"] = param_format
        # in the case of multiple input set multiple flag
        if param.is_list:
            param_node.attrib["multiple"] = "true"
    else:
        param_node.attrib["type"] = param_type

    # check for parameters with restricted values (which will correspond to a "select" in galaxy)
    if param.restrictions is not None:
        # it could be either _Choices or _NumericRange, with special case for boolean types
        if param_type == "boolean":
            create_boolean_parameter(param_node, param)
        elif type(param.restrictions) is _Choices:
            # create as many <option> elements as restriction values
            for choice in param.restrictions.choices:
                option_node = add_child_node(param_node, "option", OrderedDict([("value", str(choice))]))
                option_node.text = str(choice)
        elif type(param.restrictions) is _NumericRange:
            if param.type is not int and param.type is not float:
                # report the offending parameter type, not the restriction type
                raise InvalidModelException("Expected either 'int' or 'float' in the numeric range restriction for "
                                            "parameter [%(name)s], but instead got [%(type)s]" %
                                            {"name": param.name, "type": param.type})
            # extract the min and max values and add them as attributes
            if param.restrictions.n_min is not None:
                param_node.attrib["min"] = str(param.restrictions.n_min)
            if param.restrictions.n_max is not None:
                param_node.attrib["max"] = str(param.restrictions.n_max)
        elif type(param.restrictions) is _FileFormat:
            param_node.attrib["format"] = ",".join(
                get_supported_file_types(param.restrictions.formats, supported_file_formats))
        else:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] for parameter [%(name)s]"
                                        % {"type": type(param.restrictions), "name": param.name})

    param_node.attrib["optional"] = str(not param.required)

    if param_type == "text":
        # add size attribute... this is the length of a textbox field in Galaxy (it could also be 15x2, for instance)
        param_node.attrib["size"] = "30"
        # add sanitizer nodes, this is needed for special character like "["
        # which are used for example by FeatureFinderMultiplex
        sanitizer_node = SubElement(param_node, "sanitizer")

        valid_node = SubElement(sanitizer_node, "valid", OrderedDict([("initial", "string.printable")]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '\'')]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '"')]))

    # check for default value
    if param.default is not None:
        if type(param.default) is list:
            # we ASSUME that a list of parameters looks like:
            # $ tool -ignore He Ar Xe
            # meaning, that, for example, Helium, Argon and Xenon will be ignored
            param_node.attrib["value"] = ' '.join(map(str, param.default))
        elif param_type != "boolean":
            # boolean parameters handle default values by using the "checked" attribute
            # there isn't much we can do... just stringify the value
            param_node.attrib["value"] = str(param.default)
    elif param.type is int or param.type is float:
        # galaxy requires "value" to be included for int/float
        # since no default was included, we need to figure out one... but let the user know
        warning("Generating default value for parameter [%s]. "
                "Galaxy requires the attribute 'value' to be set for integer/floats. "
                "Edit the CTD file and provide a suitable default value." % param.name, 1)
        # check if there's a min/max and try to use them
        default_value = None
        if param.restrictions is not None:
            if type(param.restrictions) is _NumericRange:
                default_value = param.restrictions.n_min
                if default_value is None:
                    default_value = param.restrictions.n_max
                if default_value is None:
                    # no min/max provided... just use 0 and see what happens
                    default_value = 0
            else:
                # any other restriction on a numeric parameter without a
                # default cannot be turned into a value
                raise InvalidModelException("Expected either a numeric range for parameter [%(name)s], "
                                            "but instead got [%(type)s]"
                                            % {"name": param.name, "type": type(param.restrictions)})
        else:
            # no restrictions and no default value provided...
            # make up something
            default_value = 0
        param_node.attrib["value"] = str(default_value)

    label = "%s parameter" % param.name
    help_text = ""

    if param.description is not None:
        label, help_text = generate_label_and_help(param.description)

    param_node.attrib["label"] = label
    param_node.attrib["help"] = "(-%s)" % param.name + " " + help_text
1048
1049
def generate_label_and_help(desc):
    """Split a parameter description into a short label and a help text.

    The description is stringified, '#br#' tags are turned into ' <br>' and
    trailing dots are removed.  A lowercase first word is capitalized unless
    it contains a '/' (e.g. a quotient such as 'm/z').  Descriptions longer
    than 50 characters are split at 'e.g.' or, failing that, at the end of the
    first sentence ('. ' or '? '); the remainder becomes the help text.

    Returns a ``(label, help_text)`` tuple.
    """
    help_text = ""
    # This tag is found in some descriptions
    desc = str(desc).replace("#br#", " <br>")
    # Get rid of dots in the end
    if desc.endswith("."):
        desc = desc.rstrip(".")
    # Check if first word is a normal word and make it uppercase
    if " " in desc:
        first_word, rest = desc.split(" ", 1)
        # leave quotients of the form a/b untouched; str.capitalize() returns
        # a new string, so the result has to be kept
        if first_word.islower() and "/" not in first_word:
            first_word = first_word.capitalize()
        desc = first_word + " " + rest
    label = desc

    # Try to split the label if it is too long
    if len(desc) > 50:
        # find an example and put everything before in the label and the e.g. in the help
        if desc.find("e.g.") > 1:
            label, help_text = desc.split("e.g.", 1)
            help_text = "e.g." + help_text
        else:
            # find the end of the first sentence
            # look for ". " because some labels contain .file or something similar
            period_at = desc.find(". ")
            question_at = desc.find("? ")
            delimiter = ""
            if period_at > 1 and question_at > 1:
                delimiter = ". " if period_at < question_at else "? "
            elif period_at > 1:
                delimiter = ". "
            elif question_at > 1:
                delimiter = "? "
            if delimiter:
                label, help_text = desc.split(delimiter, 1)

            # add the question mark back
            if delimiter == "? ":
                label += "? "

    # remove trailing linebreak tags; rstrip('<br>') would strip the individual
    # characters '<', 'b', 'r', '>' and eat the end of words such as "number"
    label = label.rstrip()
    while label.endswith("<br>"):
        label = label[:-len("<br>")].rstrip()
    return label, help_text
1097
1098
def get_indented_text(text, indentation_level):
    """Return ``text`` prefixed with MESSAGE_INDENTATION_INCREMENT spaces per indentation level."""
    padding = " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level)
    return "%s%s" % (padding, text)
1103
1104
def warning(warning_text, indentation_level):
    """Write an indented 'WARNING: ...' line to standard output."""
    message = "WARNING: %s\n" % warning_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1107
1108
def error(error_text, indentation_level):
    """Write an indented 'ERROR: ...' line to standard error."""
    message = "ERROR: %s\n" % error_text
    sys.stderr.write(get_indented_text(message, indentation_level))
1111
1112
def info(info_text, indentation_level):
    """Write an indented 'INFO: ...' line to standard output."""
    message = "INFO: %s\n" % info_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1115
1116
1117		# determines if the given choices are boolean (basically, if the possible values are yes/no, true/false)
def is_boolean_parameter(param):
    """Return True if ``param`` is restricted to exactly two choices that are yes/no or true/false."""
    restrictions = param.restrictions
    if type(restrictions) is not _Choices:
        return False
    # for a true boolean experience, we need 2 values...
    if len(restrictions.choices) != 2:
        return False
    # ...and those two values must be either yes/no or true/false
    choices = get_lowercase_list(restrictions.choices)
    return ("yes" in choices and "no" in choices) or ("true" in choices and "false" in choices)
1128
1129
1130		# determines if there are choices for the parameter
def is_selection_parameter(param):
    """Return True if the restrictions of ``param`` are exactly of type _Choices."""
    restriction_type = type(param.restrictions)
    return restriction_type is _Choices
1133
1134
def get_lowercase_list(some_list):
    """Return a list with every element of ``some_list`` stringified, lowercased and stripped.

    Uses str methods rather than the Python-2-only ``string.lower`` /
    ``string.strip`` functions and always returns a real list, so membership
    tests and repeated iteration work on any Python version.
    """
    return [str(item).lower().strip() for item in some_list]
1140
1141
1142		# creates a galaxy boolean parameter type
1143		# this method assumes that param has restrictions, and that only two restictions are present
1144		# (either yes/no or true/false)
def create_boolean_parameter(param_node, param):
    """Set truevalue, falsevalue and (if a default exists) checked on a boolean <param> node.

    Assumes that ``param`` has restrictions and that exactly two choices are
    present (either yes/no or true/false).
    """
    # TODO: true and false values can be way more than 'true' and 'false',
    # but for that we need CTD support

    # by default, 'true' and 'false' are handled as flags, like the verbose flag (i.e., -v)
    true_value = "-%s" % get_param_name(param)
    false_value = ""
    choices = get_lowercase_list(param.restrictions.choices)
    if "yes" in choices:
        true_value = "yes"
        false_value = "no"
    param_node.attrib["truevalue"] = true_value
    param_node.attrib["falsevalue"] = false_value

    # set the checked attribute
    if param.default is not None:
        # stringify first so that non-string defaults do not raise
        default = str(param.default).lower().strip()
        checked_value = "true" if default in ("yes", "true") else "false"
        param_node.attrib["checked"] = checked_value
1168
1169
def create_outputs(parent, model, **kwargs):
    """Create the <outputs> section under ``parent``.

    Expected keyword arguments: ``parameter_hardcoder``,
    ``blacklisted_parameters`` and ``supported_file_formats``.  Every output
    file parameter that is neither blacklisted nor hardcoded gets a <data>
    node; if none is created, a <data> node capturing stdout is added instead.
    """
    outputs_node = add_child_node(parent, "outputs")
    hardcoder = kwargs["parameter_hardcoder"]

    for param in extract_parameters(model):
        # no need to show hardcoded parameters
        hardcoded_value = hardcoder.get_hardcoded_value(param.name, model.name)
        skipped = param.name in kwargs["blacklisted_parameters"] or hardcoded_value
        if not skipped and param.type is _OutFile:
            create_output_node(outputs_node, param, model, kwargs["supported_file_formats"])

    # If there are no outputs defined in the ctd the node will have no children
    # and the stdout will be used as output
    if len(outputs_node) == 0:
        stdout_attributes = OrderedDict([("name", "param_stdout"), ("format", "text"),
                                         ("label", "Output from stdout")])
        add_child_node(outputs_node, "data", stdout_attributes)
1189
1190
def create_output_node(parent, param, model, supported_file_formats):
    """Create and return the <data> node describing the output file parameter ``param``.

    The format defaults to "data".  With file-format restrictions, one of the
    supported formats is used; if more than one is supported and an input
    parameter with the same formats exists, the format is taken from that
    input ("input" plus a metadata_source attribute).

    Raises InvalidModelException for restrictions that are not file formats.
    """
    data_node = add_child_node(parent, "data")
    data_node.attrib["name"] = get_galaxy_parameter_name(param)

    data_format = "data"
    restrictions = param.restrictions
    if restrictions is not None:
        if type(restrictions) is not _FileFormat:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] "
                                        "for output [%(name)s]" % {"type": type(restrictions),
                                                                   "name": param.name})

        # collect the formats that have not been registered yet...
        unsupported = "".join(" " + str(format_name) for format_name in restrictions.formats
                              if format_name not in supported_file_formats)
        # ...and warn only if there is something to complain about
        if unsupported:
            warning("Parameter " + param.name + " has the following unsupported format(s):" + unsupported, 1)

        formats = get_supported_file_types(restrictions.formats, supported_file_formats)
        # take any one of the supported formats, if there is one
        if formats:
            data_format = formats.pop()
        # if there are more than one output file formats try to take the format from the input parameter
        if formats:
            corresponding_input = get_input_with_same_restrictions(param, model, supported_file_formats)
            if corresponding_input is not None:
                data_format = "input"
                data_node.attrib["metadata_source"] = get_galaxy_parameter_name(corresponding_input)
    data_node.attrib["format"] = data_format

    # TODO: find a smarter label ?
    return data_node
1232
1233
def get_supported_file_types(formats, supported_file_formats):
    """Return the set of Galaxy extensions for those ``formats`` that are registered.

    ``supported_file_formats`` maps a format name to an object with a
    ``galaxy_extension`` attribute; formats missing from the mapping are
    ignored.  The mapping is queried directly, without building a throw-away
    default object or a list of keys for every format.
    """
    return {supported_file_formats[format_name].galaxy_extension
            for format_name in formats if format_name in supported_file_formats}
1237
1238
def create_change_format_node(parent, data_formats, input_ref):
    """Add a <change_format> node to ``parent`` with one <when> child per data format.

    Example of the generated structure:
        <change_format>
            <when input="secondary_structure" value="true" format="text"/>
        </change_format>
    """
    change_format_node = add_child_node(parent, "change_format")
    for data_format in data_formats:
        when_attributes = OrderedDict([("input", input_ref), ("value", data_format), ("format", data_format)])
        add_child_node(change_format_node, "when", when_attributes)
1247
1248
def create_help(tool, model):
    """Add the <help> node to ``tool``.

    The text is built from the model's optional ``manual`` and ``docurl``
    attributes.  When neither is present, "No help available" is used.
    """
    manual = model.opt_attribs.get("manual")
    doc_url = model.opt_attribs.get("docurl")

    # only format the manual when there is one; otherwise the fallback text
    # below would never be used
    manual_text = None if manual is None else '%s\n\n' % manual

    help_text = "No help available"
    if manual_text is not None:
        help_text = manual_text
    if doc_url is not None:
        help_text = ("" if manual_text is None else manual_text) + "\nFor more information, visit %s" % doc_url
    help_node = add_child_node(tool, "help")
    # TODO: do we need CDATA Section here?
    help_node.text = help_text
1266
1267
1268		# since a model might contain several ParameterGroup elements,
1269		# we want to simply 'flatten' the parameters to generate the Galaxy wrapper
def extract_parameters(model):
    """Flatten the (possibly nested) parameter groups of ``model``.

    Returns a reversed iterator over all collected non-group parameters.
    """
    collected = []
    if len(model.parameters.parameters) > 0:
        # depth-first walk; we know that CTDModel has one parent ParameterGroup
        stack = [model.parameters]
        while stack:
            node = stack.pop()
            if type(node) is ParameterGroup:
                # schedule the first-level children of this ParameterGroup
                stack.extend(node.parameters.values())
            else:
                collected.append(node)
    # the walk yields the last parameter in the CTD first, so hand back the
    # reversed sequence
    return reversed(collected)
1287
1288
1289		# adds and returns a child node using the given name to the given parent node
def add_child_node(parent_node, child_node_name, attributes=None):
    """Create a child element named ``child_node_name`` under ``parent_node`` and return it.

    ``attributes`` is an optional mapping of attribute names to values; a
    fresh empty mapping is used when it is omitted, so no default object is
    shared between calls.
    """
    if attributes is None:
        attributes = OrderedDict()
    return SubElement(parent_node, child_node_name, attributes)
1293
1294
# Run the converter when executed as a script; the value returned by main()
# is handed to sys.exit().
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)

+35

-8

README.md less more

2	2
3	3	Given one or more CTD files, `CTD2Galaxy` generates the needed Galaxy wrappers to include them in a Galaxy instance.
4	4
5		## How to install
	5	## Dependencies
	6
	7	`CTD2Galaxy` has the following python dependencies:
	8
	9	1. `lxml`.
	10	1. [CTDopts]
	11
	12	You can install the [CTDopts] and `lxml` modules via `conda`, like so:
	13
	14	```sh
	15	$ conda install lxml
	16	$ conda install -c workflowconversion ctdopts
	17	```
	18
	19	Note that the [CTDopts] module is available on the `workflowconversion` channel.
	20
	21	Of course, you can just download [CTDopts] and make it available through your `PYTHONPATH` environment variable. To get more information about how to install python modules, visit: https://docs.python.org/2/install/.
	22
	23
	24	## How to install CTD2Galaxy
6	25
7	26	1. Download the source code from https://github.com/genericworkflownodes/CTD2Galaxy.
8		2. Download CTDopts from https://github.com/genericworkflownodes/CTDopts.
9		3. You can install the `CTDopts` and `CTD2Galaxy` modules, or just make them available through your `PYTHONPATH` environment variable. To get more information about how to install python modules, visit: https://docs.python.org/2/install/.
10	27
11	28	## How to use: most common tasks
12	29
13	30	The generator takes several parameters and a varying number of inputs and outputs. The following sub-sections show how to perform the most common operations.
14	31
15	32	Running the generator with the `-h/--help` parameter will print extended information about each of the parameters.
	33
	34	### Macros
	35
	36	Galaxy supports the use of macros via a `macros.xml` file (`CTD2Galaxy` provides a sample macros file in `supported_formats/macros.xml`). Instead of repeating sections, macros can be used and expanded. If you want fine control over the macros, you can use the `-m` / `--macros` parameter to provide your own macros file.
	37
	38	Please note that the macros file you use must be copied to your Galaxy installation, into the same location in which you place the generated ToolConfig files.
16	39
17	40	### One input, one output
18	41

71	94
72	95	Any of the following invocations will convert `/data/input_one.ctd` and `/data/input_two.ctd`:
73	96
74		$ python generator.py -i /data/input_one.ctd -i /data/input_two.ctd -o /data/generated
	97	$ python generator.py -i /data/input_one.ctd -i /data/input_two.ctd -o /data/generated
75	98	$ python generator.py -i /data/input_one.ctd /data/input_two.ctd -o /data/generated
76	99	$ python generator.py --input /data/input_one.ctd /data/input_two.ctd -o /data/generated
77		$ python generator.py --input /data/input_one.ctd --input /data/input_two.ctd -o /data/generated
	100	$ python generator.py --input /data/input_one.ctd --input /data/input_two.ctd -o /data/generated
78	101
79	102	The following invocation will convert `/data/input.ctd` into `/data/output.xml`:
80	103
81		$ python generator.py -i /data/input.ctd -o /data/output.xml
	104	$ python generator.py -i /data/input.ctd -o /data/output.xml -m sample_files/macros.xml
82	105
83	106	Of course, you can also use wildcards, which will be automatically expanded by any modern operating system. This is extremely useful if you want to convert several files at a time. Imagine that the folder `/data/ctds` contains three files, `input_one.ctd`, `input_two.ctd` and `input_three.ctd`. The following two invocations will produce the same output in the `/data/galaxy`:
84	107

232	255
233	256	* Purpose: Include external macros files.
234	257	* Short/long version: `-m` / `--macros`
235		* Required: yes.
	258	* Required: no.
	259	* Default: `macros.xml`
236	260	* Taken values: List of paths of macros files to include.
237	261
238	262	ToolConfig supports elaborate sections such as `<stdio>`, `<requirements>`, etc., that are identical across tools of the same suite. Macros files assist in the task of including external xml sections into ToolConfig files. For more information about the syntax of macros files, see: https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#Reusing_Repeated_Configuration_Elements
239	263
240		There are some macros that are required, namely `stdio`, `requirements` and `advanced_options`. A template macro file is included under `support_files/macros.xml`. Although this is a required file, it can be edited to suit your needs and you could add extra macros or leave it as it is and include additional files.
	264	There are some macros that are required, namely `stdio`, `requirements` and `advanced_options`. A template macro file is included in [macros.xml]. It can be edited to suit your needs and you could add extra macros or leave it as it is and include additional files.
241	265
242	266	Every macro found in the included files and in `support_files/macros.xml` will be expanded. Users are responsible for copying the given macros files in their corresponding galaxy folders.
243	267

320	344	* MapAlignerPoseClustering
321	345	* MapAlignerSpectrum
322	346	* MapAlignerRTTransformer
	347
	348	[CTDopts]: https://github.com/genericworkflownodes/CTDopts
	349	[macros.xml]: https://github.com/WorkflowConversion/CTD2Galaxy/blob/master/macros.xml⏎

+1296

-0

generator.py less more

	0	#!/usr/bin/env python
	1	# encoding: utf-8
	2
	3	"""
	4	@author: delagarza
	5	"""
	6
	7
	8	import sys
	9	import os
	10	import traceback
	11	import ntpath
	12	import string
	13
	14	from argparse import ArgumentParser
	15	from argparse import RawDescriptionHelpFormatter
	16	from CTDopts.CTDopts import CTDModel, _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, \
	17	_FileFormat, ModelError
	18	from collections import OrderedDict
	19	from string import strip
	20	from lxml import etree
	21	from lxml.etree import SubElement, Element, ElementTree, ParseError, parse
	22
	23	__all__ = []
	24	__version__ = 1.0
	25	__date__ = '2014-09-17'
	26	__updated__ = '2016-05-09'
	27
	28	MESSAGE_INDENTATION_INCREMENT = 2
	29
	30	TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'data',
	31	_OutFile: 'data', _Choices: 'select'}
	32
	33	STDIO_MACRO_NAME = "stdio"
	34	REQUIREMENTS_MACRO_NAME = "requirements"
	35	ADVANCED_OPTIONS_MACRO_NAME = "advanced_options"
	36
	37	REQUIRED_MACROS = [STDIO_MACRO_NAME, REQUIREMENTS_MACRO_NAME, ADVANCED_OPTIONS_MACRO_NAME]
	38
	39
	40	class CLIError(Exception):
	41	# Generic exception to raise and log different fatal errors.
	42	def __init__(self, msg):
	43	super(CLIError).__init__(type(self))
	44	self.msg = "E: %s" % msg
	45
	46	def __str__(self):
	47	return self.msg
	48
	49	def __unicode__(self):
	50	return self.msg
	51
	52
	53	class InvalidModelException(ModelError):
	54	def __init__(self, message):
	55	super(InvalidModelException, self).__init__()
	56	self.message = message
	57
	58	def __str__(self):
	59	return self.message
	60
	61	def __repr__(self):
	62	return self.message
	63
	64
	65	class ApplicationException(Exception):
	66	def __init__(self, msg):
	67	super(ApplicationException).__init__(type(self))
	68	self.msg = msg
	69
	70	def __str__(self):
	71	return self.msg
	72
	73	def __unicode__(self):
	74	return self.msg
	75
	76
	77	class ExitCode:
	78	def __init__(self, code_range="", level="", description=None):
	79	self.range = code_range
	80	self.level = level
	81	self.description = description
	82
	83
	84	class DataType:
	85	def __init__(self, extension, galaxy_extension=None, galaxy_type=None, mimetype=None):
	86	self.extension = extension
	87	self.galaxy_extension = galaxy_extension
	88	self.galaxy_type = galaxy_type
	89	self.mimetype = mimetype
	90
	91
	92	class ParameterHardcoder:
	93	def __init__(self):
	94	# map whose keys are the composite names of tools and parameters in the following pattern:
	95	# [ToolName][separator][ParameterName] -> HardcodedValue
	96	# if the parameter applies to all tools, then the following pattern is used:
	97	# [ParameterName] -> HardcodedValue
	98
	99	# examples (assuming separator is '#'):
	100	# threads -> 24
	101	# XtandemAdapter#adapter -> xtandem.exe
	102	# adapter -> adapter.exe
	103	self.separator = "!"
	104	self.parameter_map = {}
	105
	106	# the most specific value will be returned in case of overlap
	107	def get_hardcoded_value(self, parameter_name, tool_name):
	108	# look for the value that would apply for all tools
	109	generic_value = self.parameter_map.get(parameter_name, None)
	110	specific_value = self.parameter_map.get(self.build_key(parameter_name, tool_name), None)
	111	if specific_value is not None:
	112	return specific_value
	113
	114	return generic_value
	115
	116	def register_parameter(self, parameter_name, parameter_value, tool_name=None):
	117	self.parameter_map[self.build_key(parameter_name, tool_name)] = parameter_value
	118
	119	def build_key(self, parameter_name, tool_name):
	120	if tool_name is None:
	121	return parameter_name
	122	return "%s%s%s" % (parameter_name, self.separator, tool_name)
	123
	124
	125	def main(argv=None): # IGNORE:C0111
	126	# Command line options.
	127	if argv is None:
	128	argv = sys.argv
	129	else:
	130	sys.argv.extend(argv)
	131
	132	program_version = "v%s" % __version__
	133	program_build_date = str(__updated__)
	134	program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
	135	program_short_description = "CTD2Galaxy - A project from the GenericWorkflowNodes family " \
	136	"(https://github.com/orgs/genericworkflownodes)"
	137	program_usage = '''
	138	USAGE:
	139
	140	I - Parsing a single CTD file and generate a Galaxy wrapper:
	141
	142	$ python generator.py -i input.ctd -o output.xml
	143
	144
	145	II - Parsing all found CTD files (files with .ctd and .xml extension) in a given folder and
	146	output converted Galaxy wrappers in a given folder:
	147
	148	$ python generator.py -i /home/user/*.ctd -o /home/user/galaxywrappers
	149
	150
	151	III - Providing file formats, mimetypes
	152
	153	Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a certain
	154	data format will be able to receive data from a port from the same format. This converter allows you to provide
	155	a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. The layout of
	156	this file consists of lines, each of either one or four columns separated by any amount of whitespace. The content
	157	of each column is as follows:
	158
	159	* 1st column: file extension
	160	* 2nd column: data type, as listed in Galaxy
	161	* 3rd column: full-named Galaxy data type, as it will appear on datatypes_conf.xml
	162	* 4th column: mimetype (optional)
	163
	164	The following is an example of a valid "file formats" file:
	165
	166	########################################## FILE FORMATS example ##########################################
	167	# Every line starting with a # will be handled as a comment and will not be parsed.
	168	# The first column is the file format as given in the CTD and second column is the Galaxy data format.
	169	# The second, third, fourth and fifth column can be left empty if the data type has already been registered
	170	# in Galaxy, otherwise, all but the mimetype must be provided.
	171
	172	# CTD type # Galaxy type # Long Galaxy data type # Mimetype
	173	csv tabular galaxy.datatypes.data:Text
	174	fasta
	175	ini txt galaxy.datatypes.data:Text
	176	txt
	177	idxml txt galaxy.datatypes.xml:GenericXml application/xml
	178	options txt galaxy.datatypes.data:Text
	179	grid grid galaxy.datatypes.data:Grid
	180
	181	##########################################################################################################
	182
	183	Note that each line consists precisely of either one, three or four columns. In the case of data types already
	184	registered in Galaxy (such as fasta and txt in the above example), only the first column is needed. In the case of
	185	data types that haven't been yet registered in Galaxy, the first three columns are needed (mimetype is optional).
	186
	187	For information about Galaxy data types and subclasses, see the following page:
	188	https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes
	189
	190
	191	IV - Hardcoding parameters
	192
	193	It is possible to hardcode parameters. This makes sense if you want to set a tool in Galaxy in 'quiet' mode or if
	194	your tools support multi-threading and accept the number of threads via a parameter, without giving the end user the
	195	chance to change the values for these parameters.
	196
	197	In order to generate hardcoded parameters, you need to provide a simple file. Each line of this file contains two
	198	or three columns separated by whitespace. Any line starting with a '#' will be ignored. The first column contains
	199	the name of the parameter, the second column contains the value that will always be set for this parameter. The
	200	first two columns are mandatory.
	201
	202	If the parameter is to be hardcoded only for a set of tools, then a third column can be added. This column includes
	203	a comma-separated list of tool names for which the parameter will be hardcoded. If a third column is not included,
	204	then all processed tools containing the given parameter will get a hardcoded value for it.
	205
	206	The following is an example of a valid file:
	207
	208	##################################### HARDCODED PARAMETERS example #####################################
	209	# Every line starting with a # will be handled as a comment and will not be parsed.
	210	# The first column is the name of the parameter and the second column is the value that will be used.
	211
	212	# Parameter name # Value # Tool(s)
	213	threads \${GALAXY_SLOTS:-24}
	214	mode quiet
	215	xtandem_executable xtandem XTandemAdapter
	216	verbosity high Foo, Bar
	217
	218	#########################################################################################################
	219
	220	Using the above file will produce a <command> similar to:
	221
	222	[tool_name] ... -threads \${GALAXY_SLOTS:-24} -mode quiet ...
	223
	224	For all tools. For XTandemAdapter, the <command> will be similar to:
	225
	226	XtandemAdapter ... -threads \${GALAXY_SLOTS:-24} -mode quiet -xtandem_executable xtandem ...
	227
	228	And for tools Foo and Bar, the <command> will be similar to:
	229
	230	Foo ... ... -threads \${GALAXY_SLOTS:-24} -mode quiet -verbosity high ...
	231
	232
	233	V - Control which tools will be converted
	234
	235	Sometimes only a subset of CTDs needs to be converted. It is possible to either explicitly specify which tools will
	236	be converted or which tools will not be converted.
	237
	238	The value of the -s/--skip-tools parameter is a file in which each line will be interpreted as the name of a tool
	239	that will not be converted. Conversely, the value of the -r/--required-tools is a file in which each line will be
	240	interpreted as a tool that is required. Only one of these parameters can be specified at a given time.
	241
	242	The format of both files is exactly the same. As stated before, each line will be interpreted as the name of a tool;
	243	any line starting with a '#' will be ignored.
	244
	245	'''
	246	program_license = '''%(short_description)s
	247	Copyright 2015, Luis de la Garza
	248
	249	Licensed under the Apache License, Version 2.0 (the "License");
	250	you may not use this file except in compliance with the License.
	251	You may obtain a copy of the License at
	252
	253	http://www.apache.org/licenses/LICENSE-2.0
	254
	255	Unless required by applicable law or agreed to in writing, software
	256	distributed under the License is distributed on an "AS IS" BASIS,
	257	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	258	See the License for the specific language governing permissions and
	259	limitations under the License.
	260
	261	%(usage)s
	262	''' % {'short_description': program_short_description, 'usage': program_usage}
	263
	264	try:
	265	# Setup argument parser
	266	parser = ArgumentParser(prog="CTD2Galaxy", description=program_license,
	267	formatter_class=RawDescriptionHelpFormatter, add_help=True)
	268	parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+", action="append",
	269	help="List of CTD files to convert.")
	270	parser.add_argument("-o", "--output-destination", dest="output_destination", required=True,
	271	help="If multiple input files are given, then a folder in which all generated "
	272	"XMLs will be generated is expected;"
	273	"if a single input file is given, then a destination file is expected.")
	274	parser.add_argument("-f", "--formats-file", dest="formats_file",
	275	help="File containing the supported file formats. Run with '-h' or '--help' to see a "
	276	"brief example on the layout of this file.", default=None, required=False)
	277	parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line",
	278	help="Adds content to the command line", default="", required=False)
	279	parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination",
	280	help="Specify the location of a datatypes_conf.xml to modify and add the registered "
	281	"data types. If the provided destination does not exist, a new file will be created.",
	282	default=None, required=False)
	283	parser.add_argument("-x", "--default-executable-path", dest="default_executable_path",
	284	help="Use this executable path when <executablePath> is not present in the CTD",
	285	default=None, required=False)
	286	parser.add_argument("-b", "--blacklist-parameters", dest="blacklisted_parameters", default=[], nargs="+", action="append",
	287	help="List of parameters that will be ignored and won't appear on the galaxy stub",
	288	required=False)
	289	parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False,
	290	help="Default category to use for tools lacking a category when generating tool_conf.xml")
	291	parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False,
	292	help="Specify the location of an existing tool_conf.xml that will be modified to include "
	293	"the converted tools. If the provided destination does not exist, a new file will"
	294	"be created.")
	295	parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False,
	296	help="The path that will be prepended to the file names when generating tool_conf.xml")
	297	parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False,
	298	help="Each line of the file will be interpreted as a tool name that needs translation. "
	299	"Run with '-h' or '--help' to see a brief example on the format of this file.")
	300	parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False,
	301	help="File containing a list of tools for which a Galaxy stub will not be generated. "
	302	"Run with '-h' or '--help' to see a brief example on the format of this file.")
	303	parser.add_argument("-m", "--macros", dest="macros_files", default=[['macros.xml']], nargs="+",
	304	action="append", required=None, help="Import the additional given file(s) as macros. "
	305	"The macros stdio, requirements and advanced_options are required. Please see "
	306	"macros.xml for an example of a valid macros file. Al defined macros will be imported.")
	307	parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None, required=False,
	308	help="File containing hardcoded values for the given parameters. Run with '-h' or '--help' "
	309	"to see a brief example on the format of this file.")
	310	# TODO: add verbosity, maybe?
	311	parser.add_argument("-V", "--version", action='version', version=program_version_message)
	312
	313	# Process arguments
	314	args = parser.parse_args()
	315
	316	# validate and prepare the passed arguments
	317	validate_and_prepare_args(args)
	318
	319	# extract the names of the macros and check that we have found the ones we need
	320	macros_file_names = args.macros_files
	321	macros_to_expand = parse_macros_files(macros_file_names)
	322
	323	# parse the given supported file-formats file
	324	supported_file_formats = parse_file_formats(args.formats_file)
	325
	326	# parse the hardcoded parameters file¬
	327	parameter_hardcoder = parse_hardcoded_parameters(args.hardcoded_parameters)
	328
	329	# parse the skip/required tools files
	330	skip_tools = parse_tools_list_file(args.skip_tools_file)
	331	required_tools = parse_tools_list_file(args.required_tools_file)
	332
	333	#if verbose > 0:
	334	# print("Verbose mode on")
	335	parsed_models = convert(args.input_files,
	336	args.output_destination,
	337	supported_file_formats=supported_file_formats,
	338	default_executable_path=args.default_executable_path,
	339	add_to_command_line=args.add_to_command_line,
	340	blacklisted_parameters=args.blacklisted_parameters,
	341	required_tools=required_tools,
	342	skip_tools=skip_tools,
	343	macros_file_names=macros_file_names,
	344	macros_to_expand=macros_to_expand,
	345	parameter_hardcoder=parameter_hardcoder)
	346
	347	#TODO: add some sort of warning if a macro that doesn't exist is to be expanded
	348
	349	# it is not needed to copy the macros files, since the user has provided them
	350
	351	# generation of galaxy stubs is ready... now, let's see if we need to generate a tool_conf.xml
	352	if args.tool_conf_destination is not None:
	353	generate_tool_conf(parsed_models, args.tool_conf_destination,
	354	args.galaxy_tool_path, args.default_category)
	355
	356	# now datatypes_conf.xml
	357	if args.data_types_destination is not None:
	358	generate_data_type_conf(supported_file_formats, args.data_types_destination)
	359
	360	return 0
	361
	362	except KeyboardInterrupt:
	363	# handle keyboard interrupt
	364	return 0
	365	except ApplicationException, e:
	366	error("CTD2Galaxy could not complete the requested operation.", 0)
	367	error("Reason: " + e.msg, 0)
	368	return 1
	369	except ModelError, e:
	370	error("There seems to be a problem with one of your input CTDs.", 0)
	371	error("Reason: " + e.msg, 0)
	372	return 1
	373	except Exception, e:
	374	traceback.print_exc()
	375	return 2
	376
	377
	378	def parse_tools_list_file(tools_list_file):
	379	tools_list = None
	380	if tools_list_file is not None:
	381	tools_list = []
	382	with open(tools_list_file) as f:
	383	for line in f:
	384	if line is None or not line.strip() or line.strip().startswith("#"):
	385	continue
	386	else:
	387	tools_list.append(line.strip())
	388
	389	return tools_list
	390
	391
	392	def parse_macros_files(macros_file_names):
	393	macros_to_expand = set()
	394
	395	for macros_file_name in macros_file_names:
	396	try:
	397	macros_file = open(macros_file_name)
	398	root = parse(macros_file).getroot()
	399	for xml_element in root.findall("xml"):
	400	name = xml_element.attrib["name"]
	401	if name in macros_to_expand:
	402	warning("Macro %s has already been found. Duplicate found in file %s." %
	403	(name, macros_file_name), 0)
	404	else:
	405	macros_to_expand.add(name)
	406	except ParseError, e:
	407	raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " +
	408	str(e))
	409	except IOError, e:
	410	raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " +
	411	str(e))
	412
	413	# we depend on "stdio", "requirements" and "advanced_options" to exist on all the given macros files
	414	missing_needed_macros = []
	415	for required_macro in REQUIRED_MACROS:
	416	if required_macro not in macros_to_expand:
	417	missing_needed_macros.append(required_macro)
	418
	419	if missing_needed_macros:
	420	raise ApplicationException(
	421	"The following required macro(s) were not found in any of the given macros files: %s, "
	422	"see sample_files/macros.xml for an example of a valid macros file."
	423	% ", ".join(missing_needed_macros))
	424
	425	# we do not need to "expand" the advanced_options macro
	426	macros_to_expand.remove(ADVANCED_OPTIONS_MACRO_NAME)
	427	return macros_to_expand
	428
	429	def parse_hardcoded_parameters(hardcoded_parameters_file):
	430	parameter_hardcoder = ParameterHardcoder()
	431	if hardcoded_parameters_file is not None:
	432	line_number = 0
	433	with open(hardcoded_parameters_file) as f:
	434	for line in f:
	435	line_number += 1
	436	if line is None or not line.strip() or line.strip().startswith("#"):
	437	pass
	438	else:
	439	# the third column must not be obtained as a whole, and not split
	440	parsed_hardcoded_parameter = line.strip().split(None, 2)
	441	# valid lines contain two or three columns
	442	if len(parsed_hardcoded_parameter) != 2 and len(parsed_hardcoded_parameter) != 3:
	443	warning("Invalid line at line number %d of the given hardcoded parameters file. Line will be"
	444	"ignored:\n%s" % (line_number, line), 0)
	445	continue
	446
	447	parameter_name = parsed_hardcoded_parameter[0]
	448	hardcoded_value = parsed_hardcoded_parameter[1]
	449	tool_names = None
	450	if len(parsed_hardcoded_parameter) == 3:
	451	tool_names = parsed_hardcoded_parameter[2].split(',')
	452	if tool_names:
	453	for tool_name in tool_names:
	454	parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name.strip())
	455	else:
	456	parameter_hardcoder.register_parameter(parameter_name, hardcoded_value)
	457
	458	return parameter_hardcoder
	459
	460
	461	def parse_file_formats(formats_file):
	462	supported_formats = {}
	463	if formats_file is not None:
	464	line_number = 0
	465	with open(formats_file) as f:
	466	for line in f:
	467	line_number += 1
	468	if line is None or not line.strip() or line.strip().startswith("#"):
	469	# ignore (it'd be weird to have something like:
	470	# if line is not None and not (not line.strip()) ...
	471	pass
	472	else:
	473	# not an empty line, no comment
	474	# strip the line and split by whitespace
	475	parsed_formats = line.strip().split()
	476	# valid lines contain either one or four columns
	477	if not (len(parsed_formats) == 1 or len(parsed_formats) == 3 or len(parsed_formats) == 4):
	478	warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" %
	479	(line_number, line), 0)
	480	# ignore the line
	481	continue
	482	elif len(parsed_formats) == 1:
	483	supported_formats[parsed_formats[0]] = DataType(parsed_formats[0], parsed_formats[0])
	484	else:
	485	mimetype = None
	486	# check if mimetype was provided
	487	if len(parsed_formats) == 4:
	488	mimetype = parsed_formats[3]
	489	supported_formats[parsed_formats[0]] = DataType(parsed_formats[0], parsed_formats[1],
	490	parsed_formats[2], mimetype)
	491	return supported_formats
	492
	493
	494	def validate_and_prepare_args(args):
	495	# check that only one of skip_tools_file and required_tools_file has been provided
	496	if args.skip_tools_file is not None and args.required_tools_file is not None:
	497	raise ApplicationException(
	498	"You have provided both a file with tools to ignore and a file with required tools.\n"
	499	"Only one of -s/--skip-tools, -r/--required-tools can be provided.")
	500
	501	# first, we convert all list of lists in args to flat lists
	502	lists_to_flatten = ["input_files", "blacklisted_parameters", "macros_files"]
	503	for list_to_flatten in lists_to_flatten:
	504	setattr(args, list_to_flatten, [item for sub_list in getattr(args, list_to_flatten) for item in sub_list])
	505
	506	# if input is a single file, we expect output to be a file (and not a dir that already exists)
	507	if len(args.input_files) == 1:
	508	if os.path.isdir(args.output_destination):
	509	raise ApplicationException("If a single input file is provided, output (%s) is expected to be a file "
	510	"and not a folder.\n" % args.output_destination)
	511
	512	# if input is a list of files, we expect output to be a folder
	513	if len(args.input_files) > 1:
	514	if not os.path.isdir(args.output_destination):
	515	raise ApplicationException("If several input files are provided, output (%s) is expected to be an "
	516	"existing directory.\n" % args.output_destination)
	517
	518	# check that the provided input files, if provided, contain a valid file path
	519	input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files",
	520	"input_files", "formats_file", "hardcoded_parameters"]
	521
	522	for variable_name in input_variables_to_check:
	523	paths_to_check = []
	524	# check if we are handling a single file or a list of files
	525	member_value = getattr(args, variable_name)
	526	if member_value is not None:
	527	if isinstance(member_value, list):
	528	for file_name in member_value:
	529	paths_to_check.append(strip(str(file_name)))
	530	else:
	531	paths_to_check.append(strip(str(member_value)))
	532
	533	for path_to_check in paths_to_check:
	534	if not os.path.isfile(path_to_check) or not os.path.exists(path_to_check):
	535	raise ApplicationException(
	536	"The provided input file (%s) does not exist or is not a valid file path."
	537	% path_to_check)
	538
	539	# check that the provided output files, if provided, contain a valid file path (i.e., not a folder)
	540	output_variables_to_check = ["data_types_destination", "tool_conf_destination"]
	541
	542	for variable_name in output_variables_to_check:
	543	file_name = getattr(args, variable_name)
	544	if file_name is not None and os.path.isdir(file_name):
	545	raise ApplicationException("The provided output file name (%s) points to a directory." % file_name)
	546
	547
	548	def convert(input_files, output_destination, **kwargs):
	549	# first, generate a model
	550	is_converting_multiple_ctds = len(input_files) > 1
	551	parsed_models = []
	552	for input_file in input_files:
	553	try:
	554	model = CTDModel(from_file=input_file)
	555	except Exception, e:
	556	error(str(e), 1)
	557	continue
	558
	559	if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]:
	560	info("Skipping tool %s" % model.name, 0)
	561	continue
	562	elif kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]:
	563	info("Tool %s is not required, skipping it" % model.name, 0)
	564	continue
	565	else:
	566	info("Converting from %s " % input_file, 0)
	567	tool = create_tool(model)
	568	write_header(tool, model)
	569	create_description(tool, model)
	570	expand_macros(tool, model, **kwargs)
	571	create_command(tool, model, **kwargs)
	572	create_inputs(tool, model, **kwargs)
	573	create_outputs(tool, model, **kwargs)
	574	create_help(tool, model)
	575
	576	# finally, serialize the tool
	577	output_file = output_destination
	578	# if multiple inputs are being converted,
	579	# then we need to generate a different output_file for each input
	580	if is_converting_multiple_ctds:
	581	output_file = os.path.join(output_file, get_filename_without_suffix(input_file) + ".xml")
	582	# wrap our tool element into a tree to be able to serialize it
	583	tree = ElementTree(tool)
	584	tree.write(open(output_file, 'w'), encoding="UTF-8", xml_declaration=True, pretty_print=True)
	585	# let's use model to hold the name of the output file
	586	parsed_models.append([model, get_filename(output_file)])
	587
	588	return parsed_models
	589
	590
	591	def write_header(tool, model):
	592	tool.addprevious(etree.Comment(
	593	"This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). "
	594	"This file was automatically generated using CTD2Galaxy."))
	595	tool.addprevious(etree.Comment('Proposed Tool Section: [%s]' % model.opt_attribs.get("category", "")))
	596
	597
	598	def generate_tool_conf(parsed_models, tool_conf_destination, galaxy_tool_path, default_category):
	599	# for each category, we keep a list of models corresponding to it
	600	categories_to_tools = dict()
	601	for model in parsed_models:
	602	category = strip(model[0].opt_attribs.get("category", ""))
	603	if not category.strip():
	604	category = default_category
	605	if category not in categories_to_tools:
	606	categories_to_tools[category] = []
	607	categories_to_tools[category].append(model[1])
	608
	609	# at this point, we should have a map for all categories->tools
	610	toolbox_node = Element("toolbox")
	611
	612	if galaxy_tool_path is not None and not galaxy_tool_path.strip().endswith("/"):
	613	galaxy_tool_path = galaxy_tool_path.strip() + "/"
	614	if galaxy_tool_path is None:
	615	galaxy_tool_path = ""
	616
	617	for category, file_names in categories_to_tools.iteritems():
	618	section_node = add_child_node(toolbox_node, "section")
	619	section_node.attrib["id"] = "section-id-" + "".join(category.split())
	620	section_node.attrib["name"] = category
	621
	622	for filename in file_names:
	623	tool_node = add_child_node(section_node, "tool")
	624	tool_node.attrib["file"] = galaxy_tool_path + filename
	625
	626	toolconf_tree = ElementTree(toolbox_node)
	627	toolconf_tree.write(open(tool_conf_destination,'w'), encoding="UTF-8", xml_declaration=True, pretty_print=True)
	628	info("Generated Galaxy tool_conf.xml in %s" % tool_conf_destination, 0)
	629
	630
	631	def generate_data_type_conf(supported_file_formats, data_types_destination):
	632	data_types_node = Element("datatypes")
	633	registration_node = add_child_node(data_types_node, "registration")
	634	registration_node.attrib["converters_path"] = "lib/galaxy/datatypes/converters"
	635	registration_node.attrib["display_path"] = "display_applications"
	636
	637	for format_name in supported_file_formats:
	638	data_type = supported_file_formats[format_name]
	639	# add only if it's a data type that does not exist in Galaxy
	640	if data_type.galaxy_type is not None:
	641	data_type_node = add_child_node(registration_node, "datatype")
	642	# we know galaxy_extension is not None
	643	data_type_node.attrib["extension"] = data_type.galaxy_extension
	644	data_type_node.attrib["type"] = data_type.galaxy_type
	645	if data_type.mimetype is not None:
	646	data_type_node.attrib["mimetype"] = data_type.mimetype
	647
	648	data_types_tree = ElementTree(data_types_node)
	649	data_types_tree.write(open(data_types_destination,'w'), encoding="UTF-8", xml_declaration=True, pretty_print=True)
	650	info("Generated Galaxy datatypes_conf.xml in %s" % data_types_destination, 0)
	651
	652
	653	# taken from
	654	# http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format
	655	def get_filename(path):
	656	head, tail = ntpath.split(path)
	657	return tail or ntpath.basename(head)
	658
	659
	660	def get_filename_without_suffix(path):
	661	root, ext = os.path.splitext(os.path.basename(path))
	662	return root
	663
	664
	665	def create_tool(model):
	666	return Element("tool", OrderedDict([("id", model.name), ("name", model.name), ("version", model.version)]))
	667
	668
	669	def create_description(tool, model):
	670	if "description" in model.opt_attribs.keys() and model.opt_attribs["description"] is not None:
	671	description = SubElement(tool,"description")
	672	description.text = model.opt_attribs["description"]
	673
	674
	675	def get_param_name(param):
	676	# we generate parameters with colons for subgroups, but not for the topmost parents (OpenMS legacy)
	677	if type(param.parent) == ParameterGroup and param.parent.parent != None:
	678	return get_param_name(param.parent) + ":" + resolve_param_mapping(param)
	679	else:
	680	return resolve_param_mapping(param)
	681
	682
	683	# some parameters are mapped to command line options, this method helps resolve those mappings, if any
	684	# TODO: implement mapping of parameters!!!
	685	def resolve_param_mapping(param):
	686	return param.name
	687
	688
	689	def create_command(tool, model, **kwargs):
	690	final_command = get_tool_executable_path(model, kwargs["default_executable_path"]) + '\n'
	691	final_command += kwargs["add_to_command_line"] + '\n'
	692	advanced_command_start = "#if $adv_opts.adv_opts_selector=='advanced':\n"
	693	advanced_command_end = '#end if'
	694	advanced_command = ''
	695	parameter_hardcoder = kwargs["parameter_hardcoder"]
	696
	697	found_output_parameter = False
	698	for param in extract_parameters(model):
	699	if param.type is _OutFile:
	700	found_output_parameter = True
	701	command = ''
	702	param_name = get_param_name(param)
	703
	704	if param.name in kwargs["blacklisted_parameters"]:
	705	continue
	706
	707	hardcoded_value = parameter_hardcoder.get_hardcoded_value(param_name, model.name)
	708	if hardcoded_value:
	709	command += '-%s %s\n' % (param_name, hardcoded_value)
	710	else:
	711	# parameter is neither blacklisted nor hardcoded...
	712	galaxy_parameter_name = get_galaxy_parameter_name(param)
	713	repeat_galaxy_parameter_name = get_repeat_galaxy_parameter_name(param)
	714
	715	# logic for ITEMLISTs
	716	if param.is_list:
	717	if param.type is _InFile:
	718	command += "-" + str(param_name) + "\n"
	719	command += " #for token in $" + galaxy_parameter_name + ":\n"
	720	command += " $token\n"
	721	command += " #end for\n"
	722	else:
	723	command += "\n#if $" + repeat_galaxy_parameter_name + ":\n"
	724	command += "-" + str(param_name) + "\n"
	725	command += " #for token in $" + repeat_galaxy_parameter_name + ":\n"
	726	command += " #if \" \" in str(token):\n"
	727	command += " \"$token." + galaxy_parameter_name + "\"\n"
	728	command += " #else\n"
	729	command += " $token." + galaxy_parameter_name + "\n"
	730	command += " #end if\n"
	731	command += " #end for\n"
	732	command += "#end if\n"
	733	# logic for other ITEMs
	734	else:
	735	if param.advanced and param.type is not _OutFile:
	736	actual_parameter = "$adv_opts.%s" % galaxy_parameter_name
	737	else:
	738	actual_parameter = "$%s" % galaxy_parameter_name
	739	## if whitespace_validation has been set, we need to generate, for each parameter:
	740	## #if str( $t ).split() != '':
	741	## -t "$t"
	742	## #end if
	743	## TODO only useful for text fields, integers or floats
	744	## not useful for choices, input fields ...
	745
	746	if not is_boolean_parameter(param) and type(param.restrictions) is _Choices :
	747	command += "#if " + actual_parameter + ":\n"
	748	command += ' -%s\n' % param_name
	749	command += " #if \" \" in str(" + actual_parameter + "):\n"
	750	command += " \"" + actual_parameter + "\"\n"
	751	command += " #else\n"
	752	command += " " + actual_parameter + "\n"
	753	command += " #end if\n"
	754	command += "#end if\n"
	755	elif is_boolean_parameter(param):
	756	command += "#if " + actual_parameter + ":\n"
	757	command += ' -%s\n' % param_name
	758	command += "#end if\n"
	759	elif TYPE_TO_GALAXY_TYPE[param.type] is 'text':
	760	command += "#if " + actual_parameter + ":\n"
	761	command += " -%s " % param_name
	762	command += " \"" + actual_parameter + "\"\n"
	763	command += "#end if\n"
	764	else:
	765	command += "#if " + actual_parameter + ":\n"
	766	command += ' -%s ' % param_name
	767	command += actual_parameter + "\n"
	768	command += "#end if\n"
	769
	770	if param.advanced and param.type is not _OutFile:
	771	advanced_command += " %s" % command
	772	else:
	773	final_command += command
	774
	775	if advanced_command:
	776	final_command += "%s%s%s\n" % (advanced_command_start, advanced_command, advanced_command_end)
	777
	778	if not found_output_parameter:
	779	final_command += "> $param_stdout\n"
	780
	781	command_node = add_child_node(tool, "command")
	782	command_node.text = final_command
	783
	784
	785	# creates the xml elements needed to import the needed macros files
	786	# and to "expand" the macros
	787	def expand_macros(tool, model, **kwargs):
	788	macros_node = add_child_node(tool, "macros")
	789	token_node = add_child_node(macros_node, "token")
	790	token_node.attrib["name"] = "@EXECUTABLE@"
	791	token_node.text = get_tool_executable_path(model, kwargs["default_executable_path"])
	792
	793	# add <import> nodes
	794	for macro_file_name in kwargs["macros_file_names"]:
	795	macro_file = open(macro_file_name)
	796	import_node = add_child_node(macros_node, "import")
	797	# do not add the path of the file, rather, just its basename
	798	import_node.text = os.path.basename(macro_file.name)
	799
	800	# add <expand> nodes
	801	for expand_macro in kwargs["macros_to_expand"]:
	802	expand_node = add_child_node(tool, "expand")
	803	expand_node.attrib["macro"] = expand_macro
	804
	805
	806	def get_tool_executable_path(model, default_executable_path):
	807	# rules to build the galaxy executable path:
	808	# if executablePath is null, then use default_executable_path and store it in executablePath
	809	# if executablePath is null and executableName is null, then the name of the tool will be used
	810	# if executablePath is null and executableName is not null, then executableName will be used
	811	# if executablePath is not null and executableName is null,
	812	# then executablePath and the name of the tool will be used
	813	# if executablePath is not null and executableName is not null, then both will be used
	814
	815	# first, check if the model has executablePath / executableName defined
	816	executable_path = model.opt_attribs.get("executablePath", None)
	817	executable_name = model.opt_attribs.get("executableName", None)
	818
	819	# check if we need to use the default_executable_path
	820	if executable_path is None:
	821	executable_path = default_executable_path
	822
	823	# fix the executablePath to make sure that there is a '/' in the end
	824	if executable_path is not None:
	825	executable_path = executable_path.strip()
	826	if not executable_path.endswith('/'):
	827	executable_path += '/'
	828
	829	# assume that we have all information present
	830	command = str(executable_path) + str(executable_name)
	831	if executable_path is None:
	832	if executable_name is None:
	833	command = model.name
	834	else:
	835	command = executable_name
	836	else:
	837	if executable_name is None:
	838	command = executable_path + model.name
	839	return command
	840
	841
	842	def get_galaxy_parameter_name(param):
	843	return "param_%s" % get_param_name(param).replace(':', '_').replace('-', '_')
	844
	845
	846	def get_input_with_same_restrictions(out_param, model, supported_file_formats):
	847	for param in extract_parameters(model):
	848	if param.type is _InFile:
	849	if param.restrictions is not None:
	850	in_param_formats = get_supported_file_types(param.restrictions.formats, supported_file_formats)
	851	out_param_formats = get_supported_file_types(out_param.restrictions.formats, supported_file_formats)
	852	if in_param_formats == out_param_formats:
	853	return param
	854
	855
	856	def create_inputs(tool, model, **kwargs):
	857	inputs_node = SubElement(tool, "inputs")
	858
	859	# some suites (such as OpenMS) need some advanced options when handling inputs
	860	expand_advanced_node = add_child_node(tool, "expand", OrderedDict([("macro", ADVANCED_OPTIONS_MACRO_NAME)]))
	861	parameter_hardcoder = kwargs["parameter_hardcoder"]
	862
	863	# treat all non output-file parameters as inputs
	864	for param in extract_parameters(model):
	865	# no need to show hardcoded parameters
	866	hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
	867	if param.name in kwargs["blacklisted_parameters"] or hardcoded_value:
	868	# let's not use an extra level of indentation and use NOP
	869	continue
	870	if param.type is not _OutFile:
	871	if param.advanced:
	872	if expand_advanced_node is not None:
	873	parent_node = expand_advanced_node
	874	else:
	875	# something went wrong... we are handling an advanced parameter and the
	876	# advanced input macro was not set... inform the user about it
	877	info("The parameter %s has been set as advanced, but advanced_input_macro has "
	878	"not been set." % param.name, 1)
	879	# there is not much we can do, other than use the inputs_node as a parent node!
	880	parent_node = inputs_node
	881	else:
	882	parent_node = inputs_node
	883
	884	# for lists we need a repeat tag
	885	if param.is_list and param.type is not _InFile:
	886	rep_node = add_child_node(parent_node, "repeat")
	887	create_repeat_attribute_list(rep_node, param)
	888	parent_node = rep_node
	889
	890	param_node = add_child_node(parent_node, "param")
	891	create_param_attribute_list(param_node, param, kwargs["supported_file_formats"])
	892
	893	# advanced parameter selection should be at the end
	894	# and only available if an advanced parameter exists
	895	if expand_advanced_node is not None and len(expand_advanced_node) > 0:
	896	inputs_node.append(expand_advanced_node)
	897
	898
	899	def get_repeat_galaxy_parameter_name(param):
	900	return "rep_" + get_galaxy_parameter_name(param)
	901
	902
	903	def create_repeat_attribute_list(rep_node, param):
	904	rep_node.attrib["name"] = get_repeat_galaxy_parameter_name(param)
	905	if param.required:
	906	rep_node.attrib["min"] = "1"
	907	else:
	908	rep_node.attrib["min"] = "0"
	909	# for the ITEMLISTs which have LISTITEM children we only
	910	# need one parameter as it is given as a string
	911	if param.default is not None:
	912	rep_node.attrib["max"] = "1"
	913	rep_node.attrib["title"] = get_galaxy_parameter_name(param)
	914
	915
	916	def create_param_attribute_list(param_node, param, supported_file_formats):
	917	param_node.attrib["name"] = get_galaxy_parameter_name(param)
	918
	919	param_type = TYPE_TO_GALAXY_TYPE[param.type]
	920	if param_type is None:
	921	raise ModelError("Unrecognized parameter type %(type)s for parameter %(name)s"
	922	% {"type": param.type, "name": param.name})
	923
	924	if param.is_list:
	925	param_type = "text"
	926
	927	if is_selection_parameter(param):
	928	param_type = "select"
	929
	930	if is_boolean_parameter(param):
	931	param_type = "boolean"
	932
	933	if param.type is _InFile:
	934	# assume it's just text unless restrictions are provided
	935	param_format = "text"
	936	if param.restrictions is not None:
	937	# join all supported_formats for the file... this MUST be a _FileFormat
	938	if type(param.restrictions) is _FileFormat:
	939	param_format = ','.join(get_supported_file_types(param.restrictions.formats, supported_file_formats))
	940	else:
	941	raise InvalidModelException("Expected 'file type' restrictions for input file [%(name)s], "
	942	"but instead got [%(type)s]"
	943	% {"name": param.name, "type": type(param.restrictions)})
	944	param_node.attrib["type"] = "data"
	945	param_node.attrib["format"] = param_format
	946	# in the case of multiple input set multiple flag
	947	if param.is_list:
	948	param_node.attrib["multiple"] = "true"
	949
	950	else:
	951	param_node.attrib["type"] = param_type
	952
	953	# check for parameters with restricted values (which will correspond to a "select" in galaxy)
	954	if param.restrictions is not None:
	955	# it could be either _Choices or _NumericRange, with special case for boolean types
	956	if param_type == "boolean":
	957	create_boolean_parameter(param_node, param)
	958	elif type(param.restrictions) is _Choices:
	959	# create as many <option> elements as restriction values
	960	for choice in param.restrictions.choices:
	961	option_node = add_child_node(param_node, "option", OrderedDict([("value", str(choice))]))
	962	option_node.text = str(choice)
	963
	964	elif type(param.restrictions) is _NumericRange:
	965	if param.type is not int and param.type is not float:
	966	raise InvalidModelException("Expected either 'int' or 'float' in the numeric range restriction for "
	967	"parameter [%(name)s], but instead got [%(type)s]" %
	968	{"name": param.name, "type": type(param.restrictions)})
	969	# extract the min and max values and add them as attributes
	970	# validate the provided min and max values
	971	if param.restrictions.n_min is not None:
	972	param_node.attrib["min"] = str(param.restrictions.n_min)
	973	if param.restrictions.n_max is not None:
	974	param_node.attrib["max"] = str(param.restrictions.n_max)
	975	elif type(param.restrictions) is _FileFormat:
	976	param_node.attrib["format"] = ",".join(
	977	get_supported_file_types(param.restrictions.formats, supported_file_formats))
	978	else:
	979	raise InvalidModelException("Unrecognized restriction type [%(type)s] for parameter [%(name)s]"
	980	% {"type": type(param.restrictions), "name": param.name})
	981
	982	param_node.attrib["optional"] = str(not param.required)
	983
	984	if param_type == "text":
	985	# add size attribute... this is the length of a textbox field in Galaxy (it could also be 15x2, for instance)
	986	param_node.attrib["size"] = "30"
	987	# add sanitizer nodes, this is needed for special character like "["
	988	# which are used for example by FeatureFinderMultiplex
	989	sanitizer_node = SubElement(param_node, "sanitizer")
	990
	991	valid_node = SubElement(sanitizer_node, "valid", OrderedDict([("initial", "string.printable")]))
	992	add_child_node(valid_node, "remove", OrderedDict([("value", '\'')]))
	993	add_child_node(valid_node, "remove", OrderedDict([("value", '"')]))
	994
	995	# check for default value
	996	if param.default is not None:
	997	if type(param.default) is list:
	998	# we ASSUME that a list of parameters looks like:
	999	# $ tool -ignore He Ar Xe
	1000	# meaning, that, for example, Helium, Argon and Xenon will be ignored
	1001	param_node.attrib["value"] = ' '.join(map(str, param.default))
	1002
	1003	elif param_type != "boolean":
	1004	# boolean parameters handle default values by using the "checked" attribute
	1005	# there isn't much we can do... just stringify the value
	1006	param_node.attrib["value"] = str(param.default)
	1007	else:
	1008	if param.type is int or param.type is float:
	1009	# galaxy requires "value" to be included for int/float
	1010	# since no default was included, we need to figure out one in a clever way... but let the user know
	1011	# that we are "thinking" for him/her
	1012	warning("Generating default value for parameter [%s]. "
	1013	"Galaxy requires the attribute 'value' to be set for integer/floats. "
	1014	"Edit the CTD file and provide a suitable default value." % param.name, 1)
	1015	# check if there's a min/max and try to use them
	1016	default_value = None
	1017	if param.restrictions is not None:
	1018	if type(param.restrictions) is _NumericRange:
	1019	default_value = param.restrictions.n_min
	1020	if default_value is None:
	1021	default_value = param.restrictions.n_max
	1022	if default_value is None:
	1023	# no min/max provided... just use 0 and see what happens
	1024	default_value = 0
	1025	else:
	1026	# should never be here, since we have validated this anyway...
	1027	# this code is here just for documentation purposes
	1028	# however, better safe than sorry!
	1029	# (it could be that the code changes and then we have an ugly scenario)
	1030	raise InvalidModelException("Expected either a numeric range for parameter [%(name)s], "
	1031	"but instead got [%(type)s]"
	1032	% {"name": param.name, "type": type(param.restrictions)})
	1033	else:
	1034	# no restrictions and no default value provided...
	1035	# make up something
	1036	default_value = 0
	1037	param_node.attrib["value"] = str(default_value)
	1038
	1039	label = "%s parameter" % param.name
	1040	help_text = ""
	1041
	1042	if param.description is not None:
	1043	label, help_text = generate_label_and_help(param.description)
	1044
	1045	param_node.attrib["label"] = label
	1046	param_node.attrib["help"] = "(-%s)" % param.name + " " + help_text
	1047
	1048
	1049	def generate_label_and_help(desc):
	1050	label = ""
	1051	help_text = ""
	1052	# This tag is found in some descriptions
	1053	desc = str(desc).replace("#br#", " <br>")
	1054	# Get rid of dots in the end
	1055	if desc.endswith("."):
	1056	desc = desc.rstrip(".")
	1057	# Check if first word is a normal word and make it uppercase
	1058	if str(desc).find(" ") > -1:
	1059	first_word, rest = str(desc).split(" ", 1)
	1060	if str(first_word).islower():
	1061	# check if label has a quotient of the form a/b
	1062	if first_word.find("/") != 1 :
	1063	first_word.capitalize()
	1064	desc = first_word + " " + rest
	1065	label = desc
	1066
	1067	# Try to split the label if it is too long
	1068	if len(desc) > 50:
	1069	# find an example and put everything before in the label and the e.g. in the help
	1070	if desc.find("e.g.") > 1 :
	1071	label, help_text = desc.split("e.g.",1)
	1072	help_text = "e.g." + help_text
	1073	else:
	1074	# find the end of the first sentence
	1075	# look for ". " because some labels contain .file or something similar
	1076	delimiter = ""
	1077	if desc.find(". ") > 1 and desc.find("? ") > 1:
	1078	if desc.find(". ") < desc.find("? "):
	1079	delimiter = ". "
	1080	else:
	1081	delimiter = "? "
	1082	elif desc.find(". ") > 1:
	1083	delimiter = ". "
	1084	elif desc.find("? ") > 1:
	1085	delimiter = "? "
	1086	if delimiter != "":
	1087	label, help_text = desc.split(delimiter, 1)
	1088
	1089	# add the question mark back
	1090	if delimiter == "? ":
	1091	label += "? "
	1092
	1093	# remove all linebreaks
	1094	label = label.rstrip().rstrip('<br>').rstrip()
	1095	return label, help_text
	1096
	1097
	1098	def get_indented_text(text, indentation_level):
	1099	return ("%(indentation)s%(text)s" %
	1100	{"indentation": " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level),
	1101	"text": text})
	1102
	1103
	1104	def warning(warning_text, indentation_level):
	1105	sys.stdout.write(get_indented_text("WARNING: %s\n" % warning_text, indentation_level))
	1106
	1107
	1108	def error(error_text, indentation_level):
	1109	sys.stderr.write(get_indented_text("ERROR: %s\n" % error_text, indentation_level))
	1110
	1111
	1112	def info(info_text, indentation_level):
	1113	sys.stdout.write(get_indented_text("INFO: %s\n" % info_text, indentation_level))
	1114
	1115
	1116	# determines if the given choices are boolean (basically, if the possible values are yes/no, true/false)
	1117	def is_boolean_parameter(param):
	1118	is_choices = False
	1119	if type(param.restrictions) is _Choices:
	1120	# for a true boolean experience, we need 2 values
	1121	# and also that those two values are either yes/no or true/false
	1122	if len(param.restrictions.choices) == 2:
	1123	choices = get_lowercase_list(param.restrictions.choices)
	1124	if ("yes" in choices and "no" in choices) or ("true" in choices and "false" in choices):
	1125	is_choices = True
	1126	return is_choices
	1127
	1128
	1129	# determines if there are choices for the parameter
	1130	def is_selection_parameter(param):
	1131	return type(param.restrictions) is _Choices
	1132
	1133
	1134	def get_lowercase_list(some_list):
	1135	lowercase_list = map(str, some_list)
	1136	lowercase_list = map(string.lower, lowercase_list)
	1137	lowercase_list = map(strip, lowercase_list)
	1138	return lowercase_list
	1139
	1140
	1141	# creates a galaxy boolean parameter type
	1142	# this method assumes that param has restrictions, and that only two restictions are present
	1143	# (either yes/no or true/false)
	1144	def create_boolean_parameter(param_node, param):
	1145	# first, determine the 'truevalue' and the 'falsevalue'
	1146	"""TODO: true and false values can be way more than 'true' and 'false'
	1147	but for that we need CTD support
	1148	"""
	1149	# by default, 'true' and 'false' are handled as flags, like the verbose flag (i.e., -v)
	1150	true_value = "-%s" % get_param_name(param)
	1151	false_value = ""
	1152	choices = get_lowercase_list(param.restrictions.choices)
	1153	if "yes" in choices:
	1154	true_value = "yes"
	1155	false_value = "no"
	1156	param_node.attrib["truevalue"] = true_value
	1157	param_node.attrib["falsevalue"] = false_value
	1158
	1159	# set the checked attribute
	1160	if param.default is not None:
	1161	checked_value = "false"
	1162	default = strip(string.lower(param.default))
	1163	if default == "yes" or default == "true":
	1164	checked_value = "true"
	1165	#attribute_list["checked"] = checked_value
	1166	param_node.attrib["checked"] = checked_value
	1167
	1168
	1169	def create_outputs(parent, model, **kwargs):
	1170	outputs_node = add_child_node(parent, "outputs")
	1171	parameter_hardcoder = kwargs["parameter_hardcoder"]
	1172
	1173	for param in extract_parameters(model):
	1174
	1175	# no need to show hardcoded parameters
	1176	hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
	1177	if param.name in kwargs["blacklisted_parameters"] or hardcoded_value:
	1178	# let's not use an extra level of indentation and use NOP
	1179	continue
	1180	if param.type is _OutFile:
	1181	create_output_node(outputs_node, param, model, kwargs["supported_file_formats"])
	1182
	1183	# If there are no outputs defined in the ctd the node will have no children
	1184	# and the stdout will be used as output
	1185	if len(outputs_node) == 0:
	1186	add_child_node(outputs_node, "data",
	1187	OrderedDict([("name", "param_stdout"), ("format", "text"), ("label", "Output from stdout")]))
	1188
	1189
	1190	def create_output_node(parent, param, model, supported_file_formats):
	1191	data_node = add_child_node(parent, "data")
	1192	data_node.attrib["name"] = get_galaxy_parameter_name(param)
	1193
	1194	data_format = "data"
	1195	if param.restrictions is not None:
	1196	if type(param.restrictions) is _FileFormat:
	1197	# set the first data output node to the first file format
	1198
	1199	# check if there are formats that have not been registered yet...
	1200	output = ""
	1201	for format_name in param.restrictions.formats:
	1202	if not format_name in supported_file_formats.keys():
	1203	output += " " + str(format_name)
	1204
	1205	# warn only if there's about to complain
	1206	if output:
	1207	warning("Parameter " + param.name + " has the following unsupported format(s):" + output, 1)
	1208
	1209	formats = get_supported_file_types(param.restrictions.formats, supported_file_formats)
	1210	try:
	1211	data_format = formats.pop()
	1212	except KeyError:
	1213	# there is not much we can do, other than catching the exception
	1214	pass
	1215	# if there are more than one output file formats try to take the format from the input parameter
	1216	if formats:
	1217	corresponding_input = get_input_with_same_restrictions(param, model, supported_file_formats)
	1218	if corresponding_input is not None:
	1219	data_format = "input"
	1220	data_node.attrib["metadata_source"] = get_galaxy_parameter_name(corresponding_input)
	1221	else:
	1222	raise InvalidModelException("Unrecognized restriction type [%(type)s] "
	1223	"for output [%(name)s]" % {"type": type(param.restrictions),
	1224	"name": param.name})
	1225	data_node.attrib["format"] = data_format
	1226
	1227	#TODO: find a smarter label ?
	1228	#if param.description is not None:
	1229	# data_node.setAttribute("label", param.description)
	1230	return data_node
	1231
	1232
	1233	def get_supported_file_types(formats, supported_file_formats):
	1234	return set([supported_file_formats.get(format_name, DataType(format_name, format_name)).galaxy_extension
	1235	for format_name in formats if format_name in supported_file_formats.keys()])
	1236
	1237
	1238	def create_change_format_node(parent, data_formats, input_ref):
	1239	# <change_format>
	1240	# <when input="secondary_structure" value="true" format="text"/>
	1241	# </change_format>
	1242	change_format_node = add_child_node(parent, "change_format")
	1243	for data_format in data_formats:
	1244	add_child_node(change_format_node, "when",
	1245	OrderedDict([("input", input_ref), ("value", data_format), ("format", data_format)]))
	1246
	1247
	1248	# Shows basic information about the file, such as data ranges and file type.
	1249	def create_help(tool, model):
	1250	manual = ''
	1251	doc_url = None
	1252	if 'manual' in model.opt_attribs.keys():
	1253	manual += '%s\n\n' % model.opt_attribs["manual"]
	1254	if 'docurl' in model.opt_attribs.keys():
	1255	doc_url = model.opt_attribs["docurl"]
	1256
	1257	help_text = "No help available"
	1258	if manual is not None:
	1259	help_text = manual
	1260	if doc_url is not None:
	1261	help_text = ("" if manual is None else manual) + "\nFor more information, visit %s" % doc_url
	1262	help_node = add_child_node(tool, "help")
	1263	# TODO: do we need CDATA Section here?
	1264	help_node.text = help_text
	1265
	1266
	1267	# since a model might contain several ParameterGroup elements,
	1268	# we want to simply 'flatten' the parameters to generate the Galaxy wrapper
	1269	def extract_parameters(model):
	1270	parameters = []
	1271	if len(model.parameters.parameters) > 0:
	1272	# use this to put parameters that are to be processed
	1273	# we know that CTDModel has one parent ParameterGroup
	1274	pending = [model.parameters]
	1275	while len(pending) > 0:
	1276	# take one element from 'pending'
	1277	parameter = pending.pop()
	1278	if type(parameter) is not ParameterGroup:
	1279	parameters.append(parameter)
	1280	else:
	1281	# append the first-level children of this ParameterGroup
	1282	pending.extend(parameter.parameters.values())
	1283	# returned the reversed list of parameters (as it is now,
	1284	# we have the last parameter in the CTD as first in the list)
	1285	return reversed(parameters)
	1286
	1287
	1288	# adds and returns a child node using the given name to the given parent node
	1289	def add_child_node(parent_node, child_node_name, attributes=OrderedDict([])):
	1290	child_node = SubElement(parent_node, child_node_name, attributes)
	1291	return child_node
	1292
	1293
	1294	if __name__ == "__main__":
	1295	sys.exit(main())

+31

-0

macros.xml less more

	0	<?xml version='1.0' encoding='UTF-8'?>
	1	<!-- CTD2Galaxy depends on this file and on the stdio, requirements and advanced_options macros!
	2	You can edit this file to add your own macros, if you so desire, or you can
	3	add additional macro files using the m/macros parameter -->
	4	<macros>
	5	<xml name="requirements">
	6	<requirements>
	7	<requirement type="binary">@EXECUTABLE@</requirement>
	8	</requirements>
	9	</xml>
	10	<xml name="stdio">
	11	<stdio>
	12	<exit_code range="1:"/>
	13	<exit_code range=":-1"/>
	14	<regex match="Error:"/>
	15	<regex match="Exception:"/>
	16	</stdio>
	17	</xml>
	18	<xml name="advanced_options">
	19	<conditional name="adv_opts">
	20	<param name="adv_opts_selector" type="select" label="Advanced Options">
	21	<option value="basic" selected="True">Hide Advanced Options</option>
	22	<option value="advanced">Show Advanced Options</option>
	23	</param>
	24	<when value="basic"/>
	25	<when value="advanced">
	26	<yield/>
	27	</when>
	28	</conditional>
	29	</xml>
	30	</macros>

-13

~~setup.py~~ less more

from distutils.core import setup

# Packaging metadata of the CTD2Galaxy converter.
setup(
    name='CTD2Galaxy',
    version='1.0',
    packages=['CTD2Galaxy'],
    url='https://github.com/WorkflowConversion/CTD2Galaxy',
    license='',
    author='Luis de la Garza',
    author_email='',
    # py_modules expects dotted module names, not file system paths
    py_modules=['CTD2Galaxy.generator'],
    description='A program to convert CTDs to Galaxy tool wrappers.'
)