Codebase list ctdconverter / 6da4c12
Made -m/--macros optional, providing a default value; moved around macros.xml; updated README.md Luis de la Garza 7 years ago
5 changed file(s) with 1362 addition(s) and 1318 deletion(s). Raw diff Collapse all Expand all
+0
-1297
CTD2Galaxy/generator.py less more
0 #!/usr/bin/env python
1 # encoding: utf-8
2
3 """
4 @author: delagarza
5 """
6
7
8 import sys
9 import os
10 import traceback
11 import ntpath
12 import string
13
14 from argparse import ArgumentParser
15 from argparse import RawDescriptionHelpFormatter
16 from CTDopts.CTDopts import CTDModel, _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, \
17 _FileFormat, ModelError
18 from collections import OrderedDict
19 from string import strip
20 from lxml import etree
21 from lxml.etree import SubElement, Element, ElementTree, ParseError, parse
22
# nothing is exported through "from <module> import *"
__all__ = []
# __version__ and __updated__ are combined into the message printed by -V/--version (see main)
__version__ = 1.0
__date__ = '2014-09-17'
__updated__ = '2016-05-09'

# NOTE(review): presumably the indentation step used by the logging helpers; they are not visible here - confirm
MESSAGE_INDENTATION_INCREMENT = 2

# lookup from the python/CTDopts type of a parameter to a Galaxy type name (create_command compares the result
# against 'text')
TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'data',
                       _OutFile: 'data', _Choices: 'select'}

# names of the macros that parse_macros_files requires to be defined in the given macros files
STDIO_MACRO_NAME = "stdio"
REQUIREMENTS_MACRO_NAME = "requirements"
ADVANCED_OPTIONS_MACRO_NAME = "advanced_options"

REQUIRED_MACROS = [STDIO_MACRO_NAME, REQUIREMENTS_MACRO_NAME, ADVANCED_OPTIONS_MACRO_NAME]
38
39
class CLIError(Exception):
    """Generic exception to raise and log different fatal errors.

    The message given to the constructor is stored in ``msg`` prefixed with "E: "; that
    prefixed text is what ``str()`` and ``unicode()`` return.
    """

    def __init__(self, msg):
        # super() needs both the class and the instance: the one-argument form only builds an
        # unbound super object, so Exception.__init__ was never actually invoked
        super(CLIError, self).__init__(msg)
        self.msg = "E: %s" % msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
51
52
class InvalidModelException(ModelError):
    """A ModelError that carries a free-text message.

    The message is kept in ``message`` and is returned, unchanged, by both ``str()`` and ``repr()``.
    """

    def __init__(self, message):
        super(InvalidModelException, self).__init__()
        self.message = message

    def __str__(self):
        return self.message

    # the representation is the bare message as well
    __repr__ = __str__
63
64
class ApplicationException(Exception):
    """Error caused by invalid user input or an operation that could not be completed.

    The message given to the constructor is stored in ``msg`` and is what ``str()`` and
    ``unicode()`` return; main() reports it to the user and exits with code 1.
    """

    def __init__(self, msg):
        # super() needs both the class and the instance: the one-argument form only builds an
        # unbound super object, so Exception.__init__ was never actually invoked
        super(ApplicationException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
75
76
class ExitCode:
    """Plain record holding the range, level and description of an exit code."""

    def __init__(self, code_range="", level="", description=None):
        # note that the constructor argument code_range is exposed as the attribute "range"
        self.range, self.level, self.description = code_range, level, description
82
83
class DataType:
    """Plain record relating a file extension to its Galaxy extension, Galaxy type and mimetype."""

    def __init__(self, extension, galaxy_extension=None, galaxy_type=None, mimetype=None):
        # only the extension is mandatory; everything else stays None unless provided
        self.extension, self.galaxy_extension = extension, galaxy_extension
        self.galaxy_type, self.mimetype = galaxy_type, mimetype
90
91
class ParameterHardcoder:
    """Registry of parameter values that are fixed ("hardcoded") for all tools or for single tools.

    Values live in ``parameter_map``; the keys are produced by build_key():

        [ParameterName]                        -> value applying to every tool
        [ParameterName][separator][ToolName]   -> value applying to one tool only

    Examples (the separator is '!'):

        threads                -> 24
        adapter!XTandemAdapter -> xtandem.exe
        adapter                -> adapter.exe
    """

    def __init__(self):
        self.separator = "!"
        self.parameter_map = {}

    def get_hardcoded_value(self, parameter_name, tool_name):
        """Return the value registered for the parameter, or None if there is none.

        A value registered for the given tool takes precedence over a value registered for all tools.
        """
        tool_specific_value = self.parameter_map.get(self.build_key(parameter_name, tool_name), None)
        if tool_specific_value is None:
            # nothing specific for this tool, fall back to the value that applies to all tools
            return self.parameter_map.get(parameter_name, None)
        return tool_specific_value

    def register_parameter(self, parameter_name, parameter_value, tool_name=None):
        """Store a value for the parameter; without a tool name the value applies to all tools."""
        map_key = self.build_key(parameter_name, tool_name)
        self.parameter_map[map_key] = parameter_value

    def build_key(self, parameter_name, tool_name):
        """Return the key under which the parameter is stored for the given tool (None: all tools)."""
        return parameter_name if tool_name is None else "%s%s%s" % (parameter_name, self.separator, tool_name)
123
124
def main(argv=None):  # IGNORE:C0111
    """Command line entry point: parse the arguments and convert the given CTD files to Galaxy wrappers.

    :param argv: optional list of extra arguments. The parser always reads sys.argv, so the given
                 arguments are appended to sys.argv rather than replacing it.
    :return: 0 on success or keyboard interrupt, 1 if an ApplicationException or ModelError was raised,
             2 for any other exception (its traceback is printed).
    """
    # Command line options.
    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_short_description = "CTD2Galaxy - A project from the GenericWorkflowNodes family " \
                                "(https://github.com/orgs/genericworkflownodes)"
    program_usage = '''
USAGE:

I - Parsing a single CTD file and generate a Galaxy wrapper:

$ python generator.py -i input.ctd -o output.xml


II - Parsing all found CTD files (files with .ctd and .xml extension) in a given folder and
output converted Galaxy wrappers in a given folder:

$ python generator.py -i /home/user/*.ctd -o /home/user/galaxywrappers


III - Providing file formats, mimetypes

Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a certain
data format will be able to receive data from a port from the same format. This converter allows you to provide
a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. The layout of
this file consists of lines, each of either one or four columns separated by any amount of whitespace. The content
of each column is as follows:

* 1st column: file extension
* 2nd column: data type, as listed in Galaxy
* 3rd column: full-named Galaxy data type, as it will appear on datatypes_conf.xml
* 4th column: mimetype (optional)

The following is an example of a valid "file formats" file:

########################################## FILE FORMATS example ##########################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the file format as given in the CTD and second column is the Galaxy data format.
# The second, third, fourth and fifth column can be left empty if the data type has already been registered
# in Galaxy, otherwise, all but the mimetype must be provided.

# CTD type # Galaxy type # Long Galaxy data type # Mimetype
csv tabular galaxy.datatypes.data:Text
fasta
ini txt galaxy.datatypes.data:Text
txt
idxml txt galaxy.datatypes.xml:GenericXml application/xml
options txt galaxy.datatypes.data:Text
grid grid galaxy.datatypes.data:Grid

##########################################################################################################

Note that each line consists precisely of either one, three or four columns. In the case of data types already
registered in Galaxy (such as fasta and txt in the above example), only the first column is needed. In the case of
data types that haven't been yet registered in Galaxy, the first three columns are needed (mimetype is optional).

For information about Galaxy data types and subclasses, see the following page:
https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes


IV - Hardcoding parameters

It is possible to hardcode parameters. This makes sense if you want to set a tool in Galaxy in 'quiet' mode or if
your tools support multi-threading and accept the number of threads via a parameter, without giving the end user the
chance to change the values for these parameters.

In order to generate hardcoded parameters, you need to provide a simple file. Each line of this file contains two
or three columns separated by whitespace. Any line starting with a '#' will be ignored. The first column contains
the name of the parameter, the second column contains the value that will always be set for this parameter. The
first two columns are mandatory.

If the parameter is to be hardcoded only for a set of tools, then a third column can be added. This column includes
a comma-separated list of tool names for which the parameter will be hardcoded. If a third column is not included,
then all processed tools containing the given parameter will get a hardcoded value for it.

The following is an example of a valid file:

##################################### HARDCODED PARAMETERS example #####################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the name of the parameter and the second column is the value that will be used.

# Parameter name # Value # Tool(s)
threads \${GALAXY_SLOTS:-24}
mode quiet
xtandem_executable xtandem XTandemAdapter
verbosity high Foo, Bar

#########################################################################################################

Using the above file will produce a <command> similar to:

[tool_name] ... -threads \${GALAXY_SLOTS:-24} -mode quiet ...

For all tools. For XTandemAdapter, the <command> will be similar to:

XtandemAdapter ... -threads \${GALAXY_SLOTS:-24} -mode quiet -xtandem_executable xtandem ...

And for tools Foo and Bar, the <command> will be similar to:

Foo ... ... -threads \${GALAXY_SLOTS:-24} -mode quiet -verbosity high ...


V - Control which tools will be converted

Sometimes only a subset of CTDs needs to be converted. It is possible to either explicitly specify which tools will
be converted or which tools will not be converted.

The value of the -s/--skip-tools parameter is a file in which each line will be interpreted as the name of a tool
that will not be converted. Conversely, the value of the -r/--required-tools is a file in which each line will be
interpreted as a tool that is required. Only one of these parameters can be specified at a given time.

The format of both files is exactly the same. As stated before, each line will be interpreted as the name of a tool;
any line starting with a '#' will be ignored.

'''
    program_license = '''%(short_description)s
Copyright 2015, Luis de la Garza

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

%(usage)s
''' % {'short_description': program_short_description, 'usage': program_usage}

    try:
        # Setup argument parser
        parser = ArgumentParser(prog="CTD2Galaxy", description=program_license,
                                formatter_class=RawDescriptionHelpFormatter, add_help=True)
        parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+", action="append",
                            help="List of CTD files to convert.")
        parser.add_argument("-o", "--output-destination", dest="output_destination", required=True,
                            help="If multiple input files are given, then a folder in which all generated "
                                 "XMLs will be generated is expected; "
                                 "if a single input file is given, then a destination file is expected.")
        parser.add_argument("-f", "--formats-file", dest="formats_file",
                            help="File containing the supported file formats. Run with '-h' or '--help' to see a "
                                 "brief example on the layout of this file.", default=None, required=False)
        parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line",
                            help="Adds content to the command line", default="", required=False)
        parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination",
                            help="Specify the location of a datatypes_conf.xml to modify and add the registered "
                                 "data types. If the provided destination does not exist, a new file will be created.",
                            default=None, required=False)
        parser.add_argument("-x", "--default-executable-path", dest="default_executable_path",
                            help="Use this executable path when <executablePath> is not present in the CTD",
                            default=None, required=False)
        parser.add_argument("-b", "--blacklist-parameters", dest="blacklisted_parameters", default=[], nargs="+",
                            action="append",
                            help="List of parameters that will be ignored and won't appear on the galaxy stub",
                            required=False)
        parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False,
                            help="Default category to use for tools lacking a category when generating tool_conf.xml")
        parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False,
                            help="Specify the location of an existing tool_conf.xml that will be modified to include "
                                 "the converted tools. If the provided destination does not exist, a new file will "
                                 "be created.")
        parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False,
                            help="The path that will be prepended to the file names when generating tool_conf.xml")
        parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False,
                            help="Each line of the file will be interpreted as a tool name that needs translation. "
                                 "Run with '-h' or '--help' to see a brief example on the format of this file.")
        parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False,
                            help="File containing a list of tools for which a Galaxy stub will not be generated. "
                                 "Run with '-h' or '--help' to see a brief example on the format of this file.")
        parser.add_argument("-m", "--macros", dest="macros_files", default=[], nargs="+", action="append",
                            help="Import the additional given file(s) as macros. The macros stdio, requirements and "
                                 "advanced_options are required. Please see sample_files/macros.xml for an example "
                                 "of a valid macros file. All defined macros will be imported.",
                            required=True)
        parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None, required=False,
                            help="File containing hardcoded values for the given parameters. Run with '-h' or '--help' "
                                 "to see a brief example on the format of this file.")
        # TODO: add verbosity, maybe?
        parser.add_argument("-V", "--version", action='version', version=program_version_message)

        # Process arguments
        args = parser.parse_args()

        # validate and prepare the passed arguments
        validate_and_prepare_args(args)

        # extract the names of the macros and check that we have found the ones we need
        macros_file_names = args.macros_files
        macros_to_expand = parse_macros_files(macros_file_names)

        # parse the given supported file-formats file
        supported_file_formats = parse_file_formats(args.formats_file)

        # parse the hardcoded parameters file
        parameter_hardcoder = parse_hardcoded_parameters(args.hardcoded_parameters)

        # parse the skip/required tools files
        skip_tools = parse_tools_list_file(args.skip_tools_file)
        required_tools = parse_tools_list_file(args.required_tools_file)

        #if verbose > 0:
        #    print("Verbose mode on")
        parsed_models = convert(args.input_files,
                                args.output_destination,
                                supported_file_formats=supported_file_formats,
                                default_executable_path=args.default_executable_path,
                                add_to_command_line=args.add_to_command_line,
                                blacklisted_parameters=args.blacklisted_parameters,
                                required_tools=required_tools,
                                skip_tools=skip_tools,
                                macros_file_names=macros_file_names,
                                macros_to_expand=macros_to_expand,
                                parameter_hardcoder=parameter_hardcoder)

        #TODO: add some sort of warning if a macro that doesn't exist is to be expanded

        # it is not needed to copy the macros files, since the user has provided them

        # generation of galaxy stubs is ready... now, let's see if we need to generate a tool_conf.xml
        if args.tool_conf_destination is not None:
            generate_tool_conf(parsed_models, args.tool_conf_destination,
                               args.galaxy_tool_path, args.default_category)

        # now datatypes_conf.xml
        if args.data_types_destination is not None:
            generate_data_type_conf(supported_file_formats, args.data_types_destination)

        return 0

    except KeyboardInterrupt:
        # handle keyboard interrupt
        return 0
    except ApplicationException as e:
        error("CTD2Galaxy could not complete the requested operation.", 0)
        error("Reason: " + e.msg, 0)
        return 1
    except ModelError as e:
        error("There seems to be a problem with one of your input CTDs.", 0)
        # str() instead of e.msg: InvalidModelException only sets "message", so reading "msg" would
        # raise an AttributeError while reporting the error
        error("Reason: " + str(e), 0)
        return 1
    except Exception:
        traceback.print_exc()
        return 2
377
378
def parse_tools_list_file(tools_list_file):
    """Read tool names, one per line, from the given file.

    Blank lines and lines whose first non-whitespace character is '#' are ignored; the remaining
    lines are returned stripped of surrounding whitespace. If no file is given, None is returned.
    """
    if tools_list_file is None:
        return None

    with open(tools_list_file) as list_handle:
        stripped_lines = [raw_line.strip() for raw_line in list_handle]

    return [entry for entry in stripped_lines if entry and not entry.startswith("#")]
391
392
def parse_macros_files(macros_file_names):
    """Collect the names of all macros defined in the given macros files.

    A macro is every <xml> child of the root element of a macros file; its name is the value of the
    "name" attribute. A warning is issued for every macro name that is found more than once.

    :param macros_file_names: paths of the macros files to parse.
    :return: set with the names of the macros to expand, i.e., all found macros except advanced_options.
    :raises ApplicationException: if a file cannot be opened or parsed, or if any of the required
                                  macros is not defined in any of the given files.
    """
    macros_to_expand = set()

    for macros_file_name in macros_file_names:
        try:
            # the context manager makes sure the file is closed, even if parsing fails
            with open(macros_file_name) as macros_file:
                root = parse(macros_file).getroot()
            for xml_element in root.findall("xml"):
                name = xml_element.attrib["name"]
                if name in macros_to_expand:
                    warning("Macro %s has already been found. Duplicate found in file %s." %
                            (name, macros_file_name), 0)
                else:
                    macros_to_expand.add(name)
        except ParseError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " +
                                       str(e))
        except IOError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " +
                                       str(e))

    # "stdio", "requirements" and "advanced_options" must each be defined in at least one of the given files
    missing_needed_macros = [required_macro for required_macro in REQUIRED_MACROS
                             if required_macro not in macros_to_expand]

    if missing_needed_macros:
        raise ApplicationException(
            "The following required macro(s) were not found in any of the given macros files: %s, "
            "see sample_files/macros.xml for an example of a valid macros file."
            % ", ".join(missing_needed_macros))

    # we do not need to "expand" the advanced_options macro
    macros_to_expand.remove(ADVANCED_OPTIONS_MACRO_NAME)
    return macros_to_expand
429
def parse_hardcoded_parameters(hardcoded_parameters_file):
    """Build a ParameterHardcoder from the given hardcoded parameters file.

    Every line that is neither blank nor a comment (starting with '#') is expected to contain the name of
    the parameter, its value and, optionally, a comma-separated list of the tools to which the value
    applies. Without a list of tools the value is registered for all tools.

    :param hardcoded_parameters_file: path of the file to parse, or None.
    :return: a ParameterHardcoder; it contains no values if no file was given.
    """
    parameter_hardcoder = ParameterHardcoder()
    if hardcoded_parameters_file is None:
        return parameter_hardcoder

    with open(hardcoded_parameters_file) as f:
        for line_number, line in enumerate(f, 1):
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith("#"):
                continue

            # split at most twice: the third column (the list of tools) must be obtained as a whole,
            # since whitespace is allowed around the commas
            parsed_hardcoded_parameter = stripped_line.split(None, 2)
            # valid lines contain two or three columns
            if len(parsed_hardcoded_parameter) not in (2, 3):
                warning("Invalid line at line number %d of the given hardcoded parameters file. Line will be "
                        "ignored:\n%s" % (line_number, line), 0)
                continue

            parameter_name = parsed_hardcoded_parameter[0]
            hardcoded_value = parsed_hardcoded_parameter[1]
            if len(parsed_hardcoded_parameter) == 3:
                for tool_name in parsed_hardcoded_parameter[2].split(','):
                    parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name.strip())
            else:
                parameter_hardcoder.register_parameter(parameter_name, hardcoded_value)

    return parameter_hardcoder
460
461
def parse_file_formats(formats_file):
    """Parse the file relating CTD file formats to Galaxy data types.

    Blank lines and comments (lines starting with '#') are skipped. Valid lines have one column
    (extension only), three columns (extension, Galaxy extension, Galaxy type) or four columns
    (the same plus a mimetype); for any other line a warning is issued and the line is ignored.

    Returns a dictionary mapping each extension to its DataType; it is empty if no file was given.
    """
    supported_formats = {}
    if formats_file is None:
        return supported_formats

    with open(formats_file) as formats_handle:
        for line_number, line in enumerate(formats_handle, 1):
            content = line.strip()
            if not content or content.startswith("#"):
                continue

            columns = content.split()
            column_count = len(columns)
            if column_count not in (1, 3, 4):
                warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" %
                        (line_number, line), 0)
                continue

            extension = columns[0]
            if column_count == 1:
                # a lone extension is used as the galaxy extension as well
                supported_formats[extension] = DataType(extension, extension)
            else:
                # the mimetype is the optional fourth column
                mimetype = columns[3] if column_count == 4 else None
                supported_formats[extension] = DataType(extension, columns[1], columns[2], mimetype)

    return supported_formats
493
494
def validate_and_prepare_args(args):
    """Check the parsed command line arguments for consistency and normalize them in place.

    The options collected as lists of lists (input_files, blacklisted_parameters, macros_files) are
    replaced by flat lists.

    :param args: the namespace produced by the argument parser.
    :raises ApplicationException: if both a skip-tools and a required-tools file are given, if the kind
                                  of output destination does not match the number of input files, if
                                  an input file does not exist, or if an output file name points to a
                                  directory.
    """
    # check that only one of skip_tools_file and required_tools_file has been provided
    if args.skip_tools_file is not None and args.required_tools_file is not None:
        raise ApplicationException(
            "You have provided both a file with tools to ignore and a file with required tools.\n"
            "Only one of -s/--skip-tools, -r/--required-tools can be provided.")

    # first, we convert all list of lists in args to flat lists
    for list_to_flatten in ("input_files", "blacklisted_parameters", "macros_files"):
        setattr(args, list_to_flatten, [item for sub_list in getattr(args, list_to_flatten) for item in sub_list])

    # if input is a single file, we expect output to be a file (and not a dir that already exists)
    if len(args.input_files) == 1 and os.path.isdir(args.output_destination):
        raise ApplicationException("If a single input file is provided, output (%s) is expected to be a file "
                                   "and not a folder.\n" % args.output_destination)

    # if input is a list of files, we expect output to be a folder
    if len(args.input_files) > 1 and not os.path.isdir(args.output_destination):
        raise ApplicationException("If several input files are provided, output (%s) is expected to be an "
                                   "existing directory.\n" % args.output_destination)

    # check that the provided input files, if provided, contain a valid file path
    input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files",
                                "input_files", "formats_file", "hardcoded_parameters"]

    for variable_name in input_variables_to_check:
        member_value = getattr(args, variable_name)
        if member_value is None:
            continue
        # we are handling either a single file or a list of files
        file_names = member_value if isinstance(member_value, list) else [member_value]

        for file_name in file_names:
            path_to_check = str(file_name).strip()
            # os.path.isfile is False for missing paths as well, no separate existence check is needed
            if not os.path.isfile(path_to_check):
                raise ApplicationException(
                    "The provided input file (%s) does not exist or is not a valid file path."
                    % path_to_check)

    # check that the provided output files, if provided, contain a valid file path (i.e., not a folder)
    for variable_name in ("data_types_destination", "tool_conf_destination"):
        file_name = getattr(args, variable_name)
        if file_name is not None and os.path.isdir(file_name):
            raise ApplicationException("The provided output file name (%s) points to a directory." % file_name)
547
548
def convert(input_files, output_destination, **kwargs):
    """Convert each of the given CTD files into a Galaxy tool XML file.

    CTD files that cannot be parsed are reported and skipped. Tools listed in kwargs["skip_tools"], or
    missing from kwargs["required_tools"] (when these are not None), are skipped as well.

    :param input_files: paths of the CTD files to convert.
    :param output_destination: the output file if a single input file is given; otherwise the folder in
                               which one .xml file per input file is written.
    :param kwargs: conversion settings; they are handed over to the functions generating the sections
                   of the tool.
    :return: list of [model, name of the generated file] pairs, one per converted tool.
    """
    # first, generate a model
    is_converting_multiple_ctds = len(input_files) > 1
    parsed_models = []
    for input_file in input_files:
        try:
            model = CTDModel(from_file=input_file)
        except Exception as e:
            error(str(e), 1)
            continue

        if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]:
            info("Skipping tool %s" % model.name, 0)
            continue
        elif kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]:
            info("Tool %s is not required, skipping it" % model.name, 0)
            continue
        else:
            info("Converting from %s " % input_file, 0)
            tool = create_tool(model)
            write_header(tool, model)
            create_description(tool, model)
            expand_macros(tool, model, **kwargs)
            create_command(tool, model, **kwargs)
            create_inputs(tool, model, **kwargs)
            create_outputs(tool, model, **kwargs)
            create_help(tool, model)

            # finally, serialize the tool
            output_file = output_destination
            # if multiple inputs are being converted,
            # then we need to generate a different output_file for each input
            if is_converting_multiple_ctds:
                output_file = os.path.join(output_file, get_filename_without_suffix(input_file) + ".xml")
            # wrap our tool element into a tree to be able to serialize it
            tree = ElementTree(tool)
            # the context manager closes (and flushes) the output file right after writing
            with open(output_file, 'w') as output_handle:
                tree.write(output_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
            # let's use model to hold the name of the output file
            parsed_models.append([model, get_filename(output_file)])

    return parsed_models
590
591
def write_header(tool, model):
    """Place two XML comments before the tool element: a generator notice and the proposed tool section.

    The proposed section is the "category" of the model, or an empty string if the model has none.
    """
    generator_notice = etree.Comment(
        "This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). "
        "This file was automatically generated using CTD2Galaxy.")
    tool.addprevious(generator_notice)

    proposed_section = model.opt_attribs.get("category", "")
    tool.addprevious(etree.Comment('Proposed Tool Section: [%s]' % proposed_section))
597
598
def generate_tool_conf(parsed_models, tool_conf_destination, galaxy_tool_path, default_category):
    """Write a tool_conf.xml with one <section> per category and one <tool> per converted tool.

    :param parsed_models: [model, file name] pairs, as returned by convert().
    :param tool_conf_destination: path of the file to write.
    :param galaxy_tool_path: path prepended to every file name; may be None.
    :param default_category: category used for the tools without a category.
    """
    # for each category, we keep a list of the file names of the tools belonging to it
    categories_to_tools = dict()
    for model in parsed_models:
        # "or" guards against a category that is present but set to None
        category = (model[0].opt_attribs.get("category", "") or "").strip()
        if not category:
            category = default_category
        categories_to_tools.setdefault(category, []).append(model[1])

    # at this point, we should have a map for all categories->tools
    toolbox_node = Element("toolbox")

    if galaxy_tool_path is not None and not galaxy_tool_path.strip().endswith("/"):
        galaxy_tool_path = galaxy_tool_path.strip() + "/"
    if galaxy_tool_path is None:
        galaxy_tool_path = ""

    # items() instead of iteritems(): it is available in both python 2 and python 3
    for category, file_names in categories_to_tools.items():
        section_node = add_child_node(toolbox_node, "section")
        # the id of the section is the category without any whitespace
        section_node.attrib["id"] = "section-id-" + "".join(category.split())
        section_node.attrib["name"] = category

        for filename in file_names:
            tool_node = add_child_node(section_node, "tool")
            tool_node.attrib["file"] = galaxy_tool_path + filename

    toolconf_tree = ElementTree(toolbox_node)
    # the context manager closes (and flushes) the file right after writing
    with open(tool_conf_destination, 'w') as tool_conf_handle:
        toolconf_tree.write(tool_conf_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy tool_conf.xml in %s" % tool_conf_destination, 0)
630
631
def generate_data_type_conf(supported_file_formats, data_types_destination):
    """Write a datatypes_conf.xml registering the data types that carry a Galaxy type.

    :param supported_file_formats: dictionary mapping format names to DataType instances.
    :param data_types_destination: path of the file to write.
    """
    data_types_node = Element("datatypes")
    registration_node = add_child_node(data_types_node, "registration")
    registration_node.attrib["converters_path"] = "lib/galaxy/datatypes/converters"
    registration_node.attrib["display_path"] = "display_applications"

    for format_name in supported_file_formats:
        data_type = supported_file_formats[format_name]
        # add only if it's a data type that does not exist in Galaxy
        if data_type.galaxy_type is not None:
            data_type_node = add_child_node(registration_node, "datatype")
            # we know galaxy_extension is not None
            data_type_node.attrib["extension"] = data_type.galaxy_extension
            data_type_node.attrib["type"] = data_type.galaxy_type
            if data_type.mimetype is not None:
                data_type_node.attrib["mimetype"] = data_type.mimetype

    data_types_tree = ElementTree(data_types_node)
    # the context manager closes (and flushes) the file right after writing
    with open(data_types_destination, 'w') as data_types_handle:
        data_types_tree.write(data_types_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy datatypes_conf.xml in %s" % data_types_destination, 0)
652
653
654 # taken from
655 # http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format
def get_filename(path):
    """Return the last component of the path, accepting both '/' and '\\' as separators.

    A trailing separator is ignored, i.e., the name of the last folder is returned.
    """
    parent, leaf = ntpath.split(path)
    if leaf:
        return leaf
    # the path ended with a separator: the wanted name is the last component of the parent
    return ntpath.basename(parent)
659
660
def get_filename_without_suffix(path):
    """Return the file name of the path without its last extension."""
    file_name = os.path.basename(path)
    return os.path.splitext(file_name)[0]
664
665
def create_tool(model):
    """Create the root <tool> element; id and name are the name of the model, version is its version."""
    # an ordered dictionary keeps the attributes in the order id, name, version
    tool_attributes = OrderedDict()
    tool_attributes["id"] = model.name
    tool_attributes["name"] = model.name
    tool_attributes["version"] = model.version
    return Element("tool", tool_attributes)
668
669
def create_description(tool, model):
    """Add a <description> child to the tool if the model provides a description."""
    description_text = model.opt_attribs.get("description")
    if description_text is None:
        # no description (or an explicit None), nothing to add
        return

    description_node = SubElement(tool, "description")
    description_node.text = description_text
674
675
def get_param_name(param):
    """Return the name of the parameter, prefixed with the colon-separated names of its subgroups.

    The topmost group (the one without a parent) does not contribute to the name (OpenMS legacy).
    """
    group = param.parent
    is_in_subgroup = type(group) == ParameterGroup and group.parent != None
    if not is_in_subgroup:
        return resolve_param_mapping(param)

    group_prefix = get_param_name(group)
    return group_prefix + ":" + resolve_param_mapping(param)
682
683
684 # some parameters are mapped to command line options, this method helps resolve those mappings, if any
685 # TODO: implement mapping of parameters!!!
def resolve_param_mapping(param):
    """Return the name to use for the parameter; mappings are not implemented yet, so it is its own name."""
    resolved_name = param.name
    return resolved_name
688
689
def create_command(tool, model, **kwargs):
    """Generate the <command> node of the tool.

    The command starts with the executable of the tool and the content of kwargs["add_to_command_line"],
    followed by a template snippet for each parameter of the model. The snippets of advanced parameters
    (other than output files) are wrapped in a single conditional block depending on the advanced options
    selector. If the model has no output file parameter, standard output is redirected to $param_stdout.

    :param tool: the element to which the <command> node is added.
    :param model: the parsed CTD model.
    :param kwargs: uses "default_executable_path", "add_to_command_line", "parameter_hardcoder" and
                   "blacklisted_parameters".
    """
    final_command = get_tool_executable_path(model, kwargs["default_executable_path"]) + '\n'
    final_command += kwargs["add_to_command_line"] + '\n'
    advanced_command_start = "#if $adv_opts.adv_opts_selector=='advanced':\n"
    advanced_command_end = '#end if'
    advanced_command = ''
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    found_output_parameter = False
    for param in extract_parameters(model):
        # output parameters count even if they are blacklisted afterwards
        if param.type is _OutFile:
            found_output_parameter = True
        command = ''
        param_name = get_param_name(param)

        if param.name in kwargs["blacklisted_parameters"]:
            continue

        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param_name, model.name)
        if hardcoded_value:
            command += '-%s %s\n' % (param_name, hardcoded_value)
        else:
            # parameter is neither blacklisted nor hardcoded...
            galaxy_parameter_name = get_galaxy_parameter_name(param)
            repeat_galaxy_parameter_name = get_repeat_galaxy_parameter_name(param)

            # logic for ITEMLISTs
            if param.is_list:
                if param.type is _InFile:
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + galaxy_parameter_name + ":\n"
                    command += " $token\n"
                    command += " #end for\n"
                else:
                    command += "\n#if $" + repeat_galaxy_parameter_name + ":\n"
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + repeat_galaxy_parameter_name + ":\n"
                    command += " #if \" \" in str(token):\n"
                    command += " \"$token." + galaxy_parameter_name + "\"\n"
                    command += " #else\n"
                    command += " $token." + galaxy_parameter_name + "\n"
                    command += " #end if\n"
                    command += " #end for\n"
                    command += "#end if\n"
            # logic for other ITEMs
            else:
                if param.advanced and param.type is not _OutFile:
                    actual_parameter = "$adv_opts.%s" % galaxy_parameter_name
                else:
                    actual_parameter = "$%s" % galaxy_parameter_name
                ## if whitespace_validation has been set, we need to generate, for each parameter:
                ## #if str( $t ).split() != '':
                ## -t "$t"
                ## #end if
                ## TODO only useful for text fields, integers or floats
                ## not useful for choices, input fields ...

                if not is_boolean_parameter(param) and type(param.restrictions) is _Choices:
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += " #if \" \" in str(" + actual_parameter + "):\n"
                    command += " \"" + actual_parameter + "\"\n"
                    command += " #else\n"
                    command += " " + actual_parameter + "\n"
                    command += " #end if\n"
                    command += "#end if\n"
                elif is_boolean_parameter(param):
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += "#end if\n"
                # strings are compared by value: identity of equal strings is an implementation detail
                elif TYPE_TO_GALAXY_TYPE[param.type] == 'text':
                    command += "#if " + actual_parameter + ":\n"
                    command += " -%s " % param_name
                    command += " \"" + actual_parameter + "\"\n"
                    command += "#end if\n"
                else:
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s ' % param_name
                    command += actual_parameter + "\n"
                    command += "#end if\n"

        if param.advanced and param.type is not _OutFile:
            advanced_command += " %s" % command
        else:
            final_command += command

    if advanced_command:
        final_command += "%s%s%s\n" % (advanced_command_start, advanced_command, advanced_command_end)

    if not found_output_parameter:
        final_command += "> $param_stdout\n"

    command_node = add_child_node(tool, "command")
    command_node.text = final_command
784
785
# creates the xml elements needed to import the needed macros files
# and to "expand" the macros
def expand_macros(tool, model, **kwargs):
    """Add the <macros> section and the <expand> nodes to the given tool node.

    tool   -- xml node that receives the <macros> and <expand> children
    model  -- CTD model, used to resolve the executable path
    kwargs -- must contain "default_executable_path", "macros_file_names"
              (paths of the macros files to import) and "macros_to_expand"
              (names of the macros to expand)
    """
    macros_node = add_child_node(tool, "macros")
    token_node = add_child_node(macros_node, "token")
    token_node.attrib["name"] = "@EXECUTABLE@"
    token_node.text = get_tool_executable_path(model, kwargs["default_executable_path"])

    # add <import> nodes
    for macro_file_name in kwargs["macros_file_names"]:
        # the file is still opened so that a missing/unreadable macros file fails here,
        # but the context manager makes sure the handle is closed again
        with open(macro_file_name) as macro_file:
            import_node = add_child_node(macros_node, "import")
            # do not add the path of the file, rather, just its basename
            import_node.text = os.path.basename(macro_file.name)

    # add <expand> nodes
    for expand_macro in kwargs["macros_to_expand"]:
        expand_node = add_child_node(tool, "expand")
        expand_node.attrib["macro"] = expand_macro
805
806
def get_tool_executable_path(model, default_executable_path):
    """Build the command used to invoke the tool described by model.

    The optional CTD attributes "executablePath" and "executableName" are combined as follows:
      * a missing executablePath is replaced by default_executable_path
      * a missing executableName is replaced by the name of the tool
      * if a path is available, it is stripped, terminated with '/' and prepended
    """
    optional_attributes = model.opt_attribs
    path = optional_attributes.get("executablePath", None)
    name = optional_attributes.get("executableName", None)

    # fall back to the default path if the model does not provide one
    if path is None:
        path = default_executable_path

    # no path at all: only the executable name (or the tool name) is left
    if path is None:
        return model.name if name is None else name

    # make sure that the path ends with exactly one trailing separator
    path = path.strip()
    if not path.endswith('/'):
        path += '/'

    if name is None:
        return path + model.name
    return str(path) + str(name)
841
842
def get_galaxy_parameter_name(param):
    """Return the Galaxy name of param: 'param_' plus its name with ':' and '-' replaced by '_'."""
    sanitized_name = get_param_name(param).replace(':', '_').replace('-', '_')
    return "param_%s" % sanitized_name
845
846
def get_input_with_same_restrictions(out_param, model, supported_file_formats):
    """Return the first input-file parameter whose supported formats equal those of out_param.

    Only input files that carry restrictions are considered; None is returned if nothing matches.
    """
    for candidate in extract_parameters(model):
        if candidate.type is not _InFile or candidate.restrictions is None:
            continue
        candidate_formats = get_supported_file_types(candidate.restrictions.formats, supported_file_formats)
        wanted_formats = get_supported_file_types(out_param.restrictions.formats, supported_file_formats)
        if candidate_formats == wanted_formats:
            return candidate
    return None
855
856
def create_inputs(tool, model, **kwargs):
    """Create the <inputs> section of the tool from every non output-file parameter of model.

    tool   -- xml node that receives the <inputs> child
    model  -- CTD model whose parameters are converted
    kwargs -- must contain "parameter_hardcoder", "blacklisted_parameters"
              and "supported_file_formats"

    Advanced parameters are collected under an <expand macro="advanced_options"> node
    that is appended as last child of <inputs>, and only if it has at least one child.
    """
    inputs_node = SubElement(tool, "inputs")

    # some suites (such as OpenMS) need some advanced options when handling inputs;
    # the node is created detached on purpose: attaching it to the tool right away
    # would leave an empty <expand> under the tool when there are no advanced parameters
    expand_advanced_node = Element("expand", OrderedDict([("macro", ADVANCED_OPTIONS_MACRO_NAME)]))
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    # treat all non output-file parameters as inputs
    for param in extract_parameters(model):
        # no need to show hardcoded parameters
        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
        if param.name in kwargs["blacklisted_parameters"] or hardcoded_value:
            continue
        if param.type is _OutFile:
            continue

        parent_node = expand_advanced_node if param.advanced else inputs_node

        # for lists we need a repeat tag
        if param.is_list and param.type is not _InFile:
            rep_node = add_child_node(parent_node, "repeat")
            create_repeat_attribute_list(rep_node, param)
            parent_node = rep_node

        param_node = add_child_node(parent_node, "param")
        create_param_attribute_list(param_node, param, kwargs["supported_file_formats"])

    # advanced parameter selection should be at the end
    # and only available if an advanced parameter exists
    if len(expand_advanced_node) > 0:
        inputs_node.append(expand_advanced_node)
898
899
def get_repeat_galaxy_parameter_name(param):
    """Return the name of the <repeat> element of param: 'rep_' plus its Galaxy parameter name."""
    galaxy_name = get_galaxy_parameter_name(param)
    return "rep_" + galaxy_name
902
903
def create_repeat_attribute_list(rep_node, param):
    """Set the name, min, (optionally) max and title attributes of a <repeat> node."""
    attributes = rep_node.attrib
    attributes["name"] = get_repeat_galaxy_parameter_name(param)
    # required lists need at least one entry
    attributes["min"] = "1" if param.required else "0"
    # for the ITEMLISTs which have LISTITEM children we only
    # need one parameter as it is given as a string
    if param.default is not None:
        attributes["max"] = "1"
    attributes["title"] = get_galaxy_parameter_name(param)
915
916
def create_param_attribute_list(param_node, param, supported_file_formats):
    """Fill the attributes (and child nodes) of a Galaxy <param> node from a CTD parameter.

    param_node             -- xml node of the <param> element to fill
    param                  -- CTD parameter to convert
    supported_file_formats -- map of CTD file formats to the registered data types

    Raises InvalidModelException (a ModelError) if the parameter type is unknown
    or if its restrictions do not fit its type.
    """
    param_node.attrib["name"] = get_galaxy_parameter_name(param)

    # use .get() so that an unknown type reaches the explicit error below instead of a bare KeyError
    param_type = TYPE_TO_GALAXY_TYPE.get(param.type)
    if param_type is None:
        # NOTE(review): InvalidModelException is raised because it is known to accept a message;
        # it derives from ModelError, so callers catching ModelError are not affected
        raise InvalidModelException("Unrecognized parameter type %(type)s for parameter %(name)s"
                                    % {"type": param.type, "name": param.name})

    if param.is_list:
        param_type = "text"

    if is_selection_parameter(param):
        param_type = "select"

    if is_boolean_parameter(param):
        param_type = "boolean"

    if param.type is _InFile:
        # assume it's just text unless restrictions are provided
        param_format = "text"
        if param.restrictions is not None:
            # join all supported_formats for the file... this MUST be a _FileFormat
            if type(param.restrictions) is _FileFormat:
                param_format = ','.join(get_supported_file_types(param.restrictions.formats,
                                                                 supported_file_formats))
            else:
                raise InvalidModelException("Expected 'file type' restrictions for input file [%(name)s], "
                                            "but instead got [%(type)s]"
                                            % {"name": param.name, "type": type(param.restrictions)})
        param_node.attrib["type"] = "data"
        param_node.attrib["format"] = param_format
        # in the case of multiple input set multiple flag
        if param.is_list:
            param_node.attrib["multiple"] = "true"
    else:
        param_node.attrib["type"] = param_type

    # check for parameters with restricted values (which will correspond to a "select" in galaxy)
    if param.restrictions is not None:
        # it could be either _Choices or _NumericRange, with special case for boolean types
        if param_type == "boolean":
            create_boolean_parameter(param_node, param)
        elif type(param.restrictions) is _Choices:
            # create as many <option> elements as restriction values
            for choice in param.restrictions.choices:
                option_node = add_child_node(param_node, "option", OrderedDict([("value", str(choice))]))
                option_node.text = str(choice)

        elif type(param.restrictions) is _NumericRange:
            if param.type is not int and param.type is not float:
                # report the offending parameter type (not the type of the restriction)
                raise InvalidModelException("Expected either 'int' or 'float' in the numeric range restriction for "
                                            "parameter [%(name)s], but instead got [%(type)s]" %
                                            {"name": param.name, "type": param.type})
            # extract the min and max values and add them as attributes
            if param.restrictions.n_min is not None:
                param_node.attrib["min"] = str(param.restrictions.n_min)
            if param.restrictions.n_max is not None:
                param_node.attrib["max"] = str(param.restrictions.n_max)
        elif type(param.restrictions) is _FileFormat:
            param_node.attrib["format"] = ",".join(
                get_supported_file_types(param.restrictions.formats, supported_file_formats))
        else:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] for parameter [%(name)s]"
                                        % {"type": type(param.restrictions), "name": param.name})

    param_node.attrib["optional"] = str(not param.required)

    if param_type == "text":
        # add size attribute... this is the length of a textbox field in Galaxy (it could also be 15x2, for instance)
        param_node.attrib["size"] = "30"
        # add sanitizer nodes, this is needed for special character like "["
        # which are used for example by FeatureFinderMultiplex
        sanitizer_node = SubElement(param_node, "sanitizer")

        valid_node = SubElement(sanitizer_node, "valid", OrderedDict([("initial", "string.printable")]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '\'')]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '"')]))

    # check for default value
    if param.default is not None:
        if type(param.default) is list:
            # we ASSUME that a list of parameters looks like:
            # $ tool -ignore He Ar Xe
            # meaning, that, for example, Helium, Argon and Xenon will be ignored
            param_node.attrib["value"] = ' '.join(map(str, param.default))

        elif param_type != "boolean":
            # boolean parameters handle default values by using the "checked" attribute
            # there isn't much we can do... just stringify the value
            param_node.attrib["value"] = str(param.default)
    else:
        if param.type is int or param.type is float:
            # galaxy requires "value" to be included for int/float
            # since no default was included, we need to figure out one in a clever way... but let the user know
            # that we are "thinking" for him/her
            warning("Generating default value for parameter [%s]. "
                    "Galaxy requires the attribute 'value' to be set for integer/floats. "
                    "Edit the CTD file and provide a suitable default value." % param.name, 1)
            # check if there's a min/max and try to use them
            default_value = None
            if param.restrictions is not None:
                if type(param.restrictions) is _NumericRange:
                    default_value = param.restrictions.n_min
                    if default_value is None:
                        default_value = param.restrictions.n_max
                    if default_value is None:
                        # no min/max provided... just use 0 and see what happens
                        default_value = 0
                else:
                    # should never be here, since we have validated this anyway...
                    # this code is here just for documentation purposes
                    # however, better safe than sorry!
                    # (it could be that the code changes and then we have an ugly scenario)
                    raise InvalidModelException("Expected either a numeric range for parameter [%(name)s], "
                                                "but instead got [%(type)s]"
                                                % {"name": param.name, "type": type(param.restrictions)})
            else:
                # no restrictions and no default value provided...
                # make up something
                default_value = 0
            param_node.attrib["value"] = str(default_value)

    label = "%s parameter" % param.name
    help_text = ""

    if param.description is not None:
        label, help_text = generate_label_and_help(param.description)

    param_node.attrib["label"] = label
    param_node.attrib["help"] = "(-%s)" % param.name + " " + help_text
1048
1049
def generate_label_and_help(desc):
    """Split a parameter description into a short label and a help text.

    desc -- description of the parameter; it is converted with str()

    Returns a tuple (label, help_text). Descriptions of at most 50 characters end up
    completely in the label; longer ones are split at the first "e.g." or, failing that,
    at the end of the first sentence (". " or "? ").
    """
    help_text = ""
    # This tag is found in some descriptions
    desc = str(desc).replace("#br#", " <br>")
    # Get rid of dots in the end
    if desc.endswith("."):
        desc = desc.rstrip(".")
    # Check if first word is a normal word and make it uppercase
    if " " in desc:
        first_word, rest = desc.split(" ", 1)
        # words with a '/' at the second position (quotients such as "m/z") are left untouched;
        # str.capitalize() returns a new string, so its result has to be used
        if first_word.islower() and first_word.find("/") != 1:
            first_word = first_word.capitalize()
        desc = first_word + " " + rest
    label = desc

    # Try to split the label if it is too long
    if len(desc) > 50:
        # find an example and put everything before in the label and the e.g. in the help
        if desc.find("e.g.") > 1:
            label, help_text = desc.split("e.g.", 1)
            help_text = "e.g." + help_text
        else:
            # find the end of the first sentence
            # look for ". " because some labels contain .file or something similar
            dot_position = desc.find(". ")
            question_position = desc.find("? ")
            delimiter = ""
            if dot_position > 1 and question_position > 1:
                delimiter = ". " if dot_position < question_position else "? "
            elif dot_position > 1:
                delimiter = ". "
            elif question_position > 1:
                delimiter = "? "
            if delimiter != "":
                label, help_text = desc.split(delimiter, 1)

            # add the question mark back
            if delimiter == "? ":
                label += "? "

    # remove all trailing linebreaks; the "<br>" suffix is removed as a whole, since
    # rstrip('<br>') would also eat trailing 'b' and 'r' letters of the label itself
    label = label.rstrip()
    while label.endswith("<br>"):
        label = label[:-len("<br>")].rstrip()
    return label, help_text
1097
1098
def get_indented_text(text, indentation_level):
    """Return text prefixed with MESSAGE_INDENTATION_INCREMENT spaces per indentation level."""
    padding = " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level)
    return "%s%s" % (padding, text)
1103
1104
def warning(warning_text, indentation_level):
    """Write an indented 'WARNING: ...' line to standard output."""
    message = "WARNING: %s\n" % warning_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1107
1108
def error(error_text, indentation_level):
    """Write an indented 'ERROR: ...' line to standard error."""
    message = "ERROR: %s\n" % error_text
    sys.stderr.write(get_indented_text(message, indentation_level))
1111
1112
def info(info_text, indentation_level):
    """Write an indented 'INFO: ...' line to standard output."""
    message = "INFO: %s\n" % info_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1115
1116
# determines if the given choices are boolean (basically, if the possible values are yes/no, true/false)
def is_boolean_parameter(param):
    """Return True if param is restricted to exactly two choices that are yes/no or true/false."""
    restrictions = param.restrictions
    if type(restrictions) is not _Choices:
        return False
    # for a true boolean experience, we need 2 values...
    if len(restrictions.choices) != 2:
        return False
    # ... and those two values must be either yes/no or true/false
    normalized_choices = get_lowercase_list(restrictions.choices)
    has_yes_no = "yes" in normalized_choices and "no" in normalized_choices
    has_true_false = "true" in normalized_choices and "false" in normalized_choices
    return has_yes_no or has_true_false
1128
1129
# determines if there are choices for the parameter
def is_selection_parameter(param):
    """Return True if the restrictions of param are a _Choices instance."""
    restriction_type = type(param.restrictions)
    return restriction_type is _Choices
1133
1134
def get_lowercase_list(some_list):
    """Return a list with the stringified, lower-cased and stripped elements of some_list.

    str methods are used instead of the Python-2-only string.lower/string.strip functions,
    and a real list is returned so that callers can run several membership tests on it.
    """
    return [str(item).lower().strip() for item in some_list]
1140
1141
# creates a galaxy boolean parameter type
# this method assumes that param has restrictions, and that only two restictions are present
# (either yes/no or true/false)
def create_boolean_parameter(param_node, param):
    """Set the truevalue, falsevalue and (if a default exists) checked attributes of param_node.

    TODO: true and false values can be way more than 'true' and 'false'
          but for that we need CTD support
    """
    # by default, 'true' and 'false' are handled as flags, like the verbose flag (i.e., -v)
    true_value = "-%s" % get_param_name(param)
    false_value = ""
    choices = get_lowercase_list(param.restrictions.choices)
    if "yes" in choices:
        true_value = "yes"
        false_value = "no"
    param_node.attrib["truevalue"] = true_value
    param_node.attrib["falsevalue"] = false_value

    # set the checked attribute
    if param.default is not None:
        # str() first, so that a non-string default does not break the normalization
        default = str(param.default).lower().strip()
        checked_value = "true" if default in ("yes", "true") else "false"
        param_node.attrib["checked"] = checked_value
1168
1169
def create_outputs(parent, model, **kwargs):
    """Create the <outputs> section: one <data> node per visible output-file parameter.

    Blacklisted and hardcoded parameters are skipped. If no output node was created,
    a single <data> node named "param_stdout" is added instead.
    """
    outputs_node = add_child_node(parent, "outputs")
    hardcoder = kwargs["parameter_hardcoder"]

    for param in extract_parameters(model):
        # no need to show hardcoded parameters
        hardcoded_value = hardcoder.get_hardcoded_value(param.name, model.name)
        is_hidden = param.name in kwargs["blacklisted_parameters"] or hardcoded_value
        if is_hidden or param.type is not _OutFile:
            continue
        create_output_node(outputs_node, param, model, kwargs["supported_file_formats"])

    # If there are no outputs defined in the ctd the node will have no children
    # and the stdout will be used as output
    if len(outputs_node) == 0:
        stdout_attributes = OrderedDict([("name", "param_stdout"), ("format", "text"),
                                         ("label", "Output from stdout")])
        add_child_node(outputs_node, "data", stdout_attributes)
1189
1190
def create_output_node(parent, param, model, supported_file_formats):
    """Create and return the <data> node of an output-file parameter.

    The format defaults to "data". With file-format restrictions, one of the supported
    formats is used; if more than one is supported and an input file with the same
    supported formats exists, the format is "input" and that input becomes the
    metadata source. Any other kind of restriction raises InvalidModelException.
    """
    data_node = add_child_node(parent, "data")
    data_node.attrib["name"] = get_galaxy_parameter_name(param)

    data_format = "data"
    restrictions = param.restrictions
    if restrictions is not None:
        if type(restrictions) is not _FileFormat:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] "
                                        "for output [%(name)s]" % {"type": type(restrictions),
                                                                   "name": param.name})

        # collect the formats that have not been registered yet...
        unsupported = "".join(" " + str(format_name)
                              for format_name in restrictions.formats
                              if format_name not in supported_file_formats.keys())
        # ... and warn only if there is something to complain about
        if unsupported:
            warning("Parameter " + param.name + " has the following unsupported format(s):" + unsupported, 1)

        formats = get_supported_file_types(restrictions.formats, supported_file_formats)
        try:
            data_format = formats.pop()
        except KeyError:
            # no supported format at all: keep the default format
            pass
        # if there are more than one output file formats try to take the format from the input parameter
        if formats:
            corresponding_input = get_input_with_same_restrictions(param, model, supported_file_formats)
            if corresponding_input is not None:
                data_format = "input"
                data_node.attrib["metadata_source"] = get_galaxy_parameter_name(corresponding_input)

    data_node.attrib["format"] = data_format

    # TODO: find a smarter label ?
    return data_node
1232
1233
def get_supported_file_types(formats, supported_file_formats):
    """Return the set of Galaxy extensions of the given formats that are registered.

    formats                -- iterable of CTD format names
    supported_file_formats -- map of CTD format names to objects with a galaxy_extension

    Unregistered formats are ignored, so no fallback data type needs to be built for them.
    """
    return {supported_file_formats[format_name].galaxy_extension
            for format_name in formats if format_name in supported_file_formats}
1237
1238
def create_change_format_node(parent, data_formats, input_ref):
    """Add a <change_format> node with one <when> child per data format, e.g.:

    <change_format>
        <when input="secondary_structure" value="true" format="text"/>
    </change_format>
    """
    change_format_node = add_child_node(parent, "change_format")
    for data_format in data_formats:
        when_attributes = OrderedDict([("input", input_ref), ("value", data_format), ("format", data_format)])
        add_child_node(change_format_node, "when", when_attributes)
1247
1248
# Shows basic information about the file, such as data ranges and file type.
def create_help(tool, model):
    """Add the <help> node to the tool.

    The text consists of the optional "manual" attribute of the model followed by a
    reference to its optional "docurl" attribute. If neither is available,
    "No help available" is used (the original initialised the manual with an empty
    string, which made that fallback unreachable).
    """
    manual = model.opt_attribs.get("manual")
    doc_url = model.opt_attribs.get("docurl")

    help_text = ""
    if manual:
        help_text += '%s\n\n' % manual
    if doc_url is not None:
        help_text += "\nFor more information, visit %s" % doc_url
    if not help_text:
        help_text = "No help available"

    help_node = add_child_node(tool, "help")
    # TODO: do we need CDATA Section here?
    help_node.text = help_text
1266
1267
# since a model might contain several ParameterGroup elements,
# we want to simply 'flatten' the parameters to generate the Galaxy wrapper
def extract_parameters(model):
    """Return a reversed iterator over all non-group parameters found in model.parameters."""
    flattened = []
    if len(model.parameters.parameters) > 0:
        # stack of elements that still have to be processed;
        # we know that CTDModel has one parent ParameterGroup
        stack = [model.parameters]
        while stack:
            current = stack.pop()
            if type(current) is ParameterGroup:
                # push the first-level children of this ParameterGroup
                stack.extend(current.parameters.values())
            else:
                flattened.append(current)
    # the stack yields the last parameter of the CTD first, so hand out the reversed sequence
    return reversed(flattened)
1287
1288
# adds and returns a child node using the given name to the given parent node
def add_child_node(parent_node, child_node_name, attributes=OrderedDict([])):
    """Create a sub-element of parent_node with the given tag name and attributes and return it."""
    return SubElement(parent_node, child_node_name, attributes)
1293
1294
# script entry point: run main() and use its return value as the exit status of the process
if __name__ == "__main__":
    sys.exit(main())
22
33 Given one or more CTD files, `CTD2Galaxy` generates the needed Galaxy wrappers to include them in a Galaxy instance.
44
5 ## How to install
5 ## Dependencies
6
7 `CTD2Galaxy` has the following python dependencies:
8
9 1. `lxml`.
10 1. [CTDopts]
11
12 You can install the [CTDopts] and `lxml` modules via `conda`, like so:
13
14 ```sh
15 $ conda install lxml
16 $ conda install -c workflowconversion ctdopts
17 ```
18
19 Note that the [CTDopts] module is available on the `workflowconversion` channel.
20
21 Of course, you can just download [CTDopts] and make it available through your `PYTHONPATH` environment variable. To get more information about how to install python modules, visit: https://docs.python.org/2/install/.
22
23
24 ## How to install CTD2Galaxy
625
726 1. Download the source code from https://github.com/genericworkflownodes/CTD2Galaxy.
8 2. Download CTDopts from https://github.com/genericworkflownodes/CTDopts.
9 3. You can install the `CTDopts` and `CTD2Galaxy` modules, or just make them available through your `PYTHONPATH` environment variable. To get more information about how to install python modules, visit: https://docs.python.org/2/install/.
1027
1128 ## How to use: most common tasks
1229
1330 The generator takes several parameters and a varying number of inputs and outputs. The following sub-sections show how to perform the most common operations.
1431
1532 Running the generator with the `-h/--help` parameter will print extended information about each of the parameters.
33
34 ### Macros
35
36 Galaxy supports the use of macros via a `macros.xml` file (`CTD2Galaxy` provides a sample macros file in `supported_formats/macros.xml`). Instead of repeating sections, macros can be used and expanded. If you want fine control over the macros, you can use the `-m` / `--macros` parameter to provide your own macros file.
37
38 Please note that the macros file you use must be copied to your Galaxy installation, into the same location where you place the generated *ToolConfig* files.
1639
1740 ### One input, one output
1841
7194
7295 Any of the following invocations will convert `/data/input_one.ctd` and `/data/input_two.ctd`:
7396
74 $ python generator.py -i /data/input_one.ctd -i /data/input_two.ctd -o /data/generated
97 $ python generator.py -i /data/input_one.ctd -i /data/input_two.ctd -o /data/generated
7598 $ python generator.py -i /data/input_one.ctd /data/input_two.ctd -o /data/generated
7699 $ python generator.py --input /data/input_one.ctd /data/input_two.ctd -o /data/generated
77 $ python generator.py --input /data/input_one.ctd --input /data/input_two.ctd -o /data/generated
100 $ python generator.py --input /data/input_one.ctd --input /data/input_two.ctd -o /data/generated
78101
79102 The following invocation will convert `/data/input.ctd` into `/data/output.xml`:
80103
81 $ python generator.py -i /data/input.ctd -o /data/output.xml
104 $ python generator.py -i /data/input.ctd -o /data/output.xml -m sample_files/macros.xml
82105
83106 Of course, you can also use wildcards, which will be automatically expanded by any modern operating system. This is extremely useful if you want to convert several files at a time. Imagine that the folder `/data/ctds` contains three files, `input_one.ctd`, `input_two.ctd` and `input_three.ctd`. The following two invocations will produce the same output in the `/data/galaxy`:
84107
232255
233256 * Purpose: Include external macros files.
234257 * Short/long version: `-m` / `--macros`
235 * Required: yes.
258 * Required: no.
259 * Default: `macros.xml`
236260 * Taken values: List of paths of macros files to include.
237261
238262 *ToolConfig* supports elaborate sections such as `<stdio>`, `<requirements>`, etc., that are identical across tools of the same suite. Macros files assist in the task of including external xml sections into *ToolConfig* files. For more information about the syntax of macros files, see: https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#Reusing_Repeated_Configuration_Elements
239263
240 There are some macros that are required, namely `stdio`, `requirements` and `advanced_options`. A template macro file is included under `support_files/macros.xml`. Although this is a required file, it can be edited to suit your needs and you could add extra macros or leave it as it is and include additional files.
264 There are some macros that are required, namely `stdio`, `requirements` and `advanced_options`. A template macro file is included in [macros.xml]. It can be edited to suit your needs and you could add extra macros or leave it as it is and include additional files.
241265
242266 Every macro found in the included files and in [macros.xml] will be expanded. Users are responsible for copying the given macros files into their corresponding Galaxy folders.
243267
320344 * MapAlignerPoseClustering
321345 * MapAlignerSpectrum
322346 * MapAlignerRTTransformer
347
348 [CTDopts]: https://github.com/genericworkflownodes/CTDopts
349 [macros.xml]: https://github.com/WorkflowConversion/CTD2Galaxy/blob/master/macros.xml
0 #!/usr/bin/env python
1 # encoding: utf-8
2
3 """
4 @author: delagarza
5 """
6
7
8 import sys
9 import os
10 import traceback
11 import ntpath
12 import string
13
14 from argparse import ArgumentParser
15 from argparse import RawDescriptionHelpFormatter
16 from CTDopts.CTDopts import CTDModel, _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, \
17 _FileFormat, ModelError
18 from collections import OrderedDict
19 from string import strip
20 from lxml import etree
21 from lxml.etree import SubElement, Element, ElementTree, ParseError, parse
22
# no names are exported via 'from generator import *'
__all__ = []
# version and dates of the program
__version__ = 1.0
__date__ = '2014-09-17'
__updated__ = '2016-05-09'

# number of spaces used per indentation level
MESSAGE_INDENTATION_INCREMENT = 2

# maps python types and CTDopts types to the name of the corresponding Galaxy parameter type
TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'data',
                       _OutFile: 'data', _Choices: 'select'}

# names of the macros
STDIO_MACRO_NAME = "stdio"
REQUIREMENTS_MACRO_NAME = "requirements"
ADVANCED_OPTIONS_MACRO_NAME = "advanced_options"

# list of the three macro names defined above
REQUIRED_MACROS = [STDIO_MACRO_NAME, REQUIREMENTS_MACRO_NAME, ADVANCED_OPTIONS_MACRO_NAME]
38
39
class CLIError(Exception):
    """Generic exception to raise and log different fatal errors.

    The message is stored, prefixed with "E: ", in the msg attribute, which is
    also the string representation of the exception.
    """

    def __init__(self, msg):
        # bound super() call, so that Exception.__init__ actually runs
        # (super(CLIError) alone is an unbound proxy and initialised nothing)
        super(CLIError, self).__init__(msg)
        self.msg = "E: %s" % msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
51
52
class InvalidModelException(ModelError):
    """ModelError that carries a message, used as both its str() and its repr()."""

    def __init__(self, message):
        super(InvalidModelException, self).__init__()
        self.message = message

    def __str__(self):
        return self.message

    # the representation is the plain message as well
    __repr__ = __str__
63
64
class ApplicationException(Exception):
    """Exception whose string representation is the message given on construction."""

    def __init__(self, msg):
        # bound super() call, so that Exception.__init__ actually runs
        # (super(ApplicationException) alone is an unbound proxy and initialised nothing)
        super(ApplicationException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
75
76
class ExitCode:
    """Plain record holding the range, the level and the description of an exit code."""

    def __init__(self, code_range="", level="", description=None):
        self.range, self.level, self.description = code_range, level, description
82
83
class DataType:
    """Plain record holding an extension, its Galaxy extension, its Galaxy type and its mimetype."""

    def __init__(self, extension, galaxy_extension=None, galaxy_type=None, mimetype=None):
        self.extension, self.galaxy_extension = extension, galaxy_extension
        self.galaxy_type, self.mimetype = galaxy_type, mimetype
90
91
class ParameterHardcoder:
    """Registry of hardcoded parameter values, either for all tools or for a single tool."""

    def __init__(self):
        # parameter_map holds the hardcoded values; its keys are built by build_key():
        #   [ParameterName]                       -> value that applies to all tools
        #   [ParameterName][separator][ToolName]  -> value that applies to one tool only
        #
        # examples (with the separator '!'):
        #   threads                 -> 24
        #   adapter!XtandemAdapter  -> xtandem.exe
        #   adapter                 -> adapter.exe
        self.separator = "!"
        self.parameter_map = {}

    # the most specific value will be returned in case of overlap
    def get_hardcoded_value(self, parameter_name, tool_name):
        tool_specific_key = self.build_key(parameter_name, tool_name)
        tool_specific_value = self.parameter_map.get(tool_specific_key, None)
        if tool_specific_value is None:
            # nothing registered for this tool: use the value that applies to all tools (if any)
            return self.parameter_map.get(parameter_name, None)
        return tool_specific_value

    def register_parameter(self, parameter_name, parameter_value, tool_name=None):
        key = self.build_key(parameter_name, tool_name)
        self.parameter_map[key] = parameter_value

    def build_key(self, parameter_name, tool_name):
        if tool_name is not None:
            return "%s%s%s" % (parameter_name, self.separator, tool_name)
        return parameter_name
123
124
def _build_program_description():
    """Return the text shown as description in the ``--help`` output (summary, license and usage guide)."""
    short_description = "CTD2Galaxy - A project from the GenericWorkflowNodes family " \
                        "(https://github.com/orgs/genericworkflownodes)"
    # the backslash in front of ${GALAXY_SLOTS...} is meant to be printed, hence the doubled backslash
    usage = '''
USAGE:

I - Parsing a single CTD file and generate a Galaxy wrapper:

$ python generator.py -i input.ctd -o output.xml


II - Parsing all found CTD files (files with .ctd and .xml extension) in a given folder and
output converted Galaxy wrappers in a given folder:

$ python generator.py -i /home/user/*.ctd -o /home/user/galaxywrappers


III - Providing file formats, mimetypes

Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a certain
data format will be able to receive data from a port from the same format. This converter allows you to provide
a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. The layout of
this file consists of lines, each of either one, three or four columns separated by any amount of whitespace. The
content of each column is as follows:

* 1st column: file extension
* 2nd column: data type, as listed in Galaxy
* 3rd column: full-named Galaxy data type, as it will appear on datatypes_conf.xml
* 4th column: mimetype (optional)

The following is an example of a valid "file formats" file:

########################################## FILE FORMATS example ##########################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the file format as given in the CTD and second column is the Galaxy data format.
# The second, third and fourth columns can be left empty if the data type has already been registered
# in Galaxy, otherwise, all but the mimetype must be provided.

# CTD type # Galaxy type # Long Galaxy data type # Mimetype
csv tabular galaxy.datatypes.data:Text
fasta
ini txt galaxy.datatypes.data:Text
txt
idxml txt galaxy.datatypes.xml:GenericXml application/xml
options txt galaxy.datatypes.data:Text
grid grid galaxy.datatypes.data:Grid

##########################################################################################################

Note that each line consists precisely of either one, three or four columns. In the case of data types already
registered in Galaxy (such as fasta and txt in the above example), only the first column is needed. In the case of
data types that haven't been yet registered in Galaxy, the first three columns are needed (mimetype is optional).

For information about Galaxy data types and subclasses, see the following page:
https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes


IV - Hardcoding parameters

It is possible to hardcode parameters. This makes sense if you want to set a tool in Galaxy in 'quiet' mode or if
your tools support multi-threading and accept the number of threads via a parameter, without giving the end user the
chance to change the values for these parameters.

In order to generate hardcoded parameters, you need to provide a simple file. Each line of this file contains two
or three columns separated by whitespace. Any line starting with a '#' will be ignored. The first column contains
the name of the parameter, the second column contains the value that will always be set for this parameter. The
first two columns are mandatory.

If the parameter is to be hardcoded only for a set of tools, then a third column can be added. This column includes
a comma-separated list of tool names for which the parameter will be hardcoded. If a third column is not included,
then all processed tools containing the given parameter will get a hardcoded value for it.

The following is an example of a valid file:

##################################### HARDCODED PARAMETERS example #####################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the name of the parameter and the second column is the value that will be used.

# Parameter name # Value # Tool(s)
threads \\${GALAXY_SLOTS:-24}
mode quiet
xtandem_executable xtandem XTandemAdapter
verbosity high Foo, Bar

#########################################################################################################

Using the above file will produce a <command> similar to:

[tool_name] ... -threads \\${GALAXY_SLOTS:-24} -mode quiet ...

For all tools. For XTandemAdapter, the <command> will be similar to:

XtandemAdapter ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -xtandem_executable xtandem ...

And for tools Foo and Bar, the <command> will be similar to:

Foo ... ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -verbosity high ...


V - Control which tools will be converted

Sometimes only a subset of CTDs needs to be converted. It is possible to either explicitly specify which tools will
be converted or which tools will not be converted.

The value of the -s/--skip-tools parameter is a file in which each line will be interpreted as the name of a tool
that will not be converted. Conversely, the value of the -r/--required-tools is a file in which each line will be
interpreted as a tool that is required. Only one of these parameters can be specified at a given time.

The format of both files is exactly the same. As stated before, each line will be interpreted as the name of a tool;
any line starting with a '#' will be ignored.

'''
    return '''%(short_description)s
Copyright 2015, Luis de la Garza

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

%(usage)s
''' % {'short_description': short_description, 'usage': usage}


def _build_argument_parser(description, version_message):
    """Create the command line parser understood by CTD2Galaxy."""
    parser = ArgumentParser(prog="CTD2Galaxy", description=description,
                            formatter_class=RawDescriptionHelpFormatter, add_help=True)
    parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+", action="append",
                        help="List of CTD files to convert.")
    parser.add_argument("-o", "--output-destination", dest="output_destination", required=True,
                        help="If multiple input files are given, then a folder in which all generated "
                             "XMLs will be generated is expected; "
                             "if a single input file is given, then a destination file is expected.")
    parser.add_argument("-f", "--formats-file", dest="formats_file",
                        help="File containing the supported file formats. Run with '-h' or '--help' to see a "
                             "brief example on the layout of this file.", default=None, required=False)
    parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line",
                        help="Adds content to the command line", default="", required=False)
    parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination",
                        help="Specify the location of a datatypes_conf.xml to modify and add the registered "
                             "data types. If the provided destination does not exist, a new file will be created.",
                        default=None, required=False)
    parser.add_argument("-x", "--default-executable-path", dest="default_executable_path",
                        help="Use this executable path when <executablePath> is not present in the CTD",
                        default=None, required=False)
    parser.add_argument("-b", "--blacklist-parameters", dest="blacklisted_parameters", default=[], nargs="+",
                        action="append",
                        help="List of parameters that will be ignored and won't appear on the galaxy stub",
                        required=False)
    parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False,
                        help="Default category to use for tools lacking a category when generating tool_conf.xml")
    parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False,
                        help="Specify the location of an existing tool_conf.xml that will be modified to include "
                             "the converted tools. If the provided destination does not exist, a new file will "
                             "be created.")
    parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False,
                        help="The path that will be prepended to the file names when generating tool_conf.xml")
    parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False,
                        help="Each line of the file will be interpreted as a tool name that needs translation. "
                             "Run with '-h' or '--help' to see a brief example on the format of this file.")
    parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False,
                        help="File containing a list of tools for which a Galaxy stub will not be generated. "
                             "Run with '-h' or '--help' to see a brief example on the format of this file.")
    # NOTE(review): with action="append" argparse appends user-supplied values to the default, so
    # 'macros.xml' stays in the list even when -m is given explicitly -- confirm that this is intended.
    parser.add_argument("-m", "--macros", dest="macros_files", default=[['macros.xml']], nargs="+",
                        action="append", required=False,
                        help="Import the additional given file(s) as macros. "
                             "The macros stdio, requirements and advanced_options are required. Please see "
                             "macros.xml for an example of a valid macros file. All defined macros will be imported.")
    parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None, required=False,
                        help="File containing hardcoded values for the given parameters. Run with '-h' or '--help' "
                             "to see a brief example on the format of this file.")
    # TODO: add verbosity, maybe?
    parser.add_argument("-V", "--version", action='version', version=version_message)
    return parser


def main(argv=None):  # IGNORE:C0111
    """Command line entry point: convert the given CTD files into Galaxy tool wrappers.

    :param argv: optional list of extra command line arguments; they are parsed *in addition to*
                 the arguments found in ``sys.argv`` (``sys.argv`` itself is left untouched)
    :return: 0 on success (or on keyboard interrupt), 1 if the operation could not be completed
             because of an ApplicationException or a ModelError, 2 on any other exception
    """
    # arguments to parse: the ones of the process plus, if given, the ones passed by the caller
    cli_arguments = sys.argv[1:]
    if argv is not None:
        cli_arguments = cli_arguments + list(argv)

    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)

    try:
        # Setup argument parser and process arguments
        parser = _build_argument_parser(_build_program_description(), program_version_message)
        args = parser.parse_args(cli_arguments)

        # validate and prepare the passed arguments
        validate_and_prepare_args(args)

        # extract the names of the macros and check that we have found the ones we need
        macros_file_names = args.macros_files
        macros_to_expand = parse_macros_files(macros_file_names)

        # parse the given supported file-formats file
        supported_file_formats = parse_file_formats(args.formats_file)

        # parse the hardcoded parameters file
        parameter_hardcoder = parse_hardcoded_parameters(args.hardcoded_parameters)

        # parse the skip/required tools files
        skip_tools = parse_tools_list_file(args.skip_tools_file)
        required_tools = parse_tools_list_file(args.required_tools_file)

        parsed_models = convert(args.input_files,
                                args.output_destination,
                                supported_file_formats=supported_file_formats,
                                default_executable_path=args.default_executable_path,
                                add_to_command_line=args.add_to_command_line,
                                blacklisted_parameters=args.blacklisted_parameters,
                                required_tools=required_tools,
                                skip_tools=skip_tools,
                                macros_file_names=macros_file_names,
                                macros_to_expand=macros_to_expand,
                                parameter_hardcoder=parameter_hardcoder)

        # TODO: add some sort of warning if a macro that doesn't exist is to be expanded

        # it is not needed to copy the macros files, since the user has provided them

        # generation of galaxy stubs is ready... now, let's see if we need to generate a tool_conf.xml
        if args.tool_conf_destination is not None:
            generate_tool_conf(parsed_models, args.tool_conf_destination,
                               args.galaxy_tool_path, args.default_category)

        # now datatypes_conf.xml
        if args.data_types_destination is not None:
            generate_data_type_conf(supported_file_formats, args.data_types_destination)

        return 0

    except KeyboardInterrupt:
        # handle keyboard interrupt
        return 0
    except ApplicationException as e:
        error("CTD2Galaxy could not complete the requested operation.", 0)
        error("Reason: " + e.msg, 0)
        return 1
    except ModelError as e:
        error("There seems to be a problem with one of your input CTDs.", 0)
        # not every ModelError is guaranteed to carry a 'msg' attribute, fall back to its string form
        error("Reason: " + (getattr(e, "msg", None) or str(e)), 0)
        return 1
    except Exception:
        traceback.print_exc()
        return 2
376
377
def parse_tools_list_file(tools_list_file):
    """Read a file containing one tool name per line.

    Blank lines and lines whose first non-blank character is '#' are ignored.

    :param tools_list_file: path of the file to read, or None
    :return: list of tool names (stripped), or None if no file was given
    """
    if tools_list_file is None:
        return None

    with open(tools_list_file) as tools_file:
        candidates = [raw_line.strip() for raw_line in tools_file]
    return [tool_name for tool_name in candidates if tool_name and not tool_name.startswith("#")]
390
391
def parse_macros_files(macros_file_names):
    """Collect the names of all macros defined in the given macros files.

    Every ``<xml name="...">`` child of the root element of each file counts as a macro. A warning is
    issued for names defined more than once.

    :param macros_file_names: paths of the macros files to inspect
    :return: set with the names of the macros to expand, i.e., all found macros except advanced_options
    :raises ApplicationException: if a file cannot be opened or parsed, or if one of the required
                                  macros is not defined in any of the files
    """
    macros_to_expand = set()

    for macros_file_name in macros_file_names:
        try:
            # binary mode lets the XML parser deal with the encoding; the handle is closed right after parsing
            with open(macros_file_name, "rb") as macros_file:
                root = parse(macros_file).getroot()
        except ParseError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " +
                                       str(e))
        except IOError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " +
                                       str(e))

        for xml_element in root.findall("xml"):
            name = xml_element.attrib["name"]
            if name in macros_to_expand:
                warning("Macro %s has already been found. Duplicate found in file %s." %
                        (name, macros_file_name), 0)
            else:
                macros_to_expand.add(name)

    # we depend on "stdio", "requirements" and "advanced_options" to exist on all the given macros files
    missing_needed_macros = [required_macro for required_macro in REQUIRED_MACROS
                             if required_macro not in macros_to_expand]

    if missing_needed_macros:
        raise ApplicationException(
            "The following required macro(s) were not found in any of the given macros files: %s, "
            "see sample_files/macros.xml for an example of a valid macros file."
            % ", ".join(missing_needed_macros))

    # we do not need to "expand" the advanced_options macro
    macros_to_expand.remove(ADVANCED_OPTIONS_MACRO_NAME)
    return macros_to_expand
428
def parse_hardcoded_parameters(hardcoded_parameters_file):
    """Build a ParameterHardcoder from the given hardcoded parameters file.

    Each relevant line has two or three whitespace-separated columns: parameter name, hardcoded value
    and, optionally, a comma-separated list of tool names. Blank lines and lines starting with '#'
    are ignored; lines with fewer than two columns are reported with a warning and skipped.

    :param hardcoded_parameters_file: path of the file to parse, or None
    :return: a ParameterHardcoder (without registered values if no file was given)
    """
    parameter_hardcoder = ParameterHardcoder()
    if hardcoded_parameters_file is None:
        return parameter_hardcoder

    with open(hardcoded_parameters_file) as f:
        for line_number, line in enumerate(f, 1):
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith("#"):
                continue

            # split at most twice: the third column (list of tools) may contain
            # whitespace, e.g. "Foo, Bar", and must be obtained as a whole
            parsed_hardcoded_parameter = stripped_line.split(None, 2)
            # valid lines contain two or three columns
            if len(parsed_hardcoded_parameter) not in (2, 3):
                warning("Invalid line at line number %d of the given hardcoded parameters file. Line will be "
                        "ignored:\n%s" % (line_number, line), 0)
                continue

            parameter_name = parsed_hardcoded_parameter[0]
            hardcoded_value = parsed_hardcoded_parameter[1]
            tool_names = None
            if len(parsed_hardcoded_parameter) == 3:
                tool_names = parsed_hardcoded_parameter[2].split(',')

            if tool_names:
                # hardcode only for the listed tools
                for tool_name in tool_names:
                    parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name.strip())
            else:
                # hardcode for all tools
                parameter_hardcoder.register_parameter(parameter_name, hardcoded_value)

    return parameter_hardcoder
459
460
def parse_file_formats(formats_file):
    """Parse the file relating CTD file formats to Galaxy data types.

    Relevant lines have one, three or four whitespace-separated columns (extension, Galaxy extension,
    Galaxy data type and, optionally, mimetype). Blank lines and lines starting with '#' are skipped;
    lines with any other number of columns are reported with a warning and ignored.

    :param formats_file: path of the file to parse, or None
    :return: dictionary mapping each extension (first column) to its DataType
    """
    supported_formats = {}
    if formats_file is None:
        return supported_formats

    with open(formats_file) as formats_handle:
        for line_number, raw_line in enumerate(formats_handle, 1):
            content = raw_line.strip()
            # nothing to do for empty lines and comments
            if not content or content.startswith("#"):
                continue

            columns = content.split()
            column_count = len(columns)
            if column_count not in (1, 3, 4):
                warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" %
                        (line_number, raw_line), 0)
                continue

            extension = columns[0]
            if column_count == 1:
                # only the extension was given, it doubles as Galaxy extension
                supported_formats[extension] = DataType(extension, extension)
            else:
                # the mimetype is the optional fourth column
                mimetype = columns[3] if column_count == 4 else None
                supported_formats[extension] = DataType(extension, columns[1], columns[2], mimetype)

    return supported_formats
492
493
def validate_and_prepare_args(args):
    """Validate the parsed command line arguments and normalize them in place.

    The attributes ``input_files``, ``blacklisted_parameters`` and ``macros_files`` of ``args`` are
    expected to be lists of lists and are replaced by flat lists.

    :param args: namespace produced by the argument parser
    :raises ApplicationException: if both a skip-tools and a required-tools file were given, if the output
                                  destination does not match the number of inputs, if an input file does
                                  not exist or if an output file name points to a directory
    """
    # check that only one of skip_tools_file and required_tools_file has been provided
    if args.skip_tools_file is not None and args.required_tools_file is not None:
        raise ApplicationException(
            "You have provided both a file with tools to ignore and a file with required tools.\n"
            "Only one of -s/--skip-tools, -r/--required-tools can be provided.")

    # first, we convert all list of lists in args to flat lists
    for list_to_flatten in ("input_files", "blacklisted_parameters", "macros_files"):
        nested_list = getattr(args, list_to_flatten)
        setattr(args, list_to_flatten, [item for sub_list in nested_list for item in sub_list])

    # if input is a single file, we expect output to be a file (and not a dir that already exists)
    if len(args.input_files) == 1:
        if os.path.isdir(args.output_destination):
            raise ApplicationException("If a single input file is provided, output (%s) is expected to be a file "
                                       "and not a folder.\n" % args.output_destination)

    # if input is a list of files, we expect output to be a folder
    if len(args.input_files) > 1:
        if not os.path.isdir(args.output_destination):
            raise ApplicationException("If several input files are provided, output (%s) is expected to be an "
                                       "existing directory.\n" % args.output_destination)

    # check that the provided input files, if provided, contain a valid file path
    input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files",
                                "input_files", "formats_file", "hardcoded_parameters"]

    for variable_name in input_variables_to_check:
        member_value = getattr(args, variable_name)
        if member_value is None:
            continue
        # handle single files and lists of files alike
        file_names = member_value if isinstance(member_value, list) else [member_value]
        for file_name in file_names:
            path_to_check = str(file_name).strip()
            # isfile() is already False for paths that do not exist
            if not os.path.isfile(path_to_check):
                raise ApplicationException(
                    "The provided input file (%s) does not exist or is not a valid file path."
                    % path_to_check)

    # check that the provided output files, if provided, contain a valid file path (i.e., not a folder)
    for variable_name in ("data_types_destination", "tool_conf_destination"):
        file_name = getattr(args, variable_name)
        if file_name is not None and os.path.isdir(file_name):
            raise ApplicationException("The provided output file name (%s) points to a directory." % file_name)
546
547
def convert(input_files, output_destination, **kwargs):
    """Convert each of the given CTD files into a Galaxy tool wrapper.

    CTDs that cannot be parsed are reported and skipped, as are tools listed in ``skip_tools`` or
    missing from a given ``required_tools`` list.

    :param input_files: paths of the CTD files to convert
    :param output_destination: output file if a single input is given, output folder otherwise
    :param kwargs: conversion settings; ``skip_tools`` and ``required_tools`` are read here, the whole
                   set is handed over to the functions generating the different parts of the wrapper
    :return: list of ``[model, output file name]`` pairs, one for each converted tool
    """
    is_converting_multiple_ctds = len(input_files) > 1
    parsed_models = []
    for input_file in input_files:
        # first, generate a model
        try:
            model = CTDModel(from_file=input_file)
        except Exception as e:
            error(str(e), 1)
            continue

        if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]:
            info("Skipping tool %s" % model.name, 0)
            continue
        if kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]:
            info("Tool %s is not required, skipping it" % model.name, 0)
            continue

        info("Converting from %s " % input_file, 0)
        tool = create_tool(model)
        write_header(tool, model)
        create_description(tool, model)
        expand_macros(tool, model, **kwargs)
        create_command(tool, model, **kwargs)
        create_inputs(tool, model, **kwargs)
        create_outputs(tool, model, **kwargs)
        create_help(tool, model)

        # finally, serialize the tool
        output_file = output_destination
        # if multiple inputs are being converted,
        # then we need to generate a different output_file for each input
        if is_converting_multiple_ctds:
            output_file = os.path.join(output_file, get_filename_without_suffix(input_file) + ".xml")
        # wrap our tool element into a tree to be able to serialize it
        tree = ElementTree(tool)
        # the serializer produces encoded data, so write in binary mode and make sure the file gets closed
        with open(output_file, 'wb') as output_handle:
            tree.write(output_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
        # let's use model to hold the name of the output file
        parsed_models.append([model, get_filename(output_file)])

    return parsed_models
589
590
def write_header(tool, model):
    """Place two XML comments in front of the tool element: a generation notice and the proposed section."""
    generation_notice = etree.Comment(
        "This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). "
        "This file was automatically generated using CTD2Galaxy.")
    tool.addprevious(generation_notice)

    # the category of the CTD, if any, is proposed as tool section
    proposed_section = model.opt_attribs.get("category", "")
    section_notice = etree.Comment('Proposed Tool Section: [%s]' % proposed_section)
    tool.addprevious(section_notice)
596
597
def generate_tool_conf(parsed_models, tool_conf_destination, galaxy_tool_path, default_category):
    """Write a tool_conf.xml listing the converted tools, grouped in one section per category.

    :param parsed_models: list of ``[model, output file name]`` pairs
    :param tool_conf_destination: path of the file to write
    :param galaxy_tool_path: path prepended to each file name (a trailing '/' is added if missing), or None
    :param default_category: category used for tools whose category is missing or blank
    """
    # for each category, we keep a list of the file names of the tools corresponding to it
    categories_to_tools = dict()
    for model, file_name in parsed_models:
        category = model.opt_attribs.get("category", "").strip()
        if not category:
            category = default_category
        categories_to_tools.setdefault(category, []).append(file_name)

    # at this point, we should have a map for all categories->tools
    toolbox_node = Element("toolbox")

    if galaxy_tool_path is None:
        galaxy_tool_path = ""
    else:
        # always use the stripped path, even if it already ends with '/'
        galaxy_tool_path = galaxy_tool_path.strip()
        if not galaxy_tool_path.endswith("/"):
            galaxy_tool_path += "/"

    for category, file_names in categories_to_tools.items():
        section_node = add_child_node(toolbox_node, "section")
        # section ids must not contain whitespace
        section_node.attrib["id"] = "section-id-" + "".join(category.split())
        section_node.attrib["name"] = category

        for file_name in file_names:
            tool_node = add_child_node(section_node, "tool")
            tool_node.attrib["file"] = galaxy_tool_path + file_name

    toolconf_tree = ElementTree(toolbox_node)
    # the serializer produces encoded data, so write in binary mode and make sure the file gets closed
    with open(tool_conf_destination, 'wb') as tool_conf_file:
        toolconf_tree.write(tool_conf_file, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy tool_conf.xml in %s" % tool_conf_destination, 0)
629
630
def generate_data_type_conf(supported_file_formats, data_types_destination):
    """Write a datatypes_conf.xml registering the formats for which a Galaxy data type was given.

    :param supported_file_formats: dictionary of format name -> DataType
    :param data_types_destination: path of the file to write
    """
    data_types_node = Element("datatypes")
    registration_node = add_child_node(data_types_node, "registration")
    registration_node.attrib["converters_path"] = "lib/galaxy/datatypes/converters"
    registration_node.attrib["display_path"] = "display_applications"

    for format_name in supported_file_formats:
        data_type = supported_file_formats[format_name]
        # add only if it's a data type that does not exist in Galaxy
        if data_type.galaxy_type is None:
            continue
        data_type_node = add_child_node(registration_node, "datatype")
        # we know galaxy_extension is not None
        data_type_node.attrib["extension"] = data_type.galaxy_extension
        data_type_node.attrib["type"] = data_type.galaxy_type
        if data_type.mimetype is not None:
            data_type_node.attrib["mimetype"] = data_type.mimetype

    data_types_tree = ElementTree(data_types_node)
    # the serializer produces encoded data, so write in binary mode and make sure the file gets closed
    with open(data_types_destination, 'wb') as data_types_file:
        data_types_tree.write(data_types_file, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy datatypes_conf.xml in %s" % data_types_destination, 0)
651
652
# taken from
# http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format
def get_filename(path):
    """Return the last component of a path, accepting both '/' and '\\' and ignoring one trailing separator."""
    directory, last_component = ntpath.split(path)
    if last_component:
        return last_component
    # the path ended with a separator: the name is the last component of what precedes it
    return ntpath.basename(directory)
658
659
def get_filename_without_suffix(path):
    """Return the base name of the given path without its last extension."""
    base_name = os.path.basename(path)
    return os.path.splitext(base_name)[0]
663
664
def create_tool(model):
    """Create the root ``<tool>`` element; id and name are both taken from the name of the model."""
    # an ordered dictionary fixes the order in which the attributes are handed over: id, name, version
    tool_attributes = OrderedDict()
    tool_attributes["id"] = model.name
    tool_attributes["name"] = model.name
    tool_attributes["version"] = model.version
    return Element("tool", tool_attributes)
667
668
def create_description(tool, model):
    """Add a ``<description>`` child to the tool if the model has a description that is not None."""
    optional_attributes = model.opt_attribs
    if "description" not in optional_attributes.keys():
        return
    description_text = optional_attributes["description"]
    if description_text is None:
        return
    description_node = SubElement(tool, "description")
    description_node.text = description_text
673
674
def get_param_name(param):
    """Return the name of a parameter, prefixed with the names of its parent groups joined by ':'.

    The topmost group is not part of the name.
    """
    # we generate parameters with colons for subgroups, but not for the topmost parents (OpenMS legacy)
    parent = param.parent
    if type(parent) == ParameterGroup and parent.parent is not None:
        return get_param_name(parent) + ":" + resolve_param_mapping(param)
    return resolve_param_mapping(param)
681
682
# some parameters are mapped to command line options, this method helps resolve those mappings, if any
# TODO: implement mapping of parameters!!!
def resolve_param_mapping(param):
    """Return the name to use for the given parameter; for now this is simply its own name."""
    unmapped_name = param.name
    return unmapped_name
687
688
def create_command(tool, model, **kwargs):
    """Generate the ``<command>`` element of the tool.

    The command starts with the executable and the user-provided extra content, followed by one
    fragment per parameter. Fragments of advanced parameters (other than output files) are grouped
    in a block that is only active when the advanced options are selected. If the model has no
    output-file parameter, the standard output is redirected to ``$param_stdout``.

    :param tool: element to which the ``<command>`` node is added
    :param model: parsed CTD model
    :param kwargs: ``default_executable_path``, ``add_to_command_line``, ``parameter_hardcoder``
                   and ``blacklisted_parameters`` are read here
    """
    final_command = get_tool_executable_path(model, kwargs["default_executable_path"]) + '\n'
    final_command += kwargs["add_to_command_line"] + '\n'
    advanced_command_start = "#if $adv_opts.adv_opts_selector=='advanced':\n"
    advanced_command_end = '#end if'
    advanced_command = ''
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    found_output_parameter = False
    for param in extract_parameters(model):
        # output parameters count even if they end up being blacklisted
        if param.type is _OutFile:
            found_output_parameter = True
        command = ''
        param_name = get_param_name(param)

        if param.name in kwargs["blacklisted_parameters"]:
            continue

        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param_name, model.name)
        if hardcoded_value:
            command += '-%s %s\n' % (param_name, hardcoded_value)
        else:
            # parameter is neither blacklisted nor hardcoded...
            galaxy_parameter_name = get_galaxy_parameter_name(param)
            repeat_galaxy_parameter_name = get_repeat_galaxy_parameter_name(param)

            # logic for ITEMLISTs
            if param.is_list:
                if param.type is _InFile:
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + galaxy_parameter_name + ":\n"
                    command += " $token\n"
                    command += " #end for\n"
                else:
                    command += "\n#if $" + repeat_galaxy_parameter_name + ":\n"
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + repeat_galaxy_parameter_name + ":\n"
                    command += " #if \" \" in str(token):\n"
                    command += " \"$token." + galaxy_parameter_name + "\"\n"
                    command += " #else\n"
                    command += " $token." + galaxy_parameter_name + "\n"
                    command += " #end if\n"
                    command += " #end for\n"
                    command += "#end if\n"
            # logic for other ITEMs
            else:
                if param.advanced and param.type is not _OutFile:
                    actual_parameter = "$adv_opts.%s" % galaxy_parameter_name
                else:
                    actual_parameter = "$%s" % galaxy_parameter_name
                # if whitespace_validation has been set, we need to generate, for each parameter:
                # #if str( $t ).split() != '':
                # -t "$t"
                # #end if
                # TODO only useful for text fields, integers or floats
                # not useful for choices, input fields ...

                if not is_boolean_parameter(param) and type(param.restrictions) is _Choices:
                    # quote the selected choice only if it contains a blank
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += " #if \" \" in str(" + actual_parameter + "):\n"
                    command += " \"" + actual_parameter + "\"\n"
                    command += " #else\n"
                    command += " " + actual_parameter + "\n"
                    command += " #end if\n"
                    command += "#end if\n"
                elif is_boolean_parameter(param):
                    # flags are given without a value
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += "#end if\n"
                elif TYPE_TO_GALAXY_TYPE[param.type] == 'text':
                    # strings are compared by value; text is always quoted
                    command += "#if " + actual_parameter + ":\n"
                    command += " -%s " % param_name
                    command += " \"" + actual_parameter + "\"\n"
                    command += "#end if\n"
                else:
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s ' % param_name
                    command += actual_parameter + "\n"
                    command += "#end if\n"

        if param.advanced and param.type is not _OutFile:
            advanced_command += " %s" % command
        else:
            final_command += command

    if advanced_command:
        final_command += "%s%s%s\n" % (advanced_command_start, advanced_command, advanced_command_end)

    if not found_output_parameter:
        final_command += "> $param_stdout\n"

    command_node = add_child_node(tool, "command")
    command_node.text = final_command
783
784
# creates the xml elements needed to import the needed macros files
# and to "expand" the macros
def expand_macros(tool, model, **kwargs):
    """Add the ``<macros>`` element (executable token and imports) and one ``<expand>`` per macro.

    :param tool: element to which the nodes are added
    :param model: parsed CTD model
    :param kwargs: ``default_executable_path``, ``macros_file_names`` and ``macros_to_expand`` are read here
    """
    macros_node = add_child_node(tool, "macros")
    token_node = add_child_node(macros_node, "token")
    token_node.attrib["name"] = "@EXECUTABLE@"
    token_node.text = get_tool_executable_path(model, kwargs["default_executable_path"])

    # add <import> nodes
    for macro_file_name in kwargs["macros_file_names"]:
        # opening the file makes sure it is readable; the handle is closed as soon as the block is left
        with open(macro_file_name) as macro_file:
            import_node = add_child_node(macros_node, "import")
            # do not add the path of the file, rather, just its basename
            import_node.text = os.path.basename(macro_file.name)

    # add <expand> nodes
    for expand_macro in kwargs["macros_to_expand"]:
        expand_node = add_child_node(tool, "expand")
        expand_node.attrib["macro"] = expand_macro
804
805
def get_tool_executable_path(model, default_executable_path):
    """Build the executable to be written in the Galaxy command.

    The path is taken from ``executablePath`` of the model or, if missing, from
    ``default_executable_path``; it is stripped and a trailing '/' is added if needed. The name is
    taken from ``executableName`` or, if missing, from the name of the tool:

    * neither path nor name available: name of the tool
    * only name available: executableName
    * only path available: path + name of the tool
    * both available: path + executableName
    """
    attributes = model.opt_attribs
    path = attributes.get("executablePath", None)
    name = attributes.get("executableName", None)

    # fall back to the default path if the model does not define one
    if path is None:
        path = default_executable_path

    # make sure that there is a '/' at the end of the path
    if path is not None:
        path = path.strip()
        if not path.endswith('/'):
            path += '/'

    # result for the case in which all information is present
    path_and_name = str(path) + str(name)

    if path is None:
        return model.name if name is None else name
    if name is None:
        return path + model.name
    return path_and_name
840
841
def get_galaxy_parameter_name(param):
    """Return the name of the parameter as used in Galaxy: 'param_' plus its name with ':' and '-' as '_'."""
    sanitized_name = get_param_name(param)
    for unwanted_character in (':', '-'):
        sanitized_name = sanitized_name.replace(unwanted_character, '_')
    return "param_%s" % sanitized_name
844
845
def get_input_with_same_restrictions(out_param, model, supported_file_formats):
    """Return the first restricted input-file parameter supporting the same file types as ``out_param``.

    None is returned (implicitly) if there is no such parameter.
    """
    for candidate in extract_parameters(model):
        # only input files with restrictions can be compared
        if candidate.type is not _InFile or candidate.restrictions is None:
            continue
        candidate_formats = get_supported_file_types(candidate.restrictions.formats, supported_file_formats)
        wanted_formats = get_supported_file_types(out_param.restrictions.formats, supported_file_formats)
        if candidate_formats == wanted_formats:
            return candidate
854
855
def create_inputs(tool, model, **kwargs):
    """Create the ``<inputs>`` section of ``tool`` from the parameters of ``model``.

    Every parameter that is not an output file, not blacklisted and not
    hardcoded becomes a ``<param>`` node. Advanced parameters are collected
    inside an ``<expand macro="advanced_options">`` node, which is attached to
    ``<inputs>`` (as its last child) only if it ends up holding something.

    :param tool: parent XML node that receives the ``<inputs>`` node
    :param model: model whose parameters are converted
    :param kwargs: must contain "parameter_hardcoder"; "blacklisted_parameters" and
                   "supported_file_formats" are read once a parameter is processed
    """
    inputs_node = SubElement(tool, "inputs")

    # some suites (such as OpenMS) need some advanced options when handling inputs
    # build the node detached: creating it as a child of 'tool' would leave a stray,
    # empty <expand> under <tool> whenever the model has no advanced parameters
    expand_advanced_node = Element("expand", OrderedDict([("macro", ADVANCED_OPTIONS_MACRO_NAME)]))
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    # treat all non output-file parameters as inputs
    for param in extract_parameters(model):
        # no need to show hardcoded or blacklisted parameters
        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
        if param.name in kwargs["blacklisted_parameters"] or hardcoded_value:
            continue
        if param.type is _OutFile:
            continue

        # advanced parameters live inside the advanced options macro
        parent_node = expand_advanced_node if param.advanced else inputs_node

        # for lists we need a repeat tag
        if param.is_list and param.type is not _InFile:
            rep_node = add_child_node(parent_node, "repeat")
            create_repeat_attribute_list(rep_node, param)
            parent_node = rep_node

        param_node = add_child_node(parent_node, "param")
        create_param_attribute_list(param_node, param, kwargs["supported_file_formats"])

    # advanced parameter selection should be at the end
    # and only available if an advanced parameter exists
    if len(expand_advanced_node) > 0:
        inputs_node.append(expand_advanced_node)
897
898
def get_repeat_galaxy_parameter_name(param):
    """Return the name of the ``<repeat>`` node of ``param``: its Galaxy name prefixed with ``rep_``."""
    galaxy_name = get_galaxy_parameter_name(param)
    return "rep_" + galaxy_name
901
902
def create_repeat_attribute_list(rep_node, param):
    """Set the ``name``, ``min``, optional ``max`` and ``title`` attributes of a ``<repeat>`` node.

    :param rep_node: the ``<repeat>`` XML node to fill in
    :param param: list parameter; ``required`` and ``default`` are read
    """
    attributes = rep_node.attrib
    attributes["name"] = get_repeat_galaxy_parameter_name(param)
    # a required list needs at least one entry
    attributes["min"] = "1" if param.required else "0"
    # for the ITEMLISTs which have LISTITEM children we only
    # need one parameter as it is given as a string
    if param.default is not None:
        attributes["max"] = "1"
    attributes["title"] = get_galaxy_parameter_name(param)
914
915
def create_param_attribute_list(param_node, param, supported_file_formats):
    """Fill a Galaxy ``<param>`` node with the attributes derived from a CTD parameter.

    Sets name, type, format, optional flag, restriction-derived attributes
    (options, min/max, boolean values), a sanitizer for text parameters,
    the default value, label and help.

    :param param_node: the ``<param>`` XML node to fill in
    :param param: the CTD parameter being converted
    :param supported_file_formats: format registry handed to get_supported_file_types
    :raises InvalidModelException: (a ModelError) for an unmapped parameter type or an
                                   unexpected restriction type
    """
    param_node.attrib["name"] = get_galaxy_parameter_name(param)

    # look the type up with .get() so that an unmapped type reaches the explicit,
    # descriptive error below instead of surfacing as a bare KeyError
    param_type = TYPE_TO_GALAXY_TYPE.get(param.type)
    if param_type is None:
        raise InvalidModelException("Unrecognized parameter type %(type)s for parameter %(name)s"
                                    % {"type": param.type, "name": param.name})

    if param.is_list:
        param_type = "text"

    if is_selection_parameter(param):
        param_type = "select"

    if is_boolean_parameter(param):
        param_type = "boolean"

    if param.type is _InFile:
        # assume it's just text unless restrictions are provided
        param_format = "text"
        if param.restrictions is not None:
            # join all supported_formats for the file... this MUST be a _FileFormat
            if type(param.restrictions) is _FileFormat:
                param_format = ','.join(get_supported_file_types(param.restrictions.formats, supported_file_formats))
            else:
                raise InvalidModelException("Expected 'file type' restrictions for input file [%(name)s], "
                                            "but instead got [%(type)s]"
                                            % {"name": param.name, "type": type(param.restrictions)})
        param_node.attrib["type"] = "data"
        param_node.attrib["format"] = param_format
        # in the case of multiple input set multiple flag
        if param.is_list:
            param_node.attrib["multiple"] = "true"
    else:
        param_node.attrib["type"] = param_type

    # check for parameters with restricted values (which will correspond to a "select" in galaxy)
    if param.restrictions is not None:
        # it could be either _Choices or _NumericRange, with special case for boolean types
        if param_type == "boolean":
            create_boolean_parameter(param_node, param)
        elif type(param.restrictions) is _Choices:
            # create as many <option> elements as restriction values
            for choice in param.restrictions.choices:
                option_node = add_child_node(param_node, "option", OrderedDict([("value", str(choice))]))
                option_node.text = str(choice)
        elif type(param.restrictions) is _NumericRange:
            if param.type is not int and param.type is not float:
                # report the offending parameter type (the restriction type is known here)
                raise InvalidModelException("Expected either 'int' or 'float' in the numeric range restriction for "
                                            "parameter [%(name)s], but instead got [%(type)s]" %
                                            {"name": param.name, "type": param.type})
            # extract the min and max values and add them as attributes
            if param.restrictions.n_min is not None:
                param_node.attrib["min"] = str(param.restrictions.n_min)
            if param.restrictions.n_max is not None:
                param_node.attrib["max"] = str(param.restrictions.n_max)
        elif type(param.restrictions) is _FileFormat:
            param_node.attrib["format"] = ",".join(
                get_supported_file_types(param.restrictions.formats, supported_file_formats))
        else:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] for parameter [%(name)s]"
                                        % {"type": type(param.restrictions), "name": param.name})

    param_node.attrib["optional"] = str(not param.required)

    if param_type == "text":
        # add size attribute... this is the length of a textbox field in Galaxy (it could also be 15x2, for instance)
        param_node.attrib["size"] = "30"
        # add sanitizer nodes, this is needed for special character like "["
        # which are used for example by FeatureFinderMultiplex
        sanitizer_node = SubElement(param_node, "sanitizer")

        valid_node = SubElement(sanitizer_node, "valid", OrderedDict([("initial", "string.printable")]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '\'')]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '"')]))

    # check for default value
    if param.default is not None:
        if type(param.default) is list:
            # we ASSUME that a list of parameters looks like:
            # $ tool -ignore He Ar Xe
            # meaning, that, for example, Helium, Argon and Xenon will be ignored
            param_node.attrib["value"] = ' '.join(map(str, param.default))
        elif param_type != "boolean":
            # boolean parameters handle default values by using the "checked" attribute
            # there isn't much we can do... just stringify the value
            param_node.attrib["value"] = str(param.default)
    elif param.type is int or param.type is float:
        # galaxy requires "value" to be included for int/float
        # since no default was included, we need to figure out one in a clever way... but let the user know
        # that we are "thinking" for him/her
        warning("Generating default value for parameter [%s]. "
                "Galaxy requires the attribute 'value' to be set for integer/floats. "
                "Edit the CTD file and provide a suitable default value." % param.name, 1)
        # check if there's a min/max and try to use them
        default_value = None
        if param.restrictions is not None:
            if type(param.restrictions) is _NumericRange:
                default_value = param.restrictions.n_min
                if default_value is None:
                    default_value = param.restrictions.n_max
                if default_value is None:
                    # no min/max provided... just use 0 and see what happens
                    default_value = 0
            else:
                # should never be here, since we have validated this anyway...
                # this code is here just for documentation purposes
                # however, better safe than sorry!
                # (it could be that the code changes and then we have an ugly scenario)
                raise InvalidModelException("Expected either a numeric range for parameter [%(name)s], "
                                            "but instead got [%(type)s]"
                                            % {"name": param.name, "type": type(param.restrictions)})
        else:
            # no restrictions and no default value provided...
            # make up something
            default_value = 0
        param_node.attrib["value"] = str(default_value)

    label = "%s parameter" % param.name
    help_text = ""

    if param.description is not None:
        label, help_text = generate_label_and_help(param.description)

    param_node.attrib["label"] = label
    param_node.attrib["help"] = "(-%s)" % param.name + " " + help_text
1047
1048
def generate_label_and_help(desc):
    """Split a parameter description into a short label and a help text.

    The description is normalized ("#br#" becomes " <br>", trailing dots are
    dropped, a lowercase first word is capitalized). Descriptions longer than
    50 characters are split at the first "e.g." or, failing that, at the end of
    the first sentence; the remainder becomes the help text.

    :param desc: the parameter description (converted with str())
    :return: tuple ``(label, help_text)``
    """
    line_break = "<br>"
    help_text = ""
    # This tag is found in some descriptions
    desc = str(desc).replace("#br#", " " + line_break)
    # Get rid of dots in the end
    if desc.endswith("."):
        desc = desc.rstrip(".")
    # Check if first word is a normal word and make it uppercase
    if " " in desc:
        first_word, rest = desc.split(" ", 1)
        # leave words with a slash at index 1 (quotients of the form a/b, e.g. m/z) untouched
        if first_word.islower() and first_word.find("/") != 1:
            # str.capitalize() returns a new string, so the result must be kept
            first_word = first_word.capitalize()
        desc = first_word + " " + rest
    label = desc

    # Try to split the label if it is too long
    if len(desc) > 50:
        # find an example and put everything before in the label and the e.g. in the help
        if desc.find("e.g.") > 1:
            label, help_text = desc.split("e.g.", 1)
            help_text = "e.g." + help_text
        else:
            # find the end of the first sentence
            # look for ". " because some labels contain .file or something similar
            period_index = desc.find(". ")
            question_index = desc.find("? ")
            delimiter = ""
            if period_index > 1 and question_index > 1:
                delimiter = ". " if period_index < question_index else "? "
            elif period_index > 1:
                delimiter = ". "
            elif question_index > 1:
                delimiter = "? "
            if delimiter != "":
                label, help_text = desc.split(delimiter, 1)

            # add the question mark back
            if delimiter == "? ":
                label += "? "

    # remove trailing linebreaks; str.rstrip('<br>') would strip any of the characters
    # '<', 'b', 'r', '>' and thereby truncate labels such as "... filter"
    label = label.rstrip()
    while label.endswith(line_break):
        label = label[:-len(line_break)].rstrip()
    return label, help_text
1096
1097
def get_indented_text(text, indentation_level):
    """Return ``text`` prefixed with MESSAGE_INDENTATION_INCREMENT spaces per indentation level."""
    padding = " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level)
    return "%s%s" % (padding, text)
1102
1103
def warning(warning_text, indentation_level):
    """Write an indented ``WARNING:`` line to standard output."""
    message = get_indented_text("WARNING: %s\n" % warning_text, indentation_level)
    sys.stdout.write(message)
1106
1107
def error(error_text, indentation_level):
    """Write an indented ``ERROR:`` line to standard error."""
    message = get_indented_text("ERROR: %s\n" % error_text, indentation_level)
    sys.stderr.write(message)
1110
1111
def info(info_text, indentation_level):
    """Write an indented ``INFO:`` line to standard output."""
    message = get_indented_text("INFO: %s\n" % info_text, indentation_level)
    sys.stdout.write(message)
1114
1115
def is_boolean_parameter(param):
    """Tell whether the choices of ``param`` are boolean, i.e. exactly yes/no or true/false.

    :param param: parameter whose ``restrictions`` are inspected
    :return: True only for a _Choices restriction holding exactly two values
             that are either yes/no or true/false (case and padding ignored)
    """
    restrictions = param.restrictions
    if type(restrictions) is not _Choices:
        return False
    # for a true boolean experience, we need 2 values
    if len(restrictions.choices) != 2:
        return False
    choices = get_lowercase_list(restrictions.choices)
    if "yes" in choices and "no" in choices:
        return True
    return "true" in choices and "false" in choices
1127
1128
def is_selection_parameter(param):
    """Tell whether ``param`` is restricted to a set of choices (its restrictions are exactly _Choices)."""
    restriction_type = type(param.restrictions)
    return restriction_type is _Choices
1132
1133
def get_lowercase_list(some_list):
    """Return a new list holding the stringified, lowercased and stripped items of ``some_list``.

    Uses ``str`` methods rather than the deprecated functions of the ``string``
    module and always returns a real list, so that the result can be tested
    for membership repeatedly.

    :param some_list: iterable of arbitrary values
    :return: list of str
    """
    return [str(item).lower().strip() for item in some_list]
1139
1140
def create_boolean_parameter(param_node, param):
    """Turn ``param_node`` into a Galaxy boolean parameter.

    Assumes that ``param`` has restrictions and that only two choices are
    present (either yes/no or true/false).

    TODO: true and false values can be way more than 'true' and 'false'
    but for that we need CTD support

    :param param_node: the ``<param>`` XML node to fill in
    :param param: the CTD parameter; ``restrictions.choices`` and ``default`` are read
    """
    # by default, 'true' and 'false' are handled as flags, like the verbose flag (i.e., -v)
    true_value = "-%s" % get_param_name(param)
    false_value = ""
    choices = get_lowercase_list(param.restrictions.choices)
    if "yes" in choices:
        true_value = "yes"
        false_value = "no"
    param_node.attrib["truevalue"] = true_value
    param_node.attrib["falsevalue"] = false_value

    # set the checked attribute
    if param.default is not None:
        # stringify first so that non-string defaults (e.g. a real bool) are handled too
        default = str(param.default).lower().strip()
        checked_value = "true" if default in ("yes", "true") else "false"
        param_node.attrib["checked"] = checked_value
1167
1168
def create_outputs(parent, model, **kwargs):
    """Create the ``<outputs>`` section under ``parent`` from the output-file parameters of ``model``.

    :param parent: XML node that receives the ``<outputs>`` node
    :param model: model whose parameters are converted
    :param kwargs: must contain "parameter_hardcoder"; "blacklisted_parameters" and
                   "supported_file_formats" are read once a parameter is processed
    """
    outputs_node = add_child_node(parent, "outputs")
    hardcoder = kwargs["parameter_hardcoder"]

    for param in extract_parameters(model):
        # no need to show hardcoded or blacklisted parameters
        hardcoded_value = hardcoder.get_hardcoded_value(param.name, model.name)
        is_hidden = param.name in kwargs["blacklisted_parameters"] or hardcoded_value
        if not is_hidden and param.type is _OutFile:
            create_output_node(outputs_node, param, model, kwargs["supported_file_formats"])

    # If there are no outputs defined in the ctd the node will have no children
    # and the stdout will be used as output
    if len(outputs_node) == 0:
        stdout_attributes = OrderedDict([("name", "param_stdout"), ("format", "text"),
                                         ("label", "Output from stdout")])
        add_child_node(outputs_node, "data", stdout_attributes)
1188
1189
def create_output_node(parent, param, model, supported_file_formats):
    """Create the ``<data>`` node for one output-file parameter.

    :param parent: the ``<outputs>`` XML node
    :param param: the output parameter
    :param model: the model, searched for an input with the same formats
    :param supported_file_formats: registry of known formats
    :return: the created ``<data>`` node
    :raises InvalidModelException: if the restrictions are not file formats
    """
    data_node = add_child_node(parent, "data")
    data_node.attrib["name"] = get_galaxy_parameter_name(param)

    data_format = "data"
    restrictions = param.restrictions
    if restrictions is not None:
        if type(restrictions) is not _FileFormat:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] "
                                        "for output [%(name)s]" % {"type": type(param.restrictions),
                                                                   "name": param.name})

        # collect the formats that have not been registered yet...
        unsupported = "".join(" " + str(format_name)
                              for format_name in restrictions.formats
                              if format_name not in supported_file_formats.keys())
        # ... and warn only if there is something to complain about
        if unsupported:
            warning("Parameter " + param.name + " has the following unsupported format(s):" + unsupported, 1)

        # set the data output node to one of the supported file formats (if any)
        formats = get_supported_file_types(restrictions.formats, supported_file_formats)
        if formats:
            data_format = formats.pop()
        # if there are more than one output file formats try to take the format from the input parameter
        if formats:
            corresponding_input = get_input_with_same_restrictions(param, model, supported_file_formats)
            if corresponding_input is not None:
                data_format = "input"
                data_node.attrib["metadata_source"] = get_galaxy_parameter_name(corresponding_input)

    data_node.attrib["format"] = data_format

    # TODO: find a smarter label ?
    return data_node
1231
1232
def get_supported_file_types(formats, supported_file_formats):
    """Return the set of Galaxy extensions of those ``formats`` that are registered.

    Formats missing from ``supported_file_formats`` are skipped, so the
    registry is indexed directly instead of building a fallback entry that
    could never be used.

    :param formats: iterable of format names
    :param supported_file_formats: mapping of format name to an object exposing ``galaxy_extension``
    :return: set of Galaxy extensions
    """
    return {supported_file_formats[format_name].galaxy_extension
            for format_name in formats if format_name in supported_file_formats}
1236
1237
def create_change_format_node(parent, data_formats, input_ref):
    """Add a ``<change_format>`` node holding one ``<when>`` child per data format.

    The generated XML looks like:
        <change_format>
            <when input="secondary_structure" value="true" format="text"/>
        </change_format>

    :param parent: XML node that receives the ``<change_format>`` node
    :param data_formats: iterable of formats; each is used as both ``value`` and ``format``
    :param input_ref: value of the ``input`` attribute of every ``<when>`` node
    """
    change_format_node = add_child_node(parent, "change_format")
    for data_format in data_formats:
        when_attributes = OrderedDict()
        when_attributes["input"] = input_ref
        when_attributes["value"] = data_format
        when_attributes["format"] = data_format
        add_child_node(change_format_node, "when", when_attributes)
1246
1247
def create_help(tool, model):
    """Add the ``<help>`` node to ``tool``.

    The text is the model's ``manual`` attribute (if present), followed by a
    pointer to its ``docurl`` attribute (if present). When neither yields any
    text, "No help available" is used.

    :param tool: XML node that receives the ``<help>`` node
    :param model: object exposing ``opt_attribs`` (a mapping)
    """
    help_text = ''
    if 'manual' in model.opt_attribs:
        help_text += '%s\n\n' % model.opt_attribs["manual"]

    doc_url = model.opt_attribs.get("docurl")
    if doc_url is not None:
        help_text += "\nFor more information, visit %s" % doc_url

    # fall back only when there is really nothing to show
    if not help_text:
        help_text = "No help available"

    help_node = SubElement(tool, "help")
    # TODO: do we need CDATA Section here?
    help_node.text = help_text
1265
1266
def extract_parameters(model):
    """Flatten the (possibly nested) ParameterGroup elements of ``model``.

    Since a model might contain several ParameterGroup elements, the
    parameters are simply 'flattened' to generate the Galaxy wrapper.

    :param model: object whose ``parameters`` attribute is the root ParameterGroup
    :return: a reverse iterator over the collected non-group parameters
    """
    flattened = []
    if len(model.parameters.parameters) > 0:
        # stack of elements that still have to be processed;
        # we know that CTDModel has one parent ParameterGroup
        stack = [model.parameters]
        while stack:
            current = stack.pop()
            if type(current) is ParameterGroup:
                # push the first-level children of this ParameterGroup
                stack.extend(current.parameters.values())
            else:
                flattened.append(current)
    # return the reversed list of parameters (as it is now,
    # we have the last parameter in the CTD as first in the list)
    return reversed(flattened)
1286
1287
def add_child_node(parent_node, child_node_name, attributes=None):
    """Add a child node with the given name to ``parent_node`` and return it.

    :param parent_node: XML node that receives the child
    :param child_node_name: tag of the new child
    :param attributes: optional mapping of attributes for the child; defaults to none
    :return: the newly created child node
    """
    # a fresh mapping per call instead of a shared mutable default argument
    if attributes is None:
        attributes = OrderedDict()
    return SubElement(parent_node, child_node_name, attributes)
1292
1293
# Run the command-line entry point only when this file is executed as a script,
# and hand the return value of main() to the interpreter as the exit status.
if __name__ == "__main__":
    sys.exit(main())
0 <?xml version='1.0' encoding='UTF-8'?>
1 <!-- CTD2Galaxy depends on this file and on the stdio, requirements and advanced_options macros!
2 You can edit this file to add your own macros, if you so desire, or you can
3 add additional macro files using the m/macros parameter -->
4 <macros>
5 <xml name="requirements">
6 <requirements>
7 <requirement type="binary">@EXECUTABLE@</requirement>
8 </requirements>
9 </xml>
10 <xml name="stdio">
11 <stdio>
12 <exit_code range="1:"/>
13 <exit_code range=":-1"/>
14 <regex match="Error:"/>
15 <regex match="Exception:"/>
16 </stdio>
17 </xml>
18 <xml name="advanced_options">
19 <conditional name="adv_opts">
20 <param name="adv_opts_selector" type="select" label="Advanced Options">
21 <option value="basic" selected="True">Hide Advanced Options</option>
22 <option value="advanced">Show Advanced Options</option>
23 </param>
24 <when value="basic"/>
25 <when value="advanced">
26 <yield/>
27 </when>
28 </conditional>
29 </xml>
30 </macros>
+0
-13
setup.py less more
# Packaging script: registers the CTD2Galaxy package with distutils.
from distutils.core import setup

setup(
    name='CTD2Galaxy',
    version='1.0',
    packages=['CTD2Galaxy'],
    url='https://github.com/WorkflowConversion/CTD2Galaxy',
    license='',
    author='Luis de la Garza',
    author_email='',
    # NOTE(review): py_modules is given a path-like name here; the generator module
    # is presumably already covered by packages=['CTD2Galaxy'] - confirm
    py_modules=['CTD2Galaxy/generator'],
    description='A program to convert CTDs to Galaxy tool wrappers.'
)