Made -m/--macros optional, providing a default value; moved around macros.xml; updated README.md
Luis de la Garza
7 years ago
0 | #!/usr/bin/env python | |
1 | # encoding: utf-8 | |
2 | ||
3 | """ | |
4 | @author: delagarza | |
5 | """ | |
6 | ||
7 | ||
8 | import sys | |
9 | import os | |
10 | import traceback | |
11 | import ntpath | |
12 | import string | |
13 | ||
14 | from argparse import ArgumentParser | |
15 | from argparse import RawDescriptionHelpFormatter | |
16 | from CTDopts.CTDopts import CTDModel, _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, \ | |
17 | _FileFormat, ModelError | |
18 | from collections import OrderedDict | |
19 | from string import strip | |
20 | from lxml import etree | |
21 | from lxml.etree import SubElement, Element, ElementTree, ParseError, parse | |
22 | ||
# Nothing is exported through ``from <module> import *``.
__all__ = []

# Release bookkeeping; main() renders these into the ``--version`` message.
__version__ = 1.0
__date__ = '2014-09-17'
__updated__ = '2016-05-09'

# NOTE(review): presumably the indentation step used by the message helpers
# (info/warning/error), which are not visible in this part of the file.
MESSAGE_INDENTATION_INCREMENT = 2

# Maps a CTD parameter type to the name of the matching Galaxy parameter type.
TYPE_TO_GALAXY_TYPE = {
    int: 'integer',
    float: 'float',
    str: 'text',
    bool: 'boolean',
    _InFile: 'data',
    _OutFile: 'data',
    _Choices: 'select',
}

# Names of the macros every conversion relies on; parse_macros_files() refuses
# to continue unless all of them are defined in the given macros files.
STDIO_MACRO_NAME = "stdio"
REQUIREMENTS_MACRO_NAME = "requirements"
ADVANCED_OPTIONS_MACRO_NAME = "advanced_options"

REQUIRED_MACROS = [
    STDIO_MACRO_NAME,
    REQUIREMENTS_MACRO_NAME,
    ADVANCED_OPTIONS_MACRO_NAME,
]
38 | ||
39 | ||
class CLIError(Exception):
    """Generic exception to raise and log different fatal errors.

    The text handed to the constructor is kept in ``msg`` with an ``"E: "``
    prefix; that prefixed text is what ``str()`` returns.

    :param msg: description of the fatal error (without the prefix).
    """

    def __init__(self, msg):
        # Two-argument super() so that Exception.__init__ really runs; the
        # one-argument form only builds an unbound proxy object.
        super(CLIError, self).__init__(msg)
        self.msg = "E: %s" % msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        # Python 2 hook used by unicode(); never called on Python 3.
        return self.msg
51 | ||
52 | ||
class InvalidModelException(ModelError):
    """Model error that carries a human-readable explanation.

    :param message: explanation returned by ``str()`` and ``repr()``.
    """

    def __init__(self, message):
        super(InvalidModelException, self).__init__()
        self.message = message
        # main() reports ModelError instances through their ``msg`` attribute,
        # so expose the text under that name as well.
        self.msg = message

    def __str__(self):
        return self.message

    def __repr__(self):
        return self.message
63 | ||
64 | ||
class ApplicationException(Exception):
    """Error that aborts the conversion and is reported to the user by main().

    :param msg: explanation of the failure; stored in ``msg`` and returned
        unchanged by ``str()``.
    """

    def __init__(self, msg):
        # Two-argument super() so that Exception.__init__ really runs; the
        # one-argument form only builds an unbound proxy object.
        super(ApplicationException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        # Python 2 hook used by unicode(); never called on Python 3.
        return self.msg
75 | ||
76 | ||
class ExitCode:
    """Plain value holder with ``range``, ``level`` and ``description`` attributes."""

    def __init__(self, code_range="", level="", description=None):
        # ``code_range`` is stored under the shorter attribute name ``range``.
        self.range, self.level, self.description = code_range, level, description
82 | ||
83 | ||
class DataType:
    """Record relating a file extension to its Galaxy counterpart.

    Attributes: ``extension``, ``galaxy_extension``, ``galaxy_type`` and
    ``mimetype``; everything but ``extension`` defaults to ``None``.
    """

    def __init__(self, extension, galaxy_extension=None, galaxy_type=None, mimetype=None):
        self.extension, self.galaxy_extension = extension, galaxy_extension
        self.galaxy_type, self.mimetype = galaxy_type, mimetype
90 | ||
91 | ||
class ParameterHardcoder:
    """Registry of parameter values that are fixed instead of user-provided.

    Values live in ``parameter_map`` under one of two kinds of keys:

    * ``[ParameterName]`` for a value that applies to every tool;
    * ``[ParameterName][separator][ToolName]`` for a value that applies to a
      single tool only.

    Examples (the separator is ``'!'``):

        threads                 -> 24
        adapter                 -> adapter.exe
        adapter!XTandemAdapter  -> xtandem.exe
    """

    def __init__(self):
        self.separator = "!"
        self.parameter_map = {}

    def get_hardcoded_value(self, parameter_name, tool_name):
        """Return the most specific value registered, or None if there is none.

        A value registered for ``tool_name`` wins over one registered for all
        tools.
        """
        tool_specific = self.parameter_map.get(self.build_key(parameter_name, tool_name))
        if tool_specific is None:
            # fall back to the value shared by all tools (may be None as well)
            return self.parameter_map.get(parameter_name)
        return tool_specific

    def register_parameter(self, parameter_name, parameter_value, tool_name=None):
        """Store a value; without ``tool_name`` it applies to every tool."""
        key = self.build_key(parameter_name, tool_name)
        self.parameter_map[key] = parameter_value

    def build_key(self, parameter_name, tool_name):
        """Compose the lookup key for a parameter/tool pair."""
        if tool_name is not None:
            return "%s%s%s" % (parameter_name, self.separator, tool_name)
        return parameter_name
123 | ||
124 | ||
def _create_argument_parser(description, version_message):
    """Build the command line parser of CTD2Galaxy.

    :param description: text shown at the top of the ``--help`` output.
    :param version_message: text printed by ``-V``/``--version``.
    :return: a configured ``ArgumentParser``.
    """
    parser = ArgumentParser(prog="CTD2Galaxy", description=description,
                            formatter_class=RawDescriptionHelpFormatter, add_help=True)
    # options using nargs="+" together with action="append" produce a list of lists;
    # validate_and_prepare_args() flattens them later on
    parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+", action="append",
                        help="List of CTD files to convert.")
    parser.add_argument("-o", "--output-destination", dest="output_destination", required=True,
                        help="If multiple input files are given, then a folder in which all generated "
                             "XMLs will be generated is expected; "
                             "if a single input file is given, then a destination file is expected.")
    parser.add_argument("-f", "--formats-file", dest="formats_file",
                        help="File containing the supported file formats. Run with '-h' or '--help' to see a "
                             "brief example on the layout of this file.", default=None, required=False)
    parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line",
                        help="Adds content to the command line", default="", required=False)
    parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination",
                        help="Specify the location of a datatypes_conf.xml to modify and add the registered "
                             "data types. If the provided destination does not exist, a new file will be created.",
                        default=None, required=False)
    parser.add_argument("-x", "--default-executable-path", dest="default_executable_path",
                        help="Use this executable path when <executablePath> is not present in the CTD",
                        default=None, required=False)
    parser.add_argument("-b", "--blacklist-parameters", dest="blacklisted_parameters", default=[], nargs="+",
                        action="append",
                        help="List of parameters that will be ignored and won't appear on the galaxy stub",
                        required=False)
    parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False,
                        help="Default category to use for tools lacking a category when generating tool_conf.xml")
    parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False,
                        help="Specify the location of an existing tool_conf.xml that will be modified to include "
                             "the converted tools. If the provided destination does not exist, a new file will "
                             "be created.")
    parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False,
                        help="The path that will be prepended to the file names when generating tool_conf.xml")
    parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False,
                        help="Each line of the file will be interpreted as a tool name that needs translation. "
                             "Run with '-h' or '--help' to see a brief example on the format of this file.")
    parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False,
                        help="File containing a list of tools for which a Galaxy stub will not be generated. "
                             "Run with '-h' or '--help' to see a brief example on the format of this file.")
    parser.add_argument("-m", "--macros", dest="macros_files", default=[], nargs="+", action="append",
                        help="Import the additional given file(s) as macros. The macros stdio, requirements and "
                             "advanced_options are required. Please see sample_files/macros.xml for an example "
                             "of a valid macros file. All defined macros will be imported.",
                        required=True)
    parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None, required=False,
                        help="File containing hardcoded values for the given parameters. Run with '-h' or '--help' "
                             "to see a brief example on the format of this file.")
    # TODO: add verbosity, maybe?
    parser.add_argument("-V", "--version", action='version', version=version_message)
    return parser


def main(argv=None):  # IGNORE:C0111
    """Command line entry point: convert CTD files into Galaxy tool wrappers.

    :param argv: optional extra command line arguments; they are parsed in
        addition to the ones found in ``sys.argv[1:]``.
    :return: 0 on success (or when interrupted from the keyboard), 1 when the
        conversion fails with an ApplicationException or a ModelError, 2 on any
        other exception (its traceback is printed).
    """
    # Arguments handed in by the caller are parsed after the ones of the process. They are
    # combined in a local list: extending sys.argv itself would make them pile up (and, for
    # the "append" options, be parsed again) every time main() is called.
    command_line = sys.argv[1:]
    if argv is not None:
        command_line = command_line + list(argv)

    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_short_description = "CTD2Galaxy - A project from the GenericWorkflowNodes family " \
                                "(https://github.com/orgs/genericworkflownodes)"
    # "\\$" yields a literal backslash followed by '$' in the help text
    program_usage = '''
USAGE:

I - Parsing a single CTD file and generate a Galaxy wrapper:

    $ python generator.py -i input.ctd -o output.xml


II - Parsing all found CTD files (files with .ctd and .xml extension) in a given folder and
     output converted Galaxy wrappers in a given folder:

    $ python generator.py -i /home/user/*.ctd -o /home/user/galaxywrappers


III - Providing file formats, mimetypes

Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a certain
data format will be able to receive data from a port from the same format. This converter allows you to provide
a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. The layout of
this file consists of lines, each of either one, three or four columns separated by any amount of whitespace. The
content of each column is as follows:

* 1st column: file extension
* 2nd column: data type, as listed in Galaxy
* 3rd column: full-named Galaxy data type, as it will appear on datatypes_conf.xml
* 4th column: mimetype (optional)

The following is an example of a valid "file formats" file:

########################################## FILE FORMATS example ##########################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the file format as given in the CTD and second column is the Galaxy data format.
# The second, third and fourth columns can be left empty if the data type has already been registered
# in Galaxy, otherwise, all but the mimetype must be provided.

# CTD type    # Galaxy type      # Long Galaxy data type            # Mimetype
csv           tabular            galaxy.datatypes.data:Text
fasta
ini           txt                galaxy.datatypes.data:Text
txt
idxml         txt                galaxy.datatypes.xml:GenericXml    application/xml
options       txt                galaxy.datatypes.data:Text
grid          grid               galaxy.datatypes.data:Grid

##########################################################################################################

Note that each line consists precisely of either one, three or four columns. In the case of data types already
registered in Galaxy (such as fasta and txt in the above example), only the first column is needed. In the case of
data types that haven't been yet registered in Galaxy, the first three columns are needed (mimetype is optional).

For information about Galaxy data types and subclasses, see the following page:
https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes


IV - Hardcoding parameters

It is possible to hardcode parameters. This makes sense if you want to set a tool in Galaxy in 'quiet' mode or if
your tools support multi-threading and accept the number of threads via a parameter, without giving the end user the
chance to change the values for these parameters.

In order to generate hardcoded parameters, you need to provide a simple file. Each line of this file contains two
or three columns separated by whitespace. Any line starting with a '#' will be ignored. The first column contains
the name of the parameter, the second column contains the value that will always be set for this parameter. The
first two columns are mandatory.

If the parameter is to be hardcoded only for a set of tools, then a third column can be added. This column includes
a comma-separated list of tool names for which the parameter will be hardcoded. If a third column is not included,
then all processed tools containing the given parameter will get a hardcoded value for it.

The following is an example of a valid file:

##################################### HARDCODED PARAMETERS example #####################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the name of the parameter and the second column is the value that will be used.

# Parameter name            # Value                         # Tool(s)
threads                     \\${GALAXY_SLOTS:-24}
mode                        quiet
xtandem_executable          xtandem                         XTandemAdapter
verbosity                   high                            Foo, Bar

#########################################################################################################

Using the above file will produce a <command> similar to:

[tool_name] ... -threads \\${GALAXY_SLOTS:-24} -mode quiet ...

For all tools. For XTandemAdapter, the <command> will be similar to:

XtandemAdapter ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -xtandem_executable xtandem ...

And for tools Foo and Bar, the <command> will be similar to:

Foo ... ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -verbosity high ...


V - Control which tools will be converted

Sometimes only a subset of CTDs needs to be converted. It is possible to either explicitly specify which tools will
be converted or which tools will not be converted.

The value of the -s/--skip-tools parameter is a file in which each line will be interpreted as the name of a tool
that will not be converted. Conversely, the value of the -r/--required-tools is a file in which each line will be
interpreted as a tool that is required. Only one of these parameters can be specified at a given time.

The format of both files is exactly the same. As stated before, each line will be interpreted as the name of a tool;
any line starting with a '#' will be ignored.

'''
    program_license = '''%(short_description)s
Copyright 2015, Luis de la Garza

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

%(usage)s
''' % {'short_description': program_short_description, 'usage': program_usage}

    try:
        # set up the argument parser and process the arguments
        parser = _create_argument_parser(program_license, program_version_message)
        args = parser.parse_args(command_line)

        # validate and prepare the passed arguments
        validate_and_prepare_args(args)

        # extract the names of the macros and check that we have found the ones we need
        macros_file_names = args.macros_files
        macros_to_expand = parse_macros_files(macros_file_names)

        # parse the given supported file-formats file
        supported_file_formats = parse_file_formats(args.formats_file)

        # parse the hardcoded parameters file
        parameter_hardcoder = parse_hardcoded_parameters(args.hardcoded_parameters)

        # parse the skip/required tools files
        skip_tools = parse_tools_list_file(args.skip_tools_file)
        required_tools = parse_tools_list_file(args.required_tools_file)

        parsed_models = convert(args.input_files,
                                args.output_destination,
                                supported_file_formats=supported_file_formats,
                                default_executable_path=args.default_executable_path,
                                add_to_command_line=args.add_to_command_line,
                                blacklisted_parameters=args.blacklisted_parameters,
                                required_tools=required_tools,
                                skip_tools=skip_tools,
                                macros_file_names=macros_file_names,
                                macros_to_expand=macros_to_expand,
                                parameter_hardcoder=parameter_hardcoder)

        # TODO: add some sort of warning if a macro that doesn't exist is to be expanded

        # it is not needed to copy the macros files, since the user has provided them

        # generation of galaxy stubs is ready... now, let's see if we need to generate a tool_conf.xml
        if args.tool_conf_destination is not None:
            generate_tool_conf(parsed_models, args.tool_conf_destination,
                               args.galaxy_tool_path, args.default_category)

        # now datatypes_conf.xml
        if args.data_types_destination is not None:
            generate_data_type_conf(supported_file_formats, args.data_types_destination)

        return 0

    except KeyboardInterrupt:
        # a user-requested stop is not treated as a failure
        return 0
    except ApplicationException as e:
        error("CTD2Galaxy could not complete the requested operation.", 0)
        error("Reason: " + e.msg, 0)
        return 1
    except ModelError as e:
        error("There seems to be a problem with one of your input CTDs.", 0)
        # not every ModelError is guaranteed to carry a ``msg`` attribute
        # (InvalidModelException keeps its text in ``message``), so fall back to str()
        error("Reason: " + str(getattr(e, "msg", None) or e), 0)
        return 1
    except Exception:
        traceback.print_exc()
        return 2
377 | ||
378 | ||
def parse_tools_list_file(tools_list_file):
    """Read tool names, one per line, from the given file.

    Blank lines and lines whose first non-blank character is '#' are skipped;
    surrounding whitespace is removed from every name.

    :param tools_list_file: path of the file to read, or None.
    :return: list of tool names in file order, or None if no file was given.
    """
    if tools_list_file is None:
        return None

    with open(tools_list_file) as handle:
        candidates = (raw_line.strip() for raw_line in handle)
        return [name for name in candidates if name and not name.startswith("#")]
391 | ||
392 | ||
def parse_macros_files(macros_file_names):
    """Collect the names of all macros defined in the given macros files.

    Every ``<xml name="...">`` element found directly below the root of each
    file counts as a macro. A name seen more than once only produces a warning.

    :param macros_file_names: paths of the macros files to inspect.
    :return: set with the names of the macros to expand, i.e. all names found
        except ``advanced_options``.
    :raises ApplicationException: if a file cannot be opened or parsed, or if
        one of the macros listed in REQUIRED_MACROS is not defined in any file.
    """
    macros_to_expand = set()

    for macros_file_name in macros_file_names:
        try:
            # binary mode leaves decoding to the XML parser; the context manager
            # makes sure the handle is released even when parsing fails
            with open(macros_file_name, "rb") as macros_file:
                root = parse(macros_file).getroot()
            for xml_element in root.findall("xml"):
                name = xml_element.attrib["name"]
                if name in macros_to_expand:
                    warning("Macro %s has already been found. Duplicate found in file %s." %
                            (name, macros_file_name), 0)
                else:
                    macros_to_expand.add(name)
        except ParseError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " +
                                       str(e))
        except IOError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " +
                                       str(e))

    # "stdio", "requirements" and "advanced_options" must be defined in at least one of the given files
    missing_needed_macros = [required_macro for required_macro in REQUIRED_MACROS
                             if required_macro not in macros_to_expand]

    if missing_needed_macros:
        raise ApplicationException(
            "The following required macro(s) were not found in any of the given macros files: %s, "
            "see sample_files/macros.xml for an example of a valid macros file."
            % ", ".join(missing_needed_macros))

    # we do not need to "expand" the advanced_options macro
    macros_to_expand.remove(ADVANCED_OPTIONS_MACRO_NAME)
    return macros_to_expand
429 | ||
def parse_hardcoded_parameters(hardcoded_parameters_file):
    """Load the hardcoded parameter values from the given file.

    Each relevant line holds two or three whitespace-separated columns:
    parameter name, value and, optionally, a comma-separated list of the tools
    the value applies to. Blank lines and lines starting with '#' are skipped;
    other lines with a single column are reported with a warning and ignored.

    :param hardcoded_parameters_file: path of the file to read, or None.
    :return: a ParameterHardcoder holding the registered values (empty if no
        file was given).
    """
    parameter_hardcoder = ParameterHardcoder()
    if hardcoded_parameters_file is None:
        return parameter_hardcoder

    with open(hardcoded_parameters_file) as f:
        for line_number, line in enumerate(f, 1):
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith("#"):
                continue

            # split at most twice: the third column (the list of tools, which may
            # contain blanks after the commas) has to stay in one piece
            parsed_hardcoded_parameter = stripped_line.split(None, 2)
            # valid lines contain two or three columns
            if len(parsed_hardcoded_parameter) not in (2, 3):
                warning("Invalid line at line number %d of the given hardcoded parameters file. Line will be "
                        "ignored:\n%s" % (line_number, line), 0)
                continue

            parameter_name = parsed_hardcoded_parameter[0]
            hardcoded_value = parsed_hardcoded_parameter[1]
            if len(parsed_hardcoded_parameter) == 3:
                # the value only applies to the listed tools
                for tool_name in parsed_hardcoded_parameter[2].split(','):
                    parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name.strip())
            else:
                # the value applies to every tool
                parameter_hardcoder.register_parameter(parameter_name, hardcoded_value)

    return parameter_hardcoder
460 | ||
461 | ||
def parse_file_formats(formats_file):
    """Read the supported file formats from the given file.

    Relevant lines hold one, three or four whitespace-separated columns:
    CTD extension, Galaxy extension, Galaxy data type and (optionally) a
    mimetype. With a single column the Galaxy extension equals the CTD
    extension. Blank lines and lines starting with '#' are skipped; lines with
    any other number of columns are reported with a warning and ignored.

    :param formats_file: path of the file to read, or None.
    :return: dict mapping each CTD extension to its DataType (empty if no file
        was given).
    """
    supported_formats = {}
    if formats_file is None:
        return supported_formats

    with open(formats_file) as f:
        for line_number, line in enumerate(f, 1):
            content = line.strip()
            if not content or content.startswith("#"):
                # nothing to parse on empty lines and comments
                continue

            columns = content.split()
            column_count = len(columns)
            if column_count not in (1, 3, 4):
                warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" %
                        (line_number, line), 0)
                continue

            extension = columns[0]
            if column_count == 1:
                supported_formats[extension] = DataType(extension, extension)
                continue

            # the mimetype is the optional fourth column
            mimetype = columns[3] if column_count == 4 else None
            supported_formats[extension] = DataType(extension, columns[1], columns[2], mimetype)

    return supported_formats
493 | ||
494 | ||
def validate_and_prepare_args(args):
    """Check the parsed command line arguments and normalise them in place.

    The list-of-lists values of ``input_files``, ``blacklisted_parameters`` and
    ``macros_files`` are replaced by flat lists.

    :param args: namespace produced by the argument parser of main().
    :raises ApplicationException: if both a skip-tools and a required-tools
        file are given, if the output destination does not fit the number of
        input files, if an input path is not an existing file, or if an output
        file name points to a directory.
    """
    # check that only one of skip_tools_file and required_tools_file has been provided
    if args.skip_tools_file is not None and args.required_tools_file is not None:
        raise ApplicationException(
            "You have provided both a file with tools to ignore and a file with required tools.\n"
            "Only one of -s/--skip-tools, -r/--required-tools can be provided.")

    # first, we convert all list of lists in args to flat lists
    for list_to_flatten in ("input_files", "blacklisted_parameters", "macros_files"):
        setattr(args, list_to_flatten, [item for sub_list in getattr(args, list_to_flatten) for item in sub_list])

    # if input is a single file, we expect output to be a file (and not a dir that already exists)
    if len(args.input_files) == 1 and os.path.isdir(args.output_destination):
        raise ApplicationException("If a single input file is provided, output (%s) is expected to be a file "
                                   "and not a folder.\n" % args.output_destination)

    # if input is a list of files, we expect output to be a folder
    if len(args.input_files) > 1 and not os.path.isdir(args.output_destination):
        raise ApplicationException("If several input files are provided, output (%s) is expected to be an "
                                   "existing directory.\n" % args.output_destination)

    # check that the provided input files, if provided, contain a valid file path
    input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files",
                                "input_files", "formats_file", "hardcoded_parameters"]

    for variable_name in input_variables_to_check:
        member_value = getattr(args, variable_name)
        if member_value is None:
            continue
        # we are handling either a single file or a list of files
        file_names = member_value if isinstance(member_value, list) else [member_value]
        for file_name in file_names:
            # str.strip() instead of string.strip(), which no longer exists in Python 3
            path_to_check = str(file_name).strip()
            # isfile() is already False for paths that do not exist
            if not os.path.isfile(path_to_check):
                raise ApplicationException(
                    "The provided input file (%s) does not exist or is not a valid file path."
                    % path_to_check)

    # check that the provided output files, if provided, contain a valid file path (i.e., not a folder)
    for variable_name in ("data_types_destination", "tool_conf_destination"):
        file_name = getattr(args, variable_name)
        if file_name is not None and os.path.isdir(file_name):
            raise ApplicationException("The provided output file name (%s) points to a directory." % file_name)
547 | ||
548 | ||
def convert(input_files, output_destination, **kwargs):
    """Convert each of the given CTD files into a Galaxy tool wrapper.

    Files that cannot be parsed are reported and skipped, and so are tools
    excluded through ``skip_tools`` / ``required_tools``.

    :param input_files: paths of the CTD files to convert.
    :param output_destination: the output file if a single input file is given,
        otherwise the folder that receives one ``<input name>.xml`` per input.
    :param kwargs: conversion settings; ``skip_tools`` and ``required_tools``
        are read here, the whole set is handed on to the helpers that build
        the wrapper.
    :return: list of ``[model, output file name]`` pairs, one per written
        wrapper.
    """
    # first, generate a model
    is_converting_multiple_ctds = len(input_files) > 1
    parsed_models = []
    for input_file in input_files:
        try:
            model = CTDModel(from_file=input_file)
        except Exception as e:
            # a broken CTD must not stop the conversion of the remaining ones
            error(str(e), 1)
            continue

        if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]:
            info("Skipping tool %s" % model.name, 0)
            continue
        if kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]:
            info("Tool %s is not required, skipping it" % model.name, 0)
            continue

        info("Converting from %s " % input_file, 0)
        tool = create_tool(model)
        write_header(tool, model)
        create_description(tool, model)
        expand_macros(tool, model, **kwargs)
        create_command(tool, model, **kwargs)
        create_inputs(tool, model, **kwargs)
        create_outputs(tool, model, **kwargs)
        create_help(tool, model)

        # finally, serialize the tool
        output_file = output_destination
        # if multiple inputs are being converted,
        # then we need to generate a different output_file for each input
        if is_converting_multiple_ctds:
            output_file = os.path.join(output_file, get_filename_without_suffix(input_file) + ".xml")
        # wrap our tool element into a tree to be able to serialize it; handing over the file
        # name lets lxml open and close the file itself, so no handle is left behind
        tree = ElementTree(tool)
        tree.write(output_file, encoding="UTF-8", xml_declaration=True, pretty_print=True)
        # keep the name of the generated file next to its model
        parsed_models.append([model, get_filename(output_file)])

    return parsed_models
590 | ||
591 | ||
def write_header(tool, model):
    """Put two XML comments in front of the tool element.

    The first one states that the file was generated, the second one proposes
    a tool section taken from the ``category`` of the model (empty if the model
    has none).
    """
    generated_notice = (
        "This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). "
        "This file was automatically generated using CTD2Galaxy.")
    tool.addprevious(etree.Comment(generated_notice))

    proposed_section = 'Proposed Tool Section: [%s]' % model.opt_attribs.get("category", "")
    tool.addprevious(etree.Comment(proposed_section))
597 | ||
598 | ||
def generate_tool_conf(parsed_models, tool_conf_destination, galaxy_tool_path, default_category):
    """Write a tool_conf.xml listing the converted tools grouped by category.

    :param parsed_models: ``[model, output file name]`` pairs as returned by
        convert().
    :param tool_conf_destination: path of the file to write; an existing file
        is overwritten.
    :param galaxy_tool_path: path prepended to every file name, or None for no
        prefix; a trailing "/" is added if missing.
    :param default_category: section used for tools without a category.
    """
    # for each category, we keep a list of the file names of the tools belonging to it
    categories_to_tools = dict()
    for model, file_name in parsed_models:
        # a missing or empty category falls back to the default one
        category = (model.opt_attribs.get("category", "") or "").strip()
        if not category:
            category = default_category
        categories_to_tools.setdefault(category, []).append(file_name)

    # at this point, we should have a map for all categories->tools
    toolbox_node = Element("toolbox")

    if galaxy_tool_path is None:
        galaxy_tool_path = ""
    else:
        # strip first, so that stray blanks never end up inside the generated paths
        galaxy_tool_path = galaxy_tool_path.strip()
        if not galaxy_tool_path.endswith("/"):
            galaxy_tool_path += "/"

    # items() instead of iteritems(): available on both Python 2 and Python 3
    for category, file_names in categories_to_tools.items():
        section_node = add_child_node(toolbox_node, "section")
        # section ids must not contain whitespace
        section_node.attrib["id"] = "section-id-" + "".join(category.split())
        section_node.attrib["name"] = category

        for filename in file_names:
            tool_node = add_child_node(section_node, "tool")
            tool_node.attrib["file"] = galaxy_tool_path + filename

    toolconf_tree = ElementTree(toolbox_node)
    # handing over the file name lets lxml open and close the file itself
    toolconf_tree.write(tool_conf_destination, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy tool_conf.xml in %s" % tool_conf_destination, 0)
630 | ||
631 | ||
def generate_data_type_conf(supported_file_formats, data_types_destination):
    """Write a datatypes_conf.xml registering the formats that carry a Galaxy type.

    :param supported_file_formats: dict of DataType objects as returned by
        parse_file_formats().
    :param data_types_destination: path of the file to write; an existing file
        is overwritten.
    """
    data_types_node = Element("datatypes")
    registration_node = add_child_node(data_types_node, "registration")
    registration_node.attrib["converters_path"] = "lib/galaxy/datatypes/converters"
    registration_node.attrib["display_path"] = "display_applications"

    for data_type in supported_file_formats.values():
        # formats without a Galaxy type are taken to be known to Galaxy already
        if data_type.galaxy_type is None:
            continue
        data_type_node = add_child_node(registration_node, "datatype")
        # parse_file_formats() always sets galaxy_extension together with galaxy_type
        data_type_node.attrib["extension"] = data_type.galaxy_extension
        data_type_node.attrib["type"] = data_type.galaxy_type
        if data_type.mimetype is not None:
            data_type_node.attrib["mimetype"] = data_type.mimetype

    data_types_tree = ElementTree(data_types_node)
    # handing over the file name lets lxml open and close the file itself
    data_types_tree.write(data_types_destination, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy datatypes_conf.xml in %s" % data_types_destination, 0)
652 | ||
653 | ||
# taken from
# http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format
def get_filename(path):
    """Return the last component of ``path``.

    ``ntpath`` is used for splitting; a trailing separator is ignored, so the
    name of the last folder is returned in that case.
    """
    parent, last_component = ntpath.split(path)
    if last_component:
        return last_component
    return ntpath.basename(parent)
659 | ||
660 | ||
def get_filename_without_suffix(path):
    """Return the base name of ``path`` with its last extension removed."""
    base_name = os.path.basename(path)
    return os.path.splitext(base_name)[0]
664 | ||
665 | ||
def create_tool(model):
    """Create the root <tool> element; id and name are both ``model.name``."""
    tool_attributes = OrderedDict()
    tool_attributes["id"] = model.name
    tool_attributes["name"] = model.name
    tool_attributes["version"] = model.version
    return Element("tool", tool_attributes)
668 | ||
669 | ||
def create_description(tool, model):
    """Add a <description> child to ``tool`` if the model provides a description."""
    description_text = model.opt_attribs.get("description")
    if description_text is None:
        # nothing to describe, leave the tool untouched
        return
    description_node = SubElement(tool, "description")
    description_node.text = description_text
674 | ||
675 | ||
def get_param_name(param):
    """Return the command-line name of ``param``, qualified by its parent groups.

    Names of nested groups are joined with ':'; the topmost parent group is not
    included (OpenMS legacy).
    """
    parent = param.parent
    # identity test against None; only groups that have a parent contribute a prefix
    if type(parent) is ParameterGroup and parent.parent is not None:
        return get_param_name(parent) + ":" + resolve_param_mapping(param)
    return resolve_param_mapping(param)
682 | ||
683 | ||
# some parameters are mapped to command line options, this method helps resolve those mappings, if any
# TODO: implement mapping of parameters!!!
def resolve_param_mapping(param):
    """Return the name to use for ``param``; currently just ``param.name``."""
    mapped_name = param.name
    return mapped_name
688 | ||
689 | ||
def create_command(tool, model, **kwargs):
    """Build the Cheetah command template and attach it to ``tool`` as <command>.

    The command starts with the tool executable and the extra command line
    text, followed by one snippet per parameter. Snippets of advanced
    parameters (other than output files) are collected inside a single
    ``#if $adv_opts.adv_opts_selector=='advanced'`` section. If the model has
    no output-file parameter, stdout is redirected to ``$param_stdout``.

    Keyword arguments read: ``default_executable_path``, ``add_to_command_line``,
    ``parameter_hardcoder`` and ``blacklisted_parameters``.
    """
    final_command = get_tool_executable_path(model, kwargs["default_executable_path"]) + '\n'
    final_command += kwargs["add_to_command_line"] + '\n'
    advanced_command_start = "#if $adv_opts.adv_opts_selector=='advanced':\n"
    advanced_command_end = '#end if'
    advanced_command = ''
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    found_output_parameter = False
    for param in extract_parameters(model):
        # recorded before the blacklist check, so blacklisted outputs count too
        if param.type is _OutFile:
            found_output_parameter = True
        command = ''
        param_name = get_param_name(param)

        if param.name in kwargs["blacklisted_parameters"]:
            continue

        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param_name, model.name)
        if hardcoded_value:
            command += '-%s %s\n' % (param_name, hardcoded_value)
        else:
            # parameter is neither blacklisted nor hardcoded...
            galaxy_parameter_name = get_galaxy_parameter_name(param)
            repeat_galaxy_parameter_name = get_repeat_galaxy_parameter_name(param)

            # logic for ITEMLISTs
            if param.is_list:
                if param.type is _InFile:
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + galaxy_parameter_name + ":\n"
                    command += " $token\n"
                    command += " #end for\n"
                else:
                    command += "\n#if $" + repeat_galaxy_parameter_name + ":\n"
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + repeat_galaxy_parameter_name + ":\n"
                    command += " #if \" \" in str(token):\n"
                    command += " \"$token." + galaxy_parameter_name + "\"\n"
                    command += " #else\n"
                    command += " $token." + galaxy_parameter_name + "\n"
                    command += " #end if\n"
                    command += " #end for\n"
                    command += "#end if\n"
            # logic for other ITEMs
            else:
                if param.advanced and param.type is not _OutFile:
                    actual_parameter = "$adv_opts.%s" % galaxy_parameter_name
                else:
                    actual_parameter = "$%s" % galaxy_parameter_name
                # if whitespace_validation has been set, we need to generate, for each parameter:
                # #if str( $t ).split() != '':
                # -t "$t"
                # #end if
                # TODO only useful for text fields, integers or floats
                # not useful for choices, input fields ...

                if not is_boolean_parameter(param) and type(param.restrictions) is _Choices:
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += " #if \" \" in str(" + actual_parameter + "):\n"
                    command += " \"" + actual_parameter + "\"\n"
                    command += " #else\n"
                    command += " " + actual_parameter + "\n"
                    command += " #end if\n"
                    command += "#end if\n"
                elif is_boolean_parameter(param):
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += "#end if\n"
                # compare by value: identity of string literals is an implementation detail
                elif TYPE_TO_GALAXY_TYPE[param.type] == 'text':
                    command += "#if " + actual_parameter + ":\n"
                    command += " -%s " % param_name
                    command += " \"" + actual_parameter + "\"\n"
                    command += "#end if\n"
                else:
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s ' % param_name
                    command += actual_parameter + "\n"
                    command += "#end if\n"

        if param.advanced and param.type is not _OutFile:
            advanced_command += " %s" % command
        else:
            final_command += command

    if advanced_command:
        final_command += "%s%s%s\n" % (advanced_command_start, advanced_command, advanced_command_end)

    if not found_output_parameter:
        final_command += "> $param_stdout\n"

    command_node = add_child_node(tool, "command")
    command_node.text = final_command
784 | ||
785 | ||
def expand_macros(tool, model, **kwargs):
    """Create the xml elements that import the macros files and expand the macros.

    Adds a <macros> child to ``tool`` holding the ``@EXECUTABLE@`` token and one
    <import> per entry of ``kwargs["macros_file_names"]``, then one <expand>
    child of ``tool`` per entry of ``kwargs["macros_to_expand"]``.
    """
    macros_node = add_child_node(tool, "macros")
    token_node = add_child_node(macros_node, "token")
    token_node.attrib["name"] = "@EXECUTABLE@"
    token_node.text = get_tool_executable_path(model, kwargs["default_executable_path"])

    # add <import> nodes
    for macro_file_name in kwargs["macros_file_names"]:
        # opening the file still fails early if it cannot be read, but the
        # handle is now released right away instead of being leaked
        with open(macro_file_name) as macro_file:
            import_node = add_child_node(macros_node, "import")
            # do not add the path of the file, rather, just its basename
            import_node.text = os.path.basename(macro_file.name)

    # add <expand> nodes
    for expand_macro in kwargs["macros_to_expand"]:
        expand_node = add_child_node(tool, "expand")
        expand_node.attrib["macro"] = expand_macro
805 | ||
806 | ||
def get_tool_executable_path(model, default_executable_path):
    """Return the executable (optionally prefixed by a path) used in the command.

    Rules:
      * executablePath missing: ``default_executable_path`` is used instead
      * no path at all: executableName if present, otherwise the tool name
      * path present: path (with a trailing '/') plus executableName if
        present, otherwise plus the tool name
    """
    attributes = model.opt_attribs
    path = attributes.get("executablePath", None)
    name = attributes.get("executableName", None)

    # fall back to the default path when the model does not define one
    if path is None:
        path = default_executable_path

    # normalize the path so that it always ends with '/'
    if path is not None:
        path = path.strip()
        if not path.endswith('/'):
            path += '/'

    # result for the case in which both pieces of information are present
    path_and_name = str(path) + str(name)

    if path is None:
        return model.name if name is None else name
    if name is None:
        return path + model.name
    return path_and_name
841 | ||
842 | ||
def get_galaxy_parameter_name(param):
    """Return the Galaxy name of ``param``: 'param_' plus its name with ':' and '-' replaced by '_'."""
    sanitized_name = get_param_name(param)
    for unwanted in (':', '-'):
        sanitized_name = sanitized_name.replace(unwanted, '_')
    return "param_%s" % sanitized_name
845 | ||
846 | ||
def get_input_with_same_restrictions(out_param, model, supported_file_formats):
    """Return the first restricted input file whose supported formats equal those of ``out_param``.

    Returns None if there is no such input parameter.
    """
    for candidate in extract_parameters(model):
        # only input files that carry restrictions can be compared
        if candidate.type is not _InFile or candidate.restrictions is None:
            continue
        candidate_formats = get_supported_file_types(candidate.restrictions.formats, supported_file_formats)
        wanted_formats = get_supported_file_types(out_param.restrictions.formats, supported_file_formats)
        if candidate_formats == wanted_formats:
            return candidate
    return None
855 | ||
856 | ||
def create_inputs(tool, model, **kwargs):
    """Add the <inputs> section of ``tool`` with one <param> per non-output parameter.

    Blacklisted and hardcoded parameters are skipped. Advanced parameters are
    placed under an <expand> node for the advanced options macro; list
    parameters that are not input files are wrapped in a <repeat> node.

    Keyword arguments read: ``parameter_hardcoder``, ``blacklisted_parameters``
    and ``supported_file_formats``.
    """
    inputs_node = SubElement(tool, "inputs")

    # some suites (such as OpenMS) need some advanced options when handling inputs
    # NOTE(review): the node is created as a child of tool and only appended to
    # inputs_node at the end if it has children - presumably an empty <expand>
    # stays under tool otherwise; confirm this is intended
    expand_advanced_node = add_child_node(tool, "expand", OrderedDict([("macro", ADVANCED_OPTIONS_MACRO_NAME)]))
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    # treat all non output-file parameters as inputs
    for param in extract_parameters(model):
        # no need to show hardcoded parameters
        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
        if param.name in kwargs["blacklisted_parameters"] or hardcoded_value:
            # let's not use an extra level of indentation and use NOP
            continue
        if param.type is not _OutFile:
            if param.advanced:
                # NOTE(review): expand_advanced_node is assigned unconditionally
                # above, so the else branch looks unreachable - confirm
                if expand_advanced_node is not None:
                    parent_node = expand_advanced_node
                else:
                    # something went wrong... we are handling an advanced parameter and the
                    # advanced input macro was not set... inform the user about it
                    info("The parameter %s has been set as advanced, but advanced_input_macro has "
                         "not been set." % param.name, 1)
                    # there is not much we can do, other than use the inputs_node as a parent node!
                    parent_node = inputs_node
            else:
                parent_node = inputs_node

            # for lists we need a repeat tag
            if param.is_list and param.type is not _InFile:
                rep_node = add_child_node(parent_node, "repeat")
                create_repeat_attribute_list(rep_node, param)
                parent_node = rep_node

            param_node = add_child_node(parent_node, "param")
            create_param_attribute_list(param_node, param, kwargs["supported_file_formats"])

    # advanced parameter selection should be at the end
    # and only available if an advanced parameter exists
    if expand_advanced_node is not None and len(expand_advanced_node) > 0:
        inputs_node.append(expand_advanced_node)
898 | ||
899 | ||
def get_repeat_galaxy_parameter_name(param):
    """Return the name of the <repeat> element that wraps ``param``."""
    galaxy_name = get_galaxy_parameter_name(param)
    return "rep_" + galaxy_name
902 | ||
903 | ||
def create_repeat_attribute_list(rep_node, param):
    """Set the name, min, max (optional) and title attributes of a <repeat> node."""
    attributes = rep_node.attrib
    attributes["name"] = get_repeat_galaxy_parameter_name(param)
    # required parameters need at least one repetition
    attributes["min"] = "1" if param.required else "0"
    # for the ITEMLISTs which have LISTITEM children we only
    # need one parameter as it is given as a string
    if param.default is not None:
        attributes["max"] = "1"
    attributes["title"] = get_galaxy_parameter_name(param)
915 | ||
916 | ||
def create_param_attribute_list(param_node, param, supported_file_formats):
    """Fill the attributes and children of the <param> node that represents ``param``.

    Sets name, type, format (input files), restriction-derived attributes or
    <option> children, optional, sanitizer (text), value, label and help.

    :raises ModelError: if the type of the parameter has no Galaxy counterpart
    :raises InvalidModelException: if the restrictions do not fit the parameter
    """
    param_node.attrib["name"] = get_galaxy_parameter_name(param)

    # .get() so that an unknown type reaches the check below instead of
    # surfacing as a bare KeyError
    param_type = TYPE_TO_GALAXY_TYPE.get(param.type)
    if param_type is None:
        raise ModelError("Unrecognized parameter type %(type)s for parameter %(name)s"
                         % {"type": param.type, "name": param.name})

    if param.is_list:
        param_type = "text"

    if is_selection_parameter(param):
        param_type = "select"

    if is_boolean_parameter(param):
        param_type = "boolean"

    if param.type is _InFile:
        # assume it's just text unless restrictions are provided
        param_format = "text"
        if param.restrictions is not None:
            # join all supported_formats for the file... this MUST be a _FileFormat
            if type(param.restrictions) is _FileFormat:
                param_format = ','.join(get_supported_file_types(param.restrictions.formats, supported_file_formats))
            else:
                raise InvalidModelException("Expected 'file type' restrictions for input file [%(name)s], "
                                            "but instead got [%(type)s]"
                                            % {"name": param.name, "type": type(param.restrictions)})
        param_node.attrib["type"] = "data"
        param_node.attrib["format"] = param_format
        # in the case of multiple input set multiple flag
        if param.is_list:
            param_node.attrib["multiple"] = "true"

    else:
        param_node.attrib["type"] = param_type

    # check for parameters with restricted values (which will correspond to a "select" in galaxy)
    if param.restrictions is not None:
        # it could be either _Choices or _NumericRange, with special case for boolean types
        if param_type == "boolean":
            create_boolean_parameter(param_node, param)
        elif type(param.restrictions) is _Choices:
            # create as many <option> elements as restriction values
            for choice in param.restrictions.choices:
                option_node = add_child_node(param_node, "option", OrderedDict([("value", str(choice))]))
                option_node.text = str(choice)

        elif type(param.restrictions) is _NumericRange:
            if param.type is not int and param.type is not float:
                # report the offending parameter type; the restriction type is
                # always _NumericRange in this branch and carries no information
                raise InvalidModelException("Expected either 'int' or 'float' in the numeric range restriction for "
                                            "parameter [%(name)s], but instead got [%(type)s]" %
                                            {"name": param.name, "type": param.type})
            # extract the min and max values and add them as attributes
            # validate the provided min and max values
            if param.restrictions.n_min is not None:
                param_node.attrib["min"] = str(param.restrictions.n_min)
            if param.restrictions.n_max is not None:
                param_node.attrib["max"] = str(param.restrictions.n_max)
        elif type(param.restrictions) is _FileFormat:
            param_node.attrib["format"] = ",".join(
                get_supported_file_types(param.restrictions.formats, supported_file_formats))
        else:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] for parameter [%(name)s]"
                                        % {"type": type(param.restrictions), "name": param.name})

    param_node.attrib["optional"] = str(not param.required)

    if param_type == "text":
        # add size attribute... this is the length of a textbox field in Galaxy (it could also be 15x2, for instance)
        param_node.attrib["size"] = "30"
        # add sanitizer nodes, this is needed for special character like "["
        # which are used for example by FeatureFinderMultiplex
        sanitizer_node = SubElement(param_node, "sanitizer")

        valid_node = SubElement(sanitizer_node, "valid", OrderedDict([("initial", "string.printable")]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '\'')]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '"')]))

    # check for default value
    if param.default is not None:
        if type(param.default) is list:
            # we ASSUME that a list of parameters looks like:
            # $ tool -ignore He Ar Xe
            # meaning, that, for example, Helium, Argon and Xenon will be ignored
            param_node.attrib["value"] = ' '.join(map(str, param.default))

        elif param_type != "boolean":
            # boolean parameters handle default values by using the "checked" attribute
            # there isn't much we can do... just stringify the value
            param_node.attrib["value"] = str(param.default)
    else:
        if param.type is int or param.type is float:
            # galaxy requires "value" to be included for int/float
            # since no default was included, we need to figure out one in a clever way... but let the user know
            # that we are "thinking" for him/her
            warning("Generating default value for parameter [%s]. "
                    "Galaxy requires the attribute 'value' to be set for integer/floats. "
                    "Edit the CTD file and provide a suitable default value." % param.name, 1)
            # check if there's a min/max and try to use them
            default_value = None
            if param.restrictions is not None:
                if type(param.restrictions) is _NumericRange:
                    default_value = param.restrictions.n_min
                    if default_value is None:
                        default_value = param.restrictions.n_max
                    if default_value is None:
                        # no min/max provided... just use 0 and see what happens
                        default_value = 0
                else:
                    # should never be here, since we have validated this anyway...
                    # this code is here just for documentation purposes
                    # however, better safe than sorry!
                    # (it could be that the code changes and then we have an ugly scenario)
                    raise InvalidModelException("Expected either a numeric range for parameter [%(name)s], "
                                                "but instead got [%(type)s]"
                                                % {"name": param.name, "type": type(param.restrictions)})
            else:
                # no restrictions and no default value provided...
                # make up something
                default_value = 0
            param_node.attrib["value"] = str(default_value)

    label = "%s parameter" % param.name
    help_text = ""

    if param.description is not None:
        label, help_text = generate_label_and_help(param.description)

    param_node.attrib["label"] = label
    param_node.attrib["help"] = "(-%s)" % param.name + " " + help_text
1048 | ||
1049 | ||
def generate_label_and_help(desc):
    """Split a parameter description into a short label and a help text.

    The "#br#" tag is replaced by " <br>", trailing dots are removed and a
    lowercase first word is capitalized (unless it has a '/' at index 1, as in
    "m/z"). Descriptions longer than 50 characters are split at "e.g." or at
    the end of the first sentence; the remainder becomes the help text.

    :param desc: description of the parameter (converted with ``str``)
    :returns: tuple ``(label, help_text)``
    """
    help_text = ""
    # This tag is found in some descriptions
    desc = str(desc).replace("#br#", " <br>")
    # Get rid of dots in the end
    if desc.endswith("."):
        desc = desc.rstrip(".")
    # Check if first word is a normal word and make it uppercase
    if " " in desc:
        first_word, rest = desc.split(" ", 1)
        # check if label has a quotient of the form a/b
        if first_word.islower() and first_word.find("/") != 1:
            # strings are immutable: the capitalized copy has to be kept
            first_word = first_word.capitalize()
        desc = first_word + " " + rest
    label = desc

    # Try to split the label if it is too long
    if len(desc) > 50:
        # find an example and put everything before in the label and the e.g. in the help
        if desc.find("e.g.") > 1:
            label, help_text = desc.split("e.g.", 1)
            help_text = "e.g." + help_text
        else:
            # find the end of the first sentence
            # look for ". " because some labels contain .file or something similar
            delimiter = ""
            dot_position = desc.find(". ")
            question_position = desc.find("? ")
            if dot_position > 1 and question_position > 1:
                delimiter = ". " if dot_position < question_position else "? "
            elif dot_position > 1:
                delimiter = ". "
            elif question_position > 1:
                delimiter = "? "
            if delimiter != "":
                label, help_text = desc.split(delimiter, 1)

            # add the question mark back
            if delimiter == "? ":
                label += "? "

    # remove trailing linebreak tags; str.rstrip('<br>') would strip any of the
    # characters '<', 'b', 'r', '>' and so damage labels such as "number"
    line_break = "<br>"
    label = label.rstrip()
    while label.endswith(line_break):
        label = label[:-len(line_break)].rstrip()
    return label, help_text
1097 | ||
1098 | ||
def get_indented_text(text, indentation_level):
    """Return ``text`` prefixed with MESSAGE_INDENTATION_INCREMENT spaces per indentation level."""
    indentation = " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level)
    return "%s%s" % (indentation, text)
1103 | ||
1104 | ||
def warning(warning_text, indentation_level):
    """Write an indented 'WARNING: ...' line to stdout."""
    message = "WARNING: %s\n" % warning_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1107 | ||
1108 | ||
def error(error_text, indentation_level):
    """Write an indented 'ERROR: ...' line to stderr."""
    message = "ERROR: %s\n" % error_text
    sys.stderr.write(get_indented_text(message, indentation_level))
1111 | ||
1112 | ||
def info(info_text, indentation_level):
    """Write an indented 'INFO: ...' line to stdout."""
    message = "INFO: %s\n" % info_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1115 | ||
1116 | ||
def is_boolean_parameter(param):
    """Determine whether the choices of ``param`` are boolean.

    That is the case when there are exactly two choices and they are either
    yes/no or true/false (ignoring case and surrounding whitespace).
    """
    restrictions = param.restrictions
    if type(restrictions) is not _Choices:
        return False
    # for a true boolean experience, we need 2 values
    if len(restrictions.choices) != 2:
        return False
    choices = get_lowercase_list(restrictions.choices)
    is_yes_no = "yes" in choices and "no" in choices
    is_true_false = "true" in choices and "false" in choices
    return is_yes_no or is_true_false
1128 | ||
1129 | ||
def is_selection_parameter(param):
    """Determine whether the restrictions of ``param`` are a set of choices."""
    restriction_type = type(param.restrictions)
    return restriction_type is _Choices
1133 | ||
1134 | ||
def get_lowercase_list(some_list):
    """Return a list with the lowercased, stripped string form of each item.

    ``str`` methods are used instead of the ``string.lower`` and
    ``string.strip`` module functions, which are not available in Python 3.
    """
    return [str(item).lower().strip() for item in some_list]
1140 | ||
1141 | ||
def create_boolean_parameter(param_node, param):
    """Set the truevalue, falsevalue and checked attributes of a Galaxy boolean parameter.

    This method assumes that ``param`` has restrictions and that only two
    choices are present (either yes/no or true/false).
    """
    # TODO: true and false values can be way more than 'true' and 'false'
    # but for that we need CTD support

    # by default, 'true' and 'false' are handled as flags, like the verbose flag (i.e., -v)
    true_value = "-%s" % get_param_name(param)
    false_value = ""
    choices = get_lowercase_list(param.restrictions.choices)
    if "yes" in choices:
        true_value = "yes"
        false_value = "no"
    param_node.attrib["truevalue"] = true_value
    param_node.attrib["falsevalue"] = false_value

    # set the checked attribute
    if param.default is not None:
        # str() keeps this working for non-string defaults and avoids the
        # string.lower / string.strip functions that Python 3 no longer has
        default = str(param.default).lower().strip()
        checked_value = "true" if default in ("yes", "true") else "false"
        param_node.attrib["checked"] = checked_value
1168 | ||
1169 | ||
def create_outputs(parent, model, **kwargs):
    """Add the <outputs> section with one <data> node per output-file parameter.

    Blacklisted and hardcoded parameters are skipped. Keyword arguments read:
    ``parameter_hardcoder``, ``blacklisted_parameters`` and
    ``supported_file_formats``.
    """
    outputs_node = add_child_node(parent, "outputs")
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    for param in extract_parameters(model):
        # no need to show hardcoded parameters
        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
        is_skipped = param.name in kwargs["blacklisted_parameters"] or hardcoded_value
        if not is_skipped and param.type is _OutFile:
            create_output_node(outputs_node, param, model, kwargs["supported_file_formats"])

    # If there are no outputs defined in the ctd the node will have no children
    # and the stdout will be used as output
    if len(outputs_node) == 0:
        stdout_attributes = OrderedDict()
        stdout_attributes["name"] = "param_stdout"
        stdout_attributes["format"] = "text"
        stdout_attributes["label"] = "Output from stdout"
        add_child_node(outputs_node, "data", stdout_attributes)
1189 | ||
1190 | ||
def create_output_node(parent, param, model, supported_file_formats):
    """Add and return the <data> node describing the output parameter ``param``.

    The format is "data" unless file format restrictions provide a supported
    format. If more than one supported format is available and an input with
    the same formats exists, the format is "input" and that input is used as
    metadata source.

    :raises InvalidModelException: if the restrictions are not file formats
    """
    data_node = add_child_node(parent, "data")
    data_node.attrib["name"] = get_galaxy_parameter_name(param)

    restrictions = param.restrictions
    data_format = "data"
    if restrictions is not None:
        if type(restrictions) is not _FileFormat:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] "
                                        "for output [%(name)s]" % {"type": type(restrictions),
                                                                   "name": param.name})

        # collect the formats that have not been registered yet...
        unsupported = "".join(" " + str(format_name)
                              for format_name in restrictions.formats
                              if format_name not in supported_file_formats.keys())
        # ... and warn only if there is something to complain about
        if unsupported:
            warning("Parameter " + param.name + " has the following unsupported format(s):" + unsupported, 1)

        formats = get_supported_file_types(restrictions.formats, supported_file_formats)
        try:
            data_format = formats.pop()
        except KeyError:
            # no supported format at all: keep the generic format
            pass
        # if there are more than one output file formats try to take the format from the input parameter
        if formats:
            corresponding_input = get_input_with_same_restrictions(param, model, supported_file_formats)
            if corresponding_input is not None:
                data_format = "input"
                data_node.attrib["metadata_source"] = get_galaxy_parameter_name(corresponding_input)

    data_node.attrib["format"] = data_format

    # TODO: find a smarter label ?
    return data_node
1232 | ||
1233 | ||
def get_supported_file_types(formats, supported_file_formats):
    """Return the set of Galaxy extensions of those ``formats`` that are supported.

    Formats without an entry in ``supported_file_formats`` are ignored, so no
    fallback data type is needed (the previous fallback object was built for
    every format and never used).
    """
    return set(supported_file_formats[format_name].galaxy_extension
               for format_name in formats
               if format_name in supported_file_formats)
1237 | ||
1238 | ||
def create_change_format_node(parent, data_formats, input_ref):
    """Add a <change_format> node with one <when> child per data format.

    Example of the generated xml:
        <change_format>
            <when input="secondary_structure" value="true" format="text"/>
        </change_format>
    """
    change_format_node = add_child_node(parent, "change_format")
    for data_format in data_formats:
        when_attributes = OrderedDict()
        when_attributes["input"] = input_ref
        when_attributes["value"] = data_format
        when_attributes["format"] = data_format
        add_child_node(change_format_node, "when", when_attributes)
1247 | ||
1248 | ||
def create_help(tool, model):
    """Add the <help> node of ``tool`` using the manual and docurl of the model.

    The help consists of the manual (if any) followed by a pointer to the
    documentation url (if any). "No help available" is used when the model
    provides neither.
    """
    manual = None
    if model.opt_attribs.get("manual") is not None:
        manual = '%s\n\n' % model.opt_attribs["manual"]
    doc_url = model.opt_attribs.get("docurl")

    # starting from None (rather than '') keeps this fallback reachable
    help_text = "No help available"
    if manual is not None:
        help_text = manual
    if doc_url is not None:
        help_text = ("" if manual is None else manual) + "\nFor more information, visit %s" % doc_url
    help_node = add_child_node(tool, "help")
    # TODO: do we need CDATA Section here?
    help_node.text = help_text
1266 | ||
1267 | ||
def extract_parameters(model):
    """Flatten the (possibly nested) ParameterGroup elements of ``model``.

    Returns a reversed iterator over the collected non-group parameters.
    """
    collected = []
    if len(model.parameters.parameters) > 0:
        # stack of elements that still have to be processed;
        # we know that CTDModel has one parent ParameterGroup
        stack = [model.parameters]
        while stack:
            current = stack.pop()
            if type(current) is ParameterGroup:
                # push the first-level children of this ParameterGroup
                stack.extend(current.parameters.values())
            else:
                collected.append(current)
    # the stack yields the last parameter first, so the order is reversed
    return reversed(collected)
1287 | ||
1288 | ||
def add_child_node(parent_node, child_node_name, attributes=None):
    """Add a child node with the given name to ``parent_node`` and return it.

    :param attributes: optional mapping with the attributes of the new node;
        a fresh empty mapping is used when omitted, so no mutable default
        object is shared between calls
    """
    if attributes is None:
        attributes = OrderedDict()
    return SubElement(parent_node, child_node_name, attributes)
1293 | ||
1294 | ||
# script entry point: the value returned by main() becomes the exit status
if __name__ == "__main__":
    sys.exit(main())
2 | 2 | |
3 | 3 | Given one or more CTD files, `CTD2Galaxy` generates the needed Galaxy wrappers to include them in a Galaxy instance. |
4 | 4 | |
5 | ## How to install | |
5 | ## Dependencies | |
6 | ||
7 | `CTD2Galaxy` has the following python dependencies: | |
8 | ||
9 | 1. `lxml`. | |
10 | 1. [CTDopts] | |
11 | ||
12 | You can install the [CTDopts] and `lxml` modules via `conda`, like so: | |
13 | ||
14 | ```sh | |
15 | $ conda install lxml | |
16 | $ conda install -c workflowconversion ctdopts | |
17 | ``` | |
18 | ||
19 | Note that the [CTDopts] module is available on the `workflowconversion` channel. | |
20 | ||
21 | Of course, you can just download [CTDopts] and make it available through your `PYTHONPATH` environment variable. To get more information about how to install python modules, visit: https://docs.python.org/2/install/. | |
22 | ||
23 | ||
24 | ## How to install CTD2Galaxy | |
6 | 25 | |
7 | 26 | 1. Download the source code from https://github.com/genericworkflownodes/CTD2Galaxy. |
8 | 2. Download CTDopts from https://github.com/genericworkflownodes/CTDopts. | |
9 | 3. You can install the `CTDopts` and `CTD2Galaxy` modules, or just make them available through your `PYTHONPATH` environment variable. To get more information about how to install python modules, visit: https://docs.python.org/2/install/. | |
10 | 27 | |
11 | 28 | ## How to use: most common tasks |
12 | 29 | |
13 | 30 | The generator takes several parameters and a varying number of inputs and outputs. The following sub-sections show how to perform the most common operations. |
14 | 31 | |
15 | 32 | Running the generator with the `-h/--help` parameter will print extended information about each of the parameters. |
33 | ||
34 | ### Macros | |
35 | ||
36 | Galaxy supports the use of macros via a `macros.xml` file (`CTD2Galaxy` provides a sample macros file in `supported_formats/macros.xml`). Instead of repeating sections, macros can be used and expanded. If you want fine control over the macros, you can use the `-m` / `--macros` parameter to provide your own macros file. | |
37 | ||
38 | Please note that the macros file you use must be copied to your Galaxy installation, into the same location in which you place the generated *ToolConfig* files. | |
16 | 39 | |
17 | 40 | ### One input, one output |
18 | 41 | |
71 | 94 | |
72 | 95 | Any of the following invocations will convert `/data/input_one.ctd` and `/data/input_two.ctd`: |
73 | 96 | |
74 | $ python generator.py -i /data/input_one.ctd -i /data/input_two.ctd -o /data/generated | |
97 | $ python generator.py -i /data/input_one.ctd -i /data/input_two.ctd -o /data/generated | |
75 | 98 | $ python generator.py -i /data/input_one.ctd /data/input_two.ctd -o /data/generated |
76 | 99 | $ python generator.py --input /data/input_one.ctd /data/input_two.ctd -o /data/generated |
77 | $ python generator.py --input /data/input_one.ctd --input /data/input_two.ctd -o /data/generated | |
100 | $ python generator.py --input /data/input_one.ctd --input /data/input_two.ctd -o /data/generated | |
78 | 101 | |
79 | 102 | The following invocation will convert `/data/input.ctd` into `/data/output.xml`: |
80 | 103 | |
81 | $ python generator.py -i /data/input.ctd -o /data/output.xml | |
104 | $ python generator.py -i /data/input.ctd -o /data/output.xml -m sample_files/macros.xml | |
82 | 105 | |
83 | 106 | Of course, you can also use wildcards, which will be automatically expanded by any modern operating system. This is extremely useful if you want to convert several files at a time. Imagine that the folder `/data/ctds` contains three files, `input_one.ctd`, `input_two.ctd` and `input_three.ctd`. The following two invocations will produce the same output in the `/data/galaxy`: |
84 | 107 | |
232 | 255 | |
233 | 256 | * Purpose: Include external macros files. |
234 | 257 | * Short/long version: `-m` / `--macros` |
235 | * Required: yes. | |
258 | * Required: no. | |
259 | * Default: `macros.xml` | |
236 | 260 | * Taken values: List of paths of macros files to include. |
237 | 261 | |
238 | 262 | *ToolConfig* supports elaborate sections such as `<stdio>`, `<requirements>`, etc., that are identical across tools of the same suite. Macros files assist in the task of including external xml sections into *ToolConfig* files. For more information about the syntax of macros files, see: https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#Reusing_Repeated_Configuration_Elements |
239 | 263 | |
240 | There are some macros that are required, namely `stdio`, `requirements` and `advanced_options`. A template macro file is included under `support_files/macros.xml`. Although this is a required file, it can be edited to suit your needs and you could add extra macros or leave it as it is and include additional files. | |
264 | There are some macros that are required, namely `stdio`, `requirements` and `advanced_options`. A template macro file is included in [macros.xml]. It can be edited to suit your needs and you could add extra macros or leave it as it is and include additional files. | |
241 | 265 | |
242 | 266 | Every macro found in the included files and in `support_files/macros.xml` will be expanded. Users are responsible for copying the given macros files in their corresponding galaxy folders. |
243 | 267 | |
320 | 344 | * MapAlignerPoseClustering |
321 | 345 | * MapAlignerSpectrum |
322 | 346 | * MapAlignerRTTransformer |
347 | ||
348 | [CTDopts]: https://github.com/genericworkflownodes/CTDopts | |
349 | [macros.xml]: https://github.com/WorkflowConversion/CTD2Galaxy/blob/master/macros.xml⏎ |
0 | #!/usr/bin/env python | |
1 | # encoding: utf-8 | |
2 | ||
3 | """ | |
4 | @author: delagarza | |
5 | """ | |
6 | ||
7 | ||
8 | import sys | |
9 | import os | |
10 | import traceback | |
11 | import ntpath | |
12 | import string | |
13 | ||
14 | from argparse import ArgumentParser | |
15 | from argparse import RawDescriptionHelpFormatter | |
16 | from CTDopts.CTDopts import CTDModel, _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, \ | |
17 | _FileFormat, ModelError | |
18 | from collections import OrderedDict | |
19 | from string import strip | |
20 | from lxml import etree | |
21 | from lxml.etree import SubElement, Element, ElementTree, ParseError, parse | |
22 | ||
# Module metadata; __version__ and __updated__ are combined into the --version message in main().
__all__ = []
__version__ = 1.0
__date__ = '2014-09-17'
__updated__ = '2016-05-09'

# presumably the step by which nested log messages are indented -- the logging helpers are outside this view
MESSAGE_INDENTATION_INCREMENT = 2

# Maps a parameter type (Python builtin or CTDopts marker class) to a Galaxy parameter type name.
TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'data',
                       _OutFile: 'data', _Choices: 'select'}

# Names of the macros that the generator handles explicitly.
STDIO_MACRO_NAME = "stdio"
REQUIREMENTS_MACRO_NAME = "requirements"
ADVANCED_OPTIONS_MACRO_NAME = "advanced_options"

# parse_macros_files() refuses to continue unless every one of these is defined in the given macros files.
REQUIRED_MACROS = [STDIO_MACRO_NAME, REQUIREMENTS_MACRO_NAME, ADVANCED_OPTIONS_MACRO_NAME]
38 | ||
39 | ||
class CLIError(Exception):
    """Generic exception to raise and log different fatal errors.

    The message passed in is stored, prefixed with ``"E: "``, in ``msg`` and is
    what ``str()`` returns.
    """

    def __init__(self, msg):
        # The original called super(CLIError).__init__(type(self)), which only
        # re-initialises an unbound super object and never reaches
        # Exception.__init__. Use the two-argument form so the base class is
        # initialised properly (works on both Python 2 and 3).
        super(CLIError, self).__init__(msg)
        self.msg = "E: %s" % msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
51 | ||
52 | ||
class InvalidModelException(ModelError):
    """ModelError carrying a free-text description of what is wrong with a model.

    The description is kept in ``message`` and is returned by both ``str()``
    and ``repr()``.
    """

    def __init__(self, message):
        super(InvalidModelException, self).__init__()
        self.message = message

    def __str__(self):
        return self.message

    # repr() yields exactly the same text as str()
    __repr__ = __str__
63 | ||
64 | ||
class ApplicationException(Exception):
    """Raised when the generator cannot complete the requested operation.

    The human-readable reason is stored in ``msg``; ``main()`` reports it and
    exits with a non-zero status.
    """

    def __init__(self, msg):
        # The original called super(ApplicationException).__init__(type(self)),
        # which never reaches Exception.__init__. Use the two-argument form.
        super(ApplicationException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg
75 | ||
76 | ||
class ExitCode:
    """Plain record describing an exit code: its range, its level and an optional description."""

    def __init__(self, code_range="", level="", description=None):
        # note that the constructor argument code_range is exposed as the attribute "range"
        self.range, self.level, self.description = code_range, level, description
82 | ||
83 | ||
class DataType:
    """Plain record relating a file extension to the Galaxy extension, Galaxy type and mimetype given for it."""

    def __init__(self, extension, galaxy_extension=None, galaxy_type=None, mimetype=None):
        # only the extension is mandatory; everything else defaults to None
        self.extension, self.galaxy_extension = extension, galaxy_extension
        self.galaxy_type, self.mimetype = galaxy_type, mimetype
90 | ||
91 | ||
class ParameterHardcoder:
    """Registry of hardcoded parameter values, optionally restricted to a single tool.

    Values live in ``parameter_map``. Its keys follow one of two patterns:

        [ParameterName][separator][ToolName] -> HardcodedValue   (applies to one tool)
        [ParameterName]                      -> HardcodedValue   (applies to all tools)

    With the separator used here ('!'), the map could look like this:

        threads                 -> 24
        adapter!XTandemAdapter  -> xtandem.exe
        adapter                 -> adapter.exe
    """

    def __init__(self):
        self.separator = "!"
        self.parameter_map = {}

    def get_hardcoded_value(self, parameter_name, tool_name):
        """Return the value hardcoded for the parameter, or None if there is none.

        A value registered for the given tool takes precedence over a value
        registered for all tools.
        """
        tool_specific_key = self.build_key(parameter_name, tool_name)
        tool_specific_value = self.parameter_map.get(tool_specific_key)
        if tool_specific_value is None:
            # nothing registered for this tool; fall back to the value for all tools
            return self.parameter_map.get(parameter_name)
        return tool_specific_value

    def register_parameter(self, parameter_name, parameter_value, tool_name=None):
        """Store a hardcoded value; without a tool name the value applies to all tools."""
        key = self.build_key(parameter_name, tool_name)
        self.parameter_map[key] = parameter_value

    def build_key(self, parameter_name, tool_name):
        """Build the map key: the bare parameter name, or parameter + separator + tool."""
        if tool_name is not None:
            return "%s%s%s" % (parameter_name, self.separator, tool_name)
        return parameter_name
123 | ||
124 | ||
def main(argv=None):  # IGNORE:C0111
    """Command-line entry point: parse the arguments and run the conversion.

    Parses and validates the command line, reads the macros, file-formats,
    hardcoded-parameters and skip/required-tools files, converts the given CTD
    files and, if requested, generates tool_conf.xml and datatypes_conf.xml.

    :param argv: optional list of extra arguments; when given, its items are
        appended to ``sys.argv`` before parsing.
    :return: 0 on success or on keyboard interrupt, 1 if an
        ApplicationException or ModelError was raised, 2 on any other exception
        (whose traceback is printed).
    """
    # Command line options.
    if argv is None:
        argv = sys.argv
    else:
        # parse_args() is called without arguments below, so extra arguments are handed over via sys.argv
        sys.argv.extend(argv)

    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_short_description = "CTD2Galaxy - A project from the GenericWorkflowNodes family " \
                                "(https://github.com/orgs/genericworkflownodes)"
    # the backslash in front of ${GALAXY_SLOTS...} is escaped so that the text contains a literal "\$"
    program_usage = '''
USAGE:

I - Parsing a single CTD file and generate a Galaxy wrapper:

$ python generator.py -i input.ctd -o output.xml


II - Parsing all found CTD files (files with .ctd and .xml extension) in a given folder and
output converted Galaxy wrappers in a given folder:

$ python generator.py -i /home/user/*.ctd -o /home/user/galaxywrappers


III - Providing file formats, mimetypes

Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a certain
data format will be able to receive data from a port from the same format. This converter allows you to provide
a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. The layout of
this file consists of lines, each of either one or four columns separated by any amount of whitespace. The content
of each column is as follows:

* 1st column: file extension
* 2nd column: data type, as listed in Galaxy
* 3rd column: full-named Galaxy data type, as it will appear on datatypes_conf.xml
* 4th column: mimetype (optional)

The following is an example of a valid "file formats" file:

########################################## FILE FORMATS example ##########################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the file format as given in the CTD and second column is the Galaxy data format.
# The second, third, fourth and fifth column can be left empty if the data type has already been registered
# in Galaxy, otherwise, all but the mimetype must be provided.

# CTD type # Galaxy type # Long Galaxy data type # Mimetype
csv tabular galaxy.datatypes.data:Text
fasta
ini txt galaxy.datatypes.data:Text
txt
idxml txt galaxy.datatypes.xml:GenericXml application/xml
options txt galaxy.datatypes.data:Text
grid grid galaxy.datatypes.data:Grid

##########################################################################################################

Note that each line consists precisely of either one, three or four columns. In the case of data types already
registered in Galaxy (such as fasta and txt in the above example), only the first column is needed. In the case of
data types that haven't been yet registered in Galaxy, the first three columns are needed (mimetype is optional).

For information about Galaxy data types and subclasses, see the following page:
https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes


IV - Hardcoding parameters

It is possible to hardcode parameters. This makes sense if you want to set a tool in Galaxy in 'quiet' mode or if
your tools support multi-threading and accept the number of threads via a parameter, without giving the end user the
chance to change the values for these parameters.

In order to generate hardcoded parameters, you need to provide a simple file. Each line of this file contains two
or three columns separated by whitespace. Any line starting with a '#' will be ignored. The first column contains
the name of the parameter, the second column contains the value that will always be set for this parameter. The
first two columns are mandatory.

If the parameter is to be hardcoded only for a set of tools, then a third column can be added. This column includes
a comma-separated list of tool names for which the parameter will be hardcoded. If a third column is not included,
then all processed tools containing the given parameter will get a hardcoded value for it.

The following is an example of a valid file:

##################################### HARDCODED PARAMETERS example #####################################
# Every line starting with a # will be handled as a comment and will not be parsed.
# The first column is the name of the parameter and the second column is the value that will be used.

# Parameter name # Value # Tool(s)
threads \\${GALAXY_SLOTS:-24}
mode quiet
xtandem_executable xtandem XTandemAdapter
verbosity high Foo, Bar

#########################################################################################################

Using the above file will produce a <command> similar to:

[tool_name] ... -threads \\${GALAXY_SLOTS:-24} -mode quiet ...

For all tools. For XTandemAdapter, the <command> will be similar to:

XtandemAdapter ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -xtandem_executable xtandem ...

And for tools Foo and Bar, the <command> will be similar to:

Foo ... ... -threads \\${GALAXY_SLOTS:-24} -mode quiet -verbosity high ...


V - Control which tools will be converted

Sometimes only a subset of CTDs needs to be converted. It is possible to either explicitly specify which tools will
be converted or which tools will not be converted.

The value of the -s/--skip-tools parameter is a file in which each line will be interpreted as the name of a tool
that will not be converted. Conversely, the value of the -r/--required-tools is a file in which each line will be
interpreted as a tool that is required. Only one of these parameters can be specified at a given time.

The format of both files is exactly the same. As stated before, each line will be interpreted as the name of a tool;
any line starting with a '#' will be ignored.

'''
    program_license = '''%(short_description)s
Copyright 2015, Luis de la Garza

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

%(usage)s
''' % {'short_description': program_short_description, 'usage': program_usage}

    try:
        # Setup argument parser
        parser = ArgumentParser(prog="CTD2Galaxy", description=program_license,
                                formatter_class=RawDescriptionHelpFormatter, add_help=True)
        parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+", action="append",
                            help="List of CTD files to convert.")
        parser.add_argument("-o", "--output-destination", dest="output_destination", required=True,
                            help="If multiple input files are given, then a folder in which all generated "
                                 "XMLs will be generated is expected; "
                                 "if a single input file is given, then a destination file is expected.")
        parser.add_argument("-f", "--formats-file", dest="formats_file",
                            help="File containing the supported file formats. Run with '-h' or '--help' to see a "
                                 "brief example on the layout of this file.", default=None, required=False)
        parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line",
                            help="Adds content to the command line", default="", required=False)
        parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination",
                            help="Specify the location of a datatypes_conf.xml to modify and add the registered "
                                 "data types. If the provided destination does not exist, a new file will be created.",
                            default=None, required=False)
        parser.add_argument("-x", "--default-executable-path", dest="default_executable_path",
                            help="Use this executable path when <executablePath> is not present in the CTD",
                            default=None, required=False)
        parser.add_argument("-b", "--blacklist-parameters", dest="blacklisted_parameters", default=[], nargs="+",
                            action="append",
                            help="List of parameters that will be ignored and won't appear on the galaxy stub",
                            required=False)
        parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False,
                            help="Default category to use for tools lacking a category when generating tool_conf.xml")
        parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False,
                            help="Specify the location of an existing tool_conf.xml that will be modified to include "
                                 "the converted tools. If the provided destination does not exist, a new file will "
                                 "be created.")
        parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False,
                            help="The path that will be prepended to the file names when generating tool_conf.xml")
        parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False,
                            help="Each line of the file will be interpreted as a tool name that needs translation. "
                                 "Run with '-h' or '--help' to see a brief example on the format of this file.")
        parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False,
                            help="File containing a list of tools for which a Galaxy stub will not be generated. "
                                 "Run with '-h' or '--help' to see a brief example on the format of this file.")
        # the default is a list of lists because every use of an "append" action with nargs="+" adds one sub-list;
        # validate_and_prepare_args() flattens it afterwards
        parser.add_argument("-m", "--macros", dest="macros_files", default=[['macros.xml']], nargs="+",
                            action="append", required=False, help="Import the additional given file(s) as macros. "
                            "The macros stdio, requirements and advanced_options are required. Please see "
                            "macros.xml for an example of a valid macros file. All defined macros will be imported.")
        parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None, required=False,
                            help="File containing hardcoded values for the given parameters. Run with '-h' or '--help' "
                                 "to see a brief example on the format of this file.")
        # TODO: add verbosity, maybe?
        parser.add_argument("-V", "--version", action='version', version=program_version_message)

        # Process arguments
        args = parser.parse_args()

        # validate and prepare the passed arguments
        validate_and_prepare_args(args)

        # extract the names of the macros and check that we have found the ones we need
        macros_file_names = args.macros_files
        macros_to_expand = parse_macros_files(macros_file_names)

        # parse the given supported file-formats file
        supported_file_formats = parse_file_formats(args.formats_file)

        # parse the hardcoded parameters file
        parameter_hardcoder = parse_hardcoded_parameters(args.hardcoded_parameters)

        # parse the skip/required tools files
        skip_tools = parse_tools_list_file(args.skip_tools_file)
        required_tools = parse_tools_list_file(args.required_tools_file)

        parsed_models = convert(args.input_files,
                                args.output_destination,
                                supported_file_formats=supported_file_formats,
                                default_executable_path=args.default_executable_path,
                                add_to_command_line=args.add_to_command_line,
                                blacklisted_parameters=args.blacklisted_parameters,
                                required_tools=required_tools,
                                skip_tools=skip_tools,
                                macros_file_names=macros_file_names,
                                macros_to_expand=macros_to_expand,
                                parameter_hardcoder=parameter_hardcoder)

        # TODO: add some sort of warning if a macro that doesn't exist is to be expanded

        # it is not needed to copy the macros files, since the user has provided them

        # generation of galaxy stubs is ready... now, let's see if we need to generate a tool_conf.xml
        if args.tool_conf_destination is not None:
            generate_tool_conf(parsed_models, args.tool_conf_destination,
                               args.galaxy_tool_path, args.default_category)

        # now datatypes_conf.xml
        if args.data_types_destination is not None:
            generate_data_type_conf(supported_file_formats, args.data_types_destination)

        return 0

    except KeyboardInterrupt:
        # handle keyboard interrupt
        return 0
    except ApplicationException as e:
        error("CTD2Galaxy could not complete the requested operation.", 0)
        error("Reason: " + e.msg, 0)
        return 1
    except ModelError as e:
        error("There seems to be a problem with one of your input CTDs.", 0)
        # InvalidModelException (defined in this file) stores its text in "message" and never sets "msg",
        # so fall back to str(e) instead of failing with an AttributeError while reporting the error
        error("Reason: " + (getattr(e, "msg", None) or str(e)), 0)
        return 1
    except Exception:
        traceback.print_exc()
        return 2
376 | ||
377 | ||
def parse_tools_list_file(tools_list_file):
    """Read a list of tool names, one per line, from the given file.

    Blank lines and lines whose first non-blank character is '#' are skipped;
    surrounding whitespace is removed from every name.

    :param tools_list_file: path of the file to read, or None.
    :return: list of tool names in file order, or None if no file was given.
    """
    if tools_list_file is None:
        return None

    with open(tools_list_file) as tools_file:
        stripped_lines = (raw_line.strip() for raw_line in tools_file)
        return [tool_name for tool_name in stripped_lines
                if tool_name and not tool_name.startswith("#")]
390 | ||
391 | ||
def parse_macros_files(macros_file_names):
    """Collect the names of all macros defined in the given macros files.

    Every ``<xml name="...">`` element found directly under the root of each
    file counts as a macro. A warning is issued for a macro name that has
    already been seen.

    :param macros_file_names: list of paths of the macros files to read.
    :return: set with the names of the macros to expand, i.e. all macros found
        except ``advanced_options``.
    :raises ApplicationException: if a file cannot be opened or parsed, or if
        one of the macros in REQUIRED_MACROS is not defined in any file.
    """
    macros_to_expand = set()

    for macros_file_name in macros_file_names:
        try:
            # the context manager makes sure the file is closed again (the handle used to leak)
            with open(macros_file_name) as macros_file:
                root = parse(macros_file).getroot()
            for xml_element in root.findall("xml"):
                name = xml_element.attrib["name"]
                if name in macros_to_expand:
                    warning("Macro %s has already been found. Duplicate found in file %s." %
                            (name, macros_file_name), 0)
                else:
                    macros_to_expand.add(name)
        except ParseError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " +
                                       str(e))
        except IOError as e:
            raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " +
                                       str(e))

    # we depend on "stdio", "requirements" and "advanced_options" being defined in at least one of the given files
    missing_needed_macros = [required_macro for required_macro in REQUIRED_MACROS
                             if required_macro not in macros_to_expand]

    if missing_needed_macros:
        raise ApplicationException(
            "The following required macro(s) were not found in any of the given macros files: %s, "
            "see sample_files/macros.xml for an example of a valid macros file."
            % ", ".join(missing_needed_macros))

    # we do not need to "expand" the advanced_options macro
    macros_to_expand.remove(ADVANCED_OPTIONS_MACRO_NAME)
    return macros_to_expand
428 | ||
def parse_hardcoded_parameters(hardcoded_parameters_file):
    """Read the hardcoded-parameters file into a ParameterHardcoder.

    Each relevant line has two or three whitespace-separated columns: the
    parameter name, the hardcoded value and, optionally, a comma-separated list
    of tool names to which the value is restricted. Blank lines and lines
    starting with '#' are skipped; a line with a single column is reported with
    a warning and ignored.

    :param hardcoded_parameters_file: path of the file to read, or None.
    :return: a ParameterHardcoder; it has no registered values if no file was
        given.
    """
    parameter_hardcoder = ParameterHardcoder()
    if hardcoded_parameters_file is None:
        return parameter_hardcoder

    with open(hardcoded_parameters_file) as f:
        for line_number, line in enumerate(f, 1):
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith("#"):
                continue

            # split at most twice: the third column (the list of tools) must be obtained
            # as a whole, since it may contain whitespace, e.g. "Foo, Bar"
            parsed_hardcoded_parameter = stripped_line.split(None, 2)
            # valid lines contain two or three columns
            if len(parsed_hardcoded_parameter) not in (2, 3):
                # the original message was missing the space between "be" and "ignored"
                warning("Invalid line at line number %d of the given hardcoded parameters file. Line will be "
                        "ignored:\n%s" % (line_number, line), 0)
                continue

            parameter_name = parsed_hardcoded_parameter[0]
            hardcoded_value = parsed_hardcoded_parameter[1]
            tool_names = None
            if len(parsed_hardcoded_parameter) == 3:
                tool_names = parsed_hardcoded_parameter[2].split(',')
            if tool_names:
                # the value applies only to the listed tools
                for tool_name in tool_names:
                    parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name.strip())
            else:
                # the value applies to all tools
                parameter_hardcoder.register_parameter(parameter_name, hardcoded_value)

    return parameter_hardcoder
459 | ||
460 | ||
def parse_file_formats(formats_file):
    """Read the supported file formats from the given file.

    Each relevant line has one, three or four whitespace-separated columns:
    file extension, Galaxy extension, Galaxy data type and mimetype. Blank
    lines and lines starting with '#' are skipped; lines with any other number
    of columns are reported with a warning and ignored.

    :param formats_file: path of the file to read, or None.
    :return: dict mapping each file extension to its DataType; empty if no file
        was given.
    """
    supported_formats = {}
    if formats_file is None:
        return supported_formats

    with open(formats_file) as formats_handle:
        for line_number, line in enumerate(formats_handle, 1):
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith("#"):
                # nothing to parse on empty lines and comments
                continue

            columns = stripped_line.split()
            column_count = len(columns)
            if column_count not in (1, 3, 4):
                warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" %
                        (line_number, line), 0)
                continue

            extension = columns[0]
            if column_count == 1:
                # only the extension was given: it doubles as the Galaxy extension
                supported_formats[extension] = DataType(extension, extension)
            else:
                # the mimetype is the optional fourth column
                mimetype = columns[3] if column_count == 4 else None
                supported_formats[extension] = DataType(extension, columns[1], columns[2], mimetype)

    return supported_formats
492 | ||
493 | ||
def validate_and_prepare_args(args):
    """Validate the parsed command-line arguments and normalise them in place.

    The list-of-lists arguments (``input_files``, ``blacklisted_parameters``
    and ``macros_files``) are flattened into plain lists on ``args``.

    :param args: the namespace produced by the argument parser.
    :raises ApplicationException: if both a skip-tools and a required-tools
        file are given; if the output destination does not fit the number of
        input files (a single input must not point to an existing folder,
        several inputs need an existing folder); if any given input file is not
        an existing file; or if an output file name points to a directory.
    """
    # check that only one of skip_tools_file and required_tools_file has been provided
    if args.skip_tools_file is not None and args.required_tools_file is not None:
        raise ApplicationException(
            "You have provided both a file with tools to ignore and a file with required tools.\n"
            "Only one of -s/--skip-tools, -r/--required-tools can be provided.")

    # first, we convert all list of lists in args to flat lists
    lists_to_flatten = ["input_files", "blacklisted_parameters", "macros_files"]
    for list_to_flatten in lists_to_flatten:
        setattr(args, list_to_flatten, [item for sub_list in getattr(args, list_to_flatten) for item in sub_list])

    # if input is a single file, we expect output to be a file (and not a dir that already exists)
    if len(args.input_files) == 1:
        if os.path.isdir(args.output_destination):
            raise ApplicationException("If a single input file is provided, output (%s) is expected to be a file "
                                       "and not a folder.\n" % args.output_destination)

    # if input is a list of files, we expect output to be a folder
    if len(args.input_files) > 1:
        if not os.path.isdir(args.output_destination):
            raise ApplicationException("If several input files are provided, output (%s) is expected to be an "
                                       "existing directory.\n" % args.output_destination)

    # check that the provided input files, if provided, contain a valid file path
    input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files",
                                "input_files", "formats_file", "hardcoded_parameters"]

    for variable_name in input_variables_to_check:
        # check if we are handling a single file or a list of files
        member_value = getattr(args, variable_name)
        if member_value is None:
            continue
        file_names = member_value if isinstance(member_value, list) else [member_value]

        for file_name in file_names:
            # str.strip() replaces the Python-2-only string.strip() function
            path_to_check = str(file_name).strip()
            # os.path.isfile() is already False for paths that do not exist
            if not os.path.isfile(path_to_check):
                raise ApplicationException(
                    "The provided input file (%s) does not exist or is not a valid file path."
                    % path_to_check)

    # check that the provided output files, if provided, contain a valid file path (i.e., not a folder)
    output_variables_to_check = ["data_types_destination", "tool_conf_destination"]

    for variable_name in output_variables_to_check:
        file_name = getattr(args, variable_name)
        if file_name is not None and os.path.isdir(file_name):
            raise ApplicationException("The provided output file name (%s) points to a directory." % file_name)
546 | ||
547 | ||
def convert(input_files, output_destination, **kwargs):
    """Convert the given CTD files into Galaxy tool XML files.

    A file that cannot be loaded as a CTDModel is reported and skipped. A tool
    is also skipped if its name is listed in ``kwargs["skip_tools"]`` or if
    ``kwargs["required_tools"]`` is given and does not list it.

    :param input_files: list of paths of the CTD files to convert.
    :param output_destination: the output file if a single input file is given;
        otherwise the folder in which one ``<input name>.xml`` per input file
        is written.
    :param kwargs: conversion options; ``skip_tools`` and ``required_tools``
        are read here and all options are passed on to the helpers that build
        the tool.
    :return: list of ``[model, output file name]`` pairs, one per converted
        tool.
    """
    # first, generate a model
    is_converting_multiple_ctds = len(input_files) > 1
    parsed_models = []
    for input_file in input_files:
        try:
            model = CTDModel(from_file=input_file)
        except Exception as e:
            # best effort: report the problem and carry on with the remaining files
            error(str(e), 1)
            continue

        if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]:
            info("Skipping tool %s" % model.name, 0)
            continue
        elif kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]:
            info("Tool %s is not required, skipping it" % model.name, 0)
            continue
        else:
            info("Converting from %s " % input_file, 0)
            tool = create_tool(model)
            write_header(tool, model)
            create_description(tool, model)
            expand_macros(tool, model, **kwargs)
            create_command(tool, model, **kwargs)
            create_inputs(tool, model, **kwargs)
            create_outputs(tool, model, **kwargs)
            create_help(tool, model)

            # finally, serialize the tool
            output_file = output_destination
            # if multiple inputs are being converted,
            # then we need to generate a different output_file for each input
            if is_converting_multiple_ctds:
                output_file = os.path.join(output_file, get_filename_without_suffix(input_file) + ".xml")
            # wrap our tool element into a tree to be able to serialize it
            tree = ElementTree(tool)
            # the context manager closes (and thereby flushes) the output file; the handle used to leak
            with open(output_file, 'w') as output_handle:
                tree.write(output_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
            # let's use model to hold the name of the output file
            parsed_models.append([model, get_filename(output_file)])

    return parsed_models
589 | ||
590 | ||
def write_header(tool, model):
    """Insert two XML comments before the tool element.

    The first comment states that the file was generated automatically; the second
    one proposes a tool section taken from the model's optional ``category`` attribute.
    """
    generated_notice = etree.Comment(
        "This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). "
        "This file was automatically generated using CTD2Galaxy.")
    category = model.opt_attribs.get("category", "")
    proposed_section = etree.Comment('Proposed Tool Section: [%s]' % category)
    for header_comment in (generated_notice, proposed_section):
        tool.addprevious(header_comment)
596 | ||
597 | ||
def generate_tool_conf(parsed_models, tool_conf_destination, galaxy_tool_path, default_category):
    """Write a Galaxy tool_conf.xml that lists the generated wrappers grouped by category.

    :param parsed_models: ``[model, wrapper file name]`` pairs, as returned by ``convert``.
    :param tool_conf_destination: path of the file to write.
    :param galaxy_tool_path: prefix put in front of every wrapper file name; ``None``
        means no prefix. A trailing ``/`` is appended when missing.
    :param default_category: category used for models whose ``category`` is blank.
    """
    # for each category, we keep a list of the wrapper file names corresponding to it
    categories_to_tools = dict()
    for model in parsed_models:
        category = model[0].opt_attribs.get("category", "").strip()
        if not category:
            category = default_category
        categories_to_tools.setdefault(category, []).append(model[1])

    # at this point, we should have a map for all categories->tools
    toolbox_node = Element("toolbox")

    if galaxy_tool_path is None:
        galaxy_tool_path = ""
    else:
        # surrounding whitespace is always removed, not only when the slash is missing
        galaxy_tool_path = galaxy_tool_path.strip()
        if not galaxy_tool_path.endswith("/"):
            galaxy_tool_path += "/"

    for category, file_names in categories_to_tools.items():
        section_node = add_child_node(toolbox_node, "section")
        # the section id is the category name with all whitespace removed
        section_node.attrib["id"] = "section-id-" + "".join(category.split())
        section_node.attrib["name"] = category

        for filename in file_names:
            tool_node = add_child_node(section_node, "tool")
            tool_node.attrib["file"] = galaxy_tool_path + filename

    toolconf_tree = ElementTree(toolbox_node)
    # the context manager closes the handle even if serialization fails
    with open(tool_conf_destination, 'w') as tool_conf_handle:
        toolconf_tree.write(tool_conf_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy tool_conf.xml in %s" % tool_conf_destination, 0)
629 | ||
630 | ||
def generate_data_type_conf(supported_file_formats, data_types_destination):
    """Write a Galaxy datatypes_conf.xml registering the data types that declare a Galaxy type.

    :param supported_file_formats: mapping of format name to a data type object exposing
        ``galaxy_type``, ``galaxy_extension`` and ``mimetype``.
    :param data_types_destination: path of the file to write.
    """
    data_types_node = Element("datatypes")
    registration_node = add_child_node(data_types_node, "registration")
    registration_node.attrib["converters_path"] = "lib/galaxy/datatypes/converters"
    registration_node.attrib["display_path"] = "display_applications"

    for data_type in supported_file_formats.values():
        # add only if it's a data type that does not exist in Galaxy
        if data_type.galaxy_type is None:
            continue
        data_type_node = add_child_node(registration_node, "datatype")
        # NOTE(review): galaxy_extension is assumed to be set whenever galaxy_type is - confirm
        data_type_node.attrib["extension"] = data_type.galaxy_extension
        data_type_node.attrib["type"] = data_type.galaxy_type
        if data_type.mimetype is not None:
            data_type_node.attrib["mimetype"] = data_type.mimetype

    data_types_tree = ElementTree(data_types_node)
    # the context manager closes the handle even if serialization fails
    with open(data_types_destination, 'w') as data_types_handle:
        data_types_tree.write(data_types_handle, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    info("Generated Galaxy datatypes_conf.xml in %s" % data_types_destination, 0)
651 | ||
652 | ||
# taken from
# http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format
def get_filename(path):
    """Return the last component of *path*, accepting both ``/`` and ``\\`` separators.

    A trailing separator is ignored, so ``a/b/`` yields ``b``.
    """
    directory, last_component = ntpath.split(path)
    if last_component:
        return last_component
    # the path ended with a separator: the name is the last component of what precedes it
    return ntpath.basename(directory)
658 | ||
659 | ||
def get_filename_without_suffix(path):
    """Return the base name of *path* with its last extension removed."""
    base_name = os.path.basename(path)
    return os.path.splitext(base_name)[0]
663 | ||
664 | ||
def create_tool(model):
    """Create the root ``<tool>`` element with the id, name and version of the model."""
    tool_attributes = OrderedDict()
    tool_attributes["id"] = model.name
    tool_attributes["name"] = model.name
    tool_attributes["version"] = model.version
    return Element("tool", tool_attributes)
667 | ||
668 | ||
def create_description(tool, model):
    """Add a ``<description>`` child to the tool when the model provides a description."""
    description_text = model.opt_attribs.get("description")
    if description_text is None:
        # nothing to describe: no node is generated
        return
    description_node = SubElement(tool, "description")
    description_node.text = description_text
673 | ||
674 | ||
def get_param_name(param):
    """Return the command line name of a parameter, qualified with its parent groups.

    Names of nested groups are joined with ``:``. The topmost group (the one
    without a parent) does not contribute to the name.
    """
    # we generate parameters with colons for subgroups, but not for the topmost parents (OpenMS legacy)
    parent = param.parent
    if type(parent) is ParameterGroup and parent.parent is not None:
        return get_param_name(parent) + ":" + resolve_param_mapping(param)
    return resolve_param_mapping(param)
681 | ||
682 | ||
# some parameters are mapped to command line options, this method helps resolve those mappings, if any
# TODO: implement mapping of parameters!!!
def resolve_param_mapping(param):
    """Return the name under which the parameter is used; currently just its own name."""
    mapped_name = param.name
    return mapped_name
687 | ||
688 | ||
def create_command(tool, model, **kwargs):
    """Build the text of the ``<command>`` node and append that node to the tool.

    The command starts with the executable and the ``add_to_command_line`` text, followed
    by one template snippet per parameter. Snippets of advanced parameters (other than
    output files) are collected inside a single ``#if $adv_opts...`` section at the end.

    :param tool: element that receives the ``<command>`` child.
    :param model: the CTD model being converted.
    :param kwargs: reads ``default_executable_path``, ``add_to_command_line``,
        ``parameter_hardcoder`` and ``blacklisted_parameters``.
    """
    # NOTE(review): the leading spaces inside the snippets below look like pretty-printing
    # of the generated text only - confirm before relying on their exact amount
    final_command = get_tool_executable_path(model, kwargs["default_executable_path"]) + '\n'
    final_command += kwargs["add_to_command_line"] + '\n'
    advanced_command_start = "#if $adv_opts.adv_opts_selector=='advanced':\n"
    advanced_command_end = '#end if'
    advanced_command = ''
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    found_output_parameter = False
    for param in extract_parameters(model):
        # output files count even when they end up blacklisted or hardcoded
        if param.type is _OutFile:
            found_output_parameter = True
        command = ''
        param_name = get_param_name(param)

        if param.name in kwargs["blacklisted_parameters"]:
            continue

        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param_name, model.name)
        if hardcoded_value:
            command += '-%s %s\n' % (param_name, hardcoded_value)
        else:
            # parameter is neither blacklisted nor hardcoded...
            galaxy_parameter_name = get_galaxy_parameter_name(param)
            repeat_galaxy_parameter_name = get_repeat_galaxy_parameter_name(param)

            # logic for ITEMLISTs
            if param.is_list:
                if param.type is _InFile:
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + galaxy_parameter_name + ":\n"
                    command += " $token\n"
                    command += " #end for\n"
                else:
                    command += "\n#if $" + repeat_galaxy_parameter_name + ":\n"
                    command += "-" + str(param_name) + "\n"
                    command += " #for token in $" + repeat_galaxy_parameter_name + ":\n"
                    command += " #if \" \" in str(token):\n"
                    command += " \"$token." + galaxy_parameter_name + "\"\n"
                    command += " #else\n"
                    command += " $token." + galaxy_parameter_name + "\n"
                    command += " #end if\n"
                    command += " #end for\n"
                    command += "#end if\n"
            # logic for other ITEMs
            else:
                if param.advanced and param.type is not _OutFile:
                    actual_parameter = "$adv_opts.%s" % galaxy_parameter_name
                else:
                    actual_parameter = "$%s" % galaxy_parameter_name
                # if whitespace_validation has been set, we need to generate, for each parameter:
                # #if str( $t ).split() != '':
                # -t "$t"
                # #end if
                # TODO only useful for text fields, integers or floats
                # not useful for choices, input fields ...

                if not is_boolean_parameter(param) and type(param.restrictions) is _Choices:
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += " #if \" \" in str(" + actual_parameter + "):\n"
                    command += " \"" + actual_parameter + "\"\n"
                    command += " #else\n"
                    command += " " + actual_parameter + "\n"
                    command += " #end if\n"
                    command += "#end if\n"
                elif is_boolean_parameter(param):
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s\n' % param_name
                    command += "#end if\n"
                # compare by value: identity of two equal strings is an implementation detail
                elif TYPE_TO_GALAXY_TYPE[param.type] == 'text':
                    command += "#if " + actual_parameter + ":\n"
                    command += " -%s " % param_name
                    command += " \"" + actual_parameter + "\"\n"
                    command += "#end if\n"
                else:
                    command += "#if " + actual_parameter + ":\n"
                    command += ' -%s ' % param_name
                    command += actual_parameter + "\n"
                    command += "#end if\n"

        if param.advanced and param.type is not _OutFile:
            advanced_command += " %s" % command
        else:
            final_command += command

    if advanced_command:
        final_command += "%s%s%s\n" % (advanced_command_start, advanced_command, advanced_command_end)

    # without any output file parameter, the standard output is captured instead
    if not found_output_parameter:
        final_command += "> $param_stdout\n"

    command_node = add_child_node(tool, "command")
    command_node.text = final_command
783 | ||
784 | ||
# creates the xml elements needed to import the needed macros files
# and to "expand" the macros
def expand_macros(tool, model, **kwargs):
    """Add the ``<macros>`` node and one ``<expand>`` node per macro to the tool.

    The ``<macros>`` node holds the ``@EXECUTABLE@`` token and one ``<import>`` per macros file.

    :param tool: element that receives the generated nodes.
    :param model: the CTD model being converted.
    :param kwargs: reads ``default_executable_path``, ``macros_file_names``
        and ``macros_to_expand``.
    """
    macros_node = add_child_node(tool, "macros")
    token_node = add_child_node(macros_node, "token")
    token_node.attrib["name"] = "@EXECUTABLE@"
    token_node.text = get_tool_executable_path(model, kwargs["default_executable_path"])

    # add <import> nodes
    for macro_file_name in kwargs["macros_file_names"]:
        # the file is still opened, so an unreadable macros file keeps failing here,
        # but the handle is now released right away
        with open(macro_file_name) as macro_file:
            import_node = add_child_node(macros_node, "import")
            # do not add the path of the file, rather, just its basename
            import_node.text = os.path.basename(macro_file.name)

    # add <expand> nodes
    for expand_macro in kwargs["macros_to_expand"]:
        expand_node = add_child_node(tool, "expand")
        expand_node.attrib["macro"] = expand_macro
804 | ||
805 | ||
def get_tool_executable_path(model, default_executable_path):
    """Return the executable (optionally prefixed with its folder) used to run the tool.

    Rules:
      * the folder is the model's ``executablePath``, or *default_executable_path*
        when the model does not define one;
      * the executable is the model's ``executableName``, or the tool name when the
        model does not define one;
      * when a folder is known it is stripped and terminated with ``/`` before being
        put in front of the executable; otherwise the executable is returned alone.
    """
    executable_path = model.opt_attribs.get("executablePath", None)
    executable_name = model.opt_attribs.get("executableName", None)

    if executable_path is None:
        executable_path = default_executable_path

    # no folder at all: only the executable (or the tool name) is used
    if executable_path is None:
        return model.name if executable_name is None else executable_name

    # make sure that there is a '/' at the end of the folder
    folder = executable_path.strip()
    if not folder.endswith('/'):
        folder += '/'

    if executable_name is None:
        return folder + model.name
    return str(folder) + str(executable_name)
840 | ||
841 | ||
def get_galaxy_parameter_name(param):
    """Return the Galaxy name of a parameter: ``param_`` plus its name with ``:`` and ``-`` turned into ``_``."""
    sanitized_name = get_param_name(param)
    for replaced_character in (':', '-'):
        sanitized_name = sanitized_name.replace(replaced_character, '_')
    return "param_%s" % sanitized_name
844 | ||
845 | ||
def get_input_with_same_restrictions(out_param, model, supported_file_formats):
    """Return the first restricted input file parameter supporting the same file types as *out_param*.

    :return: the matching input parameter, or ``None`` when there is none.
    """
    for candidate in extract_parameters(model):
        # only input files that declare restrictions can be compared
        if candidate.type is not _InFile or candidate.restrictions is None:
            continue
        candidate_formats = get_supported_file_types(candidate.restrictions.formats, supported_file_formats)
        wanted_formats = get_supported_file_types(out_param.restrictions.formats, supported_file_formats)
        if candidate_formats == wanted_formats:
            return candidate
    return None
854 | ||
855 | ||
def create_inputs(tool, model, **kwargs):
    """Add the ``<inputs>`` node to the tool with one ``<param>`` per non-output parameter.

    Advanced parameters are placed inside an ``<expand>`` node for the advanced options
    macro, which is moved to the end of ``<inputs>`` when it received at least one child.
    List parameters that are not input files are wrapped in a ``<repeat>`` node.

    :param kwargs: reads ``parameter_hardcoder``, ``blacklisted_parameters``
        and ``supported_file_formats``.
    """
    inputs_node = SubElement(tool, "inputs")

    # some suites (such as OpenMS) need some advanced options when handling inputs
    expand_advanced_node = add_child_node(tool, "expand", OrderedDict([("macro", ADVANCED_OPTIONS_MACRO_NAME)]))
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    for param in extract_parameters(model):
        # blacklisted and hardcoded parameters are not shown to the user
        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
        if param.name in kwargs["blacklisted_parameters"] or hardcoded_value:
            continue
        # output files are not inputs
        if param.type is _OutFile:
            continue

        parent_node = inputs_node
        if param.advanced:
            if expand_advanced_node is not None:
                parent_node = expand_advanced_node
            else:
                # an advanced parameter without the advanced macro node: tell the user
                # and fall back to the plain inputs node
                info("The parameter %s has been set as advanced, but advanced_input_macro has "
                     "not been set." % param.name, 1)

        # for lists we need a repeat tag
        if param.is_list and param.type is not _InFile:
            repeat_node = add_child_node(parent_node, "repeat")
            create_repeat_attribute_list(repeat_node, param)
            parent_node = repeat_node

        param_node = add_child_node(parent_node, "param")
        create_param_attribute_list(param_node, param, kwargs["supported_file_formats"])

    # advanced parameter selection should be at the end
    # and only available if an advanced parameter exists
    if expand_advanced_node is not None and len(expand_advanced_node) > 0:
        inputs_node.append(expand_advanced_node)
897 | ||
898 | ||
def get_repeat_galaxy_parameter_name(param):
    """Return the name of the ``<repeat>`` block of a parameter: ``rep_`` plus its Galaxy name."""
    galaxy_name = get_galaxy_parameter_name(param)
    return "rep_" + galaxy_name
901 | ||
902 | ||
def create_repeat_attribute_list(rep_node, param):
    """Set the ``name``, ``min``, optional ``max`` and ``title`` attributes of a ``<repeat>`` node."""
    attributes = rep_node.attrib
    attributes["name"] = get_repeat_galaxy_parameter_name(param)
    # a required list needs at least one entry
    attributes["min"] = "1" if param.required else "0"
    # for the ITEMLISTs which have LISTITEM children we only
    # need one parameter as it is given as a string
    if param.default is not None:
        attributes["max"] = "1"
    attributes["title"] = get_galaxy_parameter_name(param)
914 | ||
915 | ||
def create_param_attribute_list(param_node, param, supported_file_formats):
    """Fill the attributes (and child nodes) of the ``<param>`` node of one input parameter.

    Sets ``name``, ``type``, ``format``/``multiple`` for input files, ``<option>`` children
    or ``min``/``max`` depending on the restrictions, ``optional``, a sanitizer for text
    parameters, ``value``, ``label`` and ``help``.

    :param param_node: the ``<param>`` element to fill.
    :param param: the CTD parameter.
    :param supported_file_formats: mapping of format name to data type.
    :raises InvalidModelException: for an unknown parameter type or for restrictions
        that do not fit the parameter type.
    """
    param_node.attrib["name"] = get_galaxy_parameter_name(param)

    # .get() so that an unknown type reaches the explicit error below instead of a KeyError
    param_type = TYPE_TO_GALAXY_TYPE.get(param.type)
    if param_type is None:
        # InvalidModelException is the ModelError subclass of this file that carries a message
        raise InvalidModelException("Unrecognized parameter type %(type)s for parameter %(name)s"
                                    % {"type": param.type, "name": param.name})

    if param.is_list:
        param_type = "text"

    if is_selection_parameter(param):
        param_type = "select"

    if is_boolean_parameter(param):
        param_type = "boolean"

    if param.type is _InFile:
        # assume it's just text unless restrictions are provided
        param_format = "text"
        if param.restrictions is not None:
            # join all supported_formats for the file... this MUST be a _FileFormat
            if type(param.restrictions) is _FileFormat:
                param_format = ','.join(get_supported_file_types(param.restrictions.formats, supported_file_formats))
            else:
                raise InvalidModelException("Expected 'file type' restrictions for input file [%(name)s], "
                                            "but instead got [%(type)s]"
                                            % {"name": param.name, "type": type(param.restrictions)})
        param_node.attrib["type"] = "data"
        param_node.attrib["format"] = param_format
        # in the case of multiple input set multiple flag
        if param.is_list:
            param_node.attrib["multiple"] = "true"
    else:
        param_node.attrib["type"] = param_type

    # check for parameters with restricted values (which will correspond to a "select" in galaxy)
    if param.restrictions is not None:
        # it could be either _Choices or _NumericRange, with special case for boolean types
        if param_type == "boolean":
            create_boolean_parameter(param_node, param)
        elif type(param.restrictions) is _Choices:
            # create as many <option> elements as restriction values
            for choice in param.restrictions.choices:
                option_node = add_child_node(param_node, "option", OrderedDict([("value", str(choice))]))
                option_node.text = str(choice)
        elif type(param.restrictions) is _NumericRange:
            if param.type is not int and param.type is not float:
                # report the offending parameter type (the restriction type is always
                # _NumericRange in this branch)
                raise InvalidModelException("Expected either 'int' or 'float' in the numeric range restriction for "
                                            "parameter [%(name)s], but instead got [%(type)s]" %
                                            {"name": param.name, "type": param.type})
            # extract the min and max values and add them as attributes
            if param.restrictions.n_min is not None:
                param_node.attrib["min"] = str(param.restrictions.n_min)
            if param.restrictions.n_max is not None:
                param_node.attrib["max"] = str(param.restrictions.n_max)
        elif type(param.restrictions) is _FileFormat:
            param_node.attrib["format"] = ",".join(
                get_supported_file_types(param.restrictions.formats, supported_file_formats))
        else:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] for parameter [%(name)s]"
                                        % {"type": type(param.restrictions), "name": param.name})

    param_node.attrib["optional"] = str(not param.required)

    if param_type == "text":
        # add size attribute... this is the length of a textbox field in Galaxy (it could also be 15x2, for instance)
        param_node.attrib["size"] = "30"
        # add sanitizer nodes, this is needed for special character like "["
        # which are used for example by FeatureFinderMultiplex
        sanitizer_node = SubElement(param_node, "sanitizer")

        valid_node = SubElement(sanitizer_node, "valid", OrderedDict([("initial", "string.printable")]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '\'')]))
        add_child_node(valid_node, "remove", OrderedDict([("value", '"')]))

    # check for default value
    if param.default is not None:
        if type(param.default) is list:
            # we ASSUME that a list of parameters looks like:
            # $ tool -ignore He Ar Xe
            # meaning, that, for example, Helium, Argon and Xenon will be ignored
            param_node.attrib["value"] = ' '.join(map(str, param.default))
        elif param_type != "boolean":
            # boolean parameters handle default values by using the "checked" attribute
            # there isn't much we can do... just stringify the value
            param_node.attrib["value"] = str(param.default)
    else:
        if param.type is int or param.type is float:
            # galaxy requires "value" to be included for int/float
            # since no default was included, we need to figure out one... but let the user know
            warning("Generating default value for parameter [%s]. "
                    "Galaxy requires the attribute 'value' to be set for integer/floats. "
                    "Edit the CTD file and provide a suitable default value." % param.name, 1)
            # check if there's a min/max and try to use them
            default_value = None
            if param.restrictions is not None:
                if type(param.restrictions) is _NumericRange:
                    default_value = param.restrictions.n_min
                    if default_value is None:
                        default_value = param.restrictions.n_max
                    if default_value is None:
                        # no min/max provided... just use 0 and see what happens
                        default_value = 0
                else:
                    # should never be here, since we have validated this anyway...
                    # however, better safe than sorry!
                    # (it could be that the code changes and then we have an ugly scenario)
                    raise InvalidModelException("Expected either a numeric range for parameter [%(name)s], "
                                                "but instead got [%(type)s]"
                                                % {"name": param.name, "type": type(param.restrictions)})
            else:
                # no restrictions and no default value provided...
                # make up something
                default_value = 0
            param_node.attrib["value"] = str(default_value)

    label = "%s parameter" % param.name
    help_text = ""

    if param.description is not None:
        label, help_text = generate_label_and_help(param.description)

    param_node.attrib["label"] = label
    param_node.attrib["help"] = "(-%s)" % param.name + " " + help_text
1047 | ||
1048 | ||
def generate_label_and_help(desc):
    """Split a parameter description into a short label and a help text.

    The description is cleaned first (``#br#`` tags become `` <br>``, trailing dots are
    removed, a lowercase first word is capitalized). Descriptions longer than 50
    characters are split at the first ``e.g.`` or, failing that, at the end of the
    first sentence; the remainder becomes the help text.

    :param desc: the description; it is converted with ``str()``.
    :return: a ``(label, help_text)`` tuple; *help_text* is empty when nothing was split off.
    """
    help_text = ""
    # This tag is found in some descriptions
    desc = str(desc).replace("#br#", " <br>")
    # Get rid of dots in the end
    if desc.endswith("."):
        desc = desc.rstrip(".")
    # Check if first word is a normal word and make it uppercase
    if " " in desc:
        first_word, rest = desc.split(" ", 1)
        if first_word.islower():
            # a first word with "/" as its second character (a quotient such as a/b)
            # is left untouched
            if first_word.find("/") != 1:
                # str.capitalize() returns a new string, so the result must be kept
                first_word = first_word.capitalize()
            desc = first_word + " " + rest
    label = desc

    # Try to split the label if it is too long
    if len(desc) > 50:
        # find an example and put everything before in the label and the e.g. in the help
        if desc.find("e.g.") > 1:
            label, help_text = desc.split("e.g.", 1)
            help_text = "e.g." + help_text
        else:
            # find the end of the first sentence
            # look for ". " because some labels contain .file or something similar
            dot_position = desc.find(". ")
            question_position = desc.find("? ")
            delimiter = ""
            if dot_position > 1 and question_position > 1:
                delimiter = ". " if dot_position < question_position else "? "
            elif dot_position > 1:
                delimiter = ". "
            elif question_position > 1:
                delimiter = "? "
            if delimiter != "":
                label, help_text = desc.split(delimiter, 1)

            # add the question mark back
            if delimiter == "? ":
                label += "? "

    # remove trailing line breaks; the "<br>" tag is removed as a whole, because
    # str.rstrip('<br>') would also eat trailing 'b', 'r', '<' and '>' characters
    label = label.rstrip()
    while label.endswith("<br>"):
        label = label[:-len("<br>")].rstrip()
    return label, help_text
1096 | ||
1097 | ||
def get_indented_text(text, indentation_level):
    """Return *text* prefixed with ``MESSAGE_INDENTATION_INCREMENT`` spaces per indentation level."""
    padding = " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level)
    return "%s%s" % (padding, text)
1102 | ||
1103 | ||
def warning(warning_text, indentation_level):
    """Write an indented ``WARNING:`` line to the standard output."""
    message = "WARNING: %s\n" % warning_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1106 | ||
1107 | ||
def error(error_text, indentation_level):
    """Write an indented ``ERROR:`` line to the standard error."""
    message = "ERROR: %s\n" % error_text
    sys.stderr.write(get_indented_text(message, indentation_level))
1110 | ||
1111 | ||
def info(info_text, indentation_level):
    """Write an indented ``INFO:`` line to the standard output."""
    message = "INFO: %s\n" % info_text
    sys.stdout.write(get_indented_text(message, indentation_level))
1114 | ||
1115 | ||
# determines if the given choices are boolean (basically, if the possible values are yes/no, true/false)
def is_boolean_parameter(param):
    """Return ``True`` when the parameter is restricted to exactly the choices yes/no or true/false.

    The comparison ignores case and surrounding whitespace of the choices.
    """
    if type(param.restrictions) is not _Choices:
        return False
    # for a true boolean experience, we need 2 values
    if len(param.restrictions.choices) != 2:
        return False
    # ... and those two values must be either yes/no or true/false
    choices = get_lowercase_list(param.restrictions.choices)
    is_yes_no = "yes" in choices and "no" in choices
    return is_yes_no or ("true" in choices and "false" in choices)
1127 | ||
1128 | ||
# determines if there are choices for the parameter
def is_selection_parameter(param):
    """Return ``True`` when the restrictions of the parameter are a list of choices."""
    restriction_type = type(param.restrictions)
    return restriction_type is _Choices
1132 | ||
1133 | ||
def get_lowercase_list(some_list):
    """Return a new list with every item converted to a lowercase, stripped string.

    String methods are used instead of the ``string.lower``/``string.strip`` functions,
    and a real list is always returned, so ``len()`` and repeated ``in`` tests on the
    result work regardless of what ``map`` returns.
    """
    return [str(item).lower().strip() for item in some_list]
1139 | ||
1140 | ||
# creates a galaxy boolean parameter type
# this method assumes that param has restrictions, and that only two restrictions are present
# (either yes/no or true/false)
def create_boolean_parameter(param_node, param):
    """Set the ``truevalue``, ``falsevalue`` and ``checked`` attributes of a boolean ``<param>``.

    yes/no choices are passed through as values; any other boolean is handled as a flag
    (``-<name>`` when set, nothing otherwise). ``checked`` is only set when the parameter
    has a default value.
    """
    # TODO: true and false values can be way more than 'true' and 'false'
    # but for that we need CTD support
    choices = get_lowercase_list(param.restrictions.choices)
    if "yes" in choices:
        true_value = "yes"
        false_value = "no"
    else:
        # by default, 'true' and 'false' are handled as flags, like the verbose flag (i.e., -v)
        true_value = "-%s" % get_param_name(param)
        false_value = ""
    param_node.attrib["truevalue"] = true_value
    param_node.attrib["falsevalue"] = false_value

    # set the checked attribute
    if param.default is not None:
        # str() first, so that a non-string default does not break the normalization
        default = str(param.default).lower().strip()
        param_node.attrib["checked"] = "true" if default in ("yes", "true") else "false"
1167 | ||
1168 | ||
def create_outputs(parent, model, **kwargs):
    """Add the ``<outputs>`` node with one ``<data>`` node per output file parameter.

    Blacklisted and hardcoded parameters are left out. When no output node was
    generated, a single ``param_stdout`` node is added instead.

    :param kwargs: reads ``parameter_hardcoder``, ``blacklisted_parameters``
        and ``supported_file_formats``.
    """
    outputs_node = add_child_node(parent, "outputs")
    parameter_hardcoder = kwargs["parameter_hardcoder"]

    for param in extract_parameters(model):
        # no need to show hardcoded or blacklisted parameters
        hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name)
        is_hidden = param.name in kwargs["blacklisted_parameters"] or hardcoded_value
        if not is_hidden and param.type is _OutFile:
            create_output_node(outputs_node, param, model, kwargs["supported_file_formats"])

    # If there are no outputs defined in the ctd the node will have no children
    # and the stdout will be used as output
    if len(outputs_node) == 0:
        stdout_attributes = OrderedDict([("name", "param_stdout"), ("format", "text"),
                                         ("label", "Output from stdout")])
        add_child_node(outputs_node, "data", stdout_attributes)
1188 | ||
1189 | ||
def create_output_node(parent, param, model, supported_file_formats):
    """Add the ``<data>`` node of one output file parameter and return it.

    The format defaults to ``data``. With file format restrictions, one of the supported
    formats is used; when several are supported and an input file with the same formats
    exists, the format is taken from that input (``format="input"`` plus ``metadata_source``).

    :raises InvalidModelException: when the restrictions are not file formats.
    """
    data_node = add_child_node(parent, "data")
    data_node.attrib["name"] = get_galaxy_parameter_name(param)

    data_format = "data"
    restrictions = param.restrictions
    if restrictions is not None:
        if type(restrictions) is not _FileFormat:
            raise InvalidModelException("Unrecognized restriction type [%(type)s] "
                                        "for output [%(name)s]" % {"type": type(param.restrictions),
                                                                   "name": param.name})

        # check if there are formats that have not been registered yet...
        output = "".join(" " + str(format_name) for format_name in restrictions.formats
                         if format_name not in supported_file_formats)
        # ... and warn only if there is something to complain about
        if output:
            warning("Parameter " + param.name + " has the following unsupported format(s):" + output, 1)

        formats = get_supported_file_types(restrictions.formats, supported_file_formats)
        # take one of the supported formats, if any (otherwise "data" is kept)
        if formats:
            data_format = formats.pop()
        # if there are more than one output file formats try to take the format from the input parameter
        if formats:
            corresponding_input = get_input_with_same_restrictions(param, model, supported_file_formats)
            if corresponding_input is not None:
                data_format = "input"
                data_node.attrib["metadata_source"] = get_galaxy_parameter_name(corresponding_input)

    data_node.attrib["format"] = data_format

    # TODO: find a smarter label ?
    return data_node
1231 | ||
1232 | ||
def get_supported_file_types(formats, supported_file_formats):
    """Return the set of Galaxy extensions of the given formats that are supported.

    Formats missing from *supported_file_formats* are ignored. Only registered formats
    are looked up, so no fallback data type has to be built for each of them.

    :param formats: iterable of format names.
    :param supported_file_formats: mapping of format name to an object with a
        ``galaxy_extension`` attribute.
    """
    return set(supported_file_formats[format_name].galaxy_extension
               for format_name in formats if format_name in supported_file_formats)
1236 | ||
1237 | ||
def create_change_format_node(parent, data_formats, input_ref):
    """Add a ``<change_format>`` node with one ``<when>`` child per data format.

    Generated structure::

        <change_format>
            <when input="secondary_structure" value="true" format="text"/>
        </change_format>
    """
    change_format_node = add_child_node(parent, "change_format")
    for data_format in data_formats:
        when_attributes = OrderedDict([("input", input_ref), ("value", data_format), ("format", data_format)])
        add_child_node(change_format_node, "when", when_attributes)
1246 | ||
1247 | ||
# adds the <help> node, built from the optional 'manual' and 'docurl' attributes of the model
def create_help(tool, model):
    """Add the ``<help>`` node to the tool.

    The help text is the manual of the model, followed by a pointer to the documentation
    URL when there is one. ``No help available`` is used when the model has neither.
    """
    manual = ''
    if 'manual' in model.opt_attribs:
        manual = '%s\n\n' % model.opt_attribs["manual"]
    doc_url = model.opt_attribs.get("docurl")

    # an empty manual must not hide the fallback text
    help_text = manual if manual else "No help available"
    if doc_url is not None:
        help_text = manual + "\nFor more information, visit %s" % doc_url
    help_node = add_child_node(tool, "help")
    # TODO: do we need CDATA Section here?
    help_node.text = help_text
1265 | ||
1266 | ||
def extract_parameters(model):
    """Flatten the (possibly nested) parameter groups of ``model``.

    A model might contain several ParameterGroup elements; to generate the
    Galaxy wrapper only the parameters themselves are needed, so every group
    is replaced by its first-level children until no group is left.

    :param model: object whose ``parameters`` attribute is the single parent
        ParameterGroup of the model
    :return: reverse iterator over the collected non-group parameters
    """
    flattened = []
    if len(model.parameters.parameters) == 0:
        return reversed(flattened)

    # elements that still have to be processed; the model's parent group
    # is the starting point
    stack = [model.parameters]
    while stack:
        current = stack.pop()
        if type(current) is ParameterGroup:
            # schedule the first-level children of this group
            stack.extend(current.parameters.values())
        else:
            flattened.append(current)

    # popping from the end of the stack collects the last parameter first,
    # hence the result is handed out in reversed order
    return reversed(flattened)
1286 | ||
1287 | ||
def add_child_node(parent_node, child_node_name, attributes=None):
    """Add a child node with the given name to the given parent node.

    :param parent_node: node that receives the new child
    :param child_node_name: tag name of the new child
    :param attributes: optional mapping of attribute names to values; when
        omitted, the child is created without attributes
    :return: the newly created child node
    """
    # a fresh mapping per call avoids sharing one default instance
    # between all invocations
    if attributes is None:
        attributes = OrderedDict()
    return SubElement(parent_node, child_node_name, attributes)
1292 | ||
1293 | ||
# run the converter only when executed as a script and
# hand whatever main() returns to the interpreter as exit status
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
0 | <?xml version='1.0' encoding='UTF-8'?> | |
1 | <!-- CTD2Galaxy depends on this file and on the stdio, requirements and advanced_options macros! | |
2 | You can edit this file to add your own macros, if you so desire, or you can | |
3 | add additional macro files using the -m/--macros parameter. --> | |
4 | <macros> | |
5 | <xml name="requirements"> | |
6 | <requirements> | |
7 | <requirement type="binary">@EXECUTABLE@</requirement> | |
8 | </requirements> | |
9 | </xml> | |
10 | <xml name="stdio"> | |
11 | <stdio> | |
12 | <exit_code range="1:"/> | |
13 | <exit_code range=":-1"/> | |
14 | <regex match="Error:"/> | |
15 | <regex match="Exception:"/> | |
16 | </stdio> | |
17 | </xml> | |
18 | <xml name="advanced_options"> | |
19 | <conditional name="adv_opts"> | |
20 | <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
21 | <option value="basic" selected="True">Hide Advanced Options</option> | |
22 | <option value="advanced">Show Advanced Options</option> | |
23 | </param> | |
24 | <when value="basic"/> | |
25 | <when value="advanced"> | |
26 | <yield/> | |
27 | </when> | |
28 | </conditional> | |
29 | </xml> | |
30 | </macros> |
"""Installation script for CTD2Galaxy."""
from distutils.core import setup

setup(
    name='CTD2Galaxy',
    version='1.0',
    packages=['CTD2Galaxy'],
    url='https://github.com/WorkflowConversion/CTD2Galaxy',
    license='',
    author='Luis de la Garza',
    author_email='',
    # py_modules takes dotted module names, not file system paths
    py_modules=['CTD2Galaxy.generator'],
    description='A program to convert CTDs to Galaxy tool wrappers.'
)