Merge pull request #39 from suprematic/master
Add support for UTF-8 encoded binaries
Juan Jose Comellas authored 6 years ago
GitHub committed 6 years ago
56 | 56 | specifications. The type specification for the tuple is: |
57 | 57 | |
58 | 58 | ```erlang |
59 | -type arg_type() :: 'atom' | 'binary' | 'boolean' | 'float' | 'integer' | 'string'. | |
59 | -type arg_type() :: 'atom' | 'binary' | 'utf8_binary' | 'boolean' | 'float' | 'integer' | 'string'. | |
60 | 60 | |
61 | 61 | -type arg_value() :: atom() | binary() | boolean() | float() | integer() | string(). |
62 | 62 | |
243 | 243 | Argument Types |
244 | 244 | -------------- |
245 | 245 | |
246 | The arguments allowed for options are: *atom*; *binary*; *boolean*; *float*; *integer*; *string*. | |
246 | The arguments allowed for options are: *atom*; *binary*; *utf8_binary*; *boolean*; | |
247 | *float*; *integer*; *string*. | |
247 | 248 | The `getopt` module checks every argument to see if it can be converted to its |
248 | 249 | correct type. |
249 | 250 | |
254 | 255 | Numeric arguments can only be negative when passed as part of an assignment expression. |
255 | 256 | |
256 | 257 | For example, `--increment=-100` is a valid expression, whereas `--increment -100` is invalid.
258 | ||
259 | Arguments of the `utf8_binary` type are encoded correctly even when they contain | |
260 | code points greater than 255. The resulting value is an NFC-normalized UTF-8 binary. | |
261 | ||
262 | As of Erlang/OTP 20, the `standard_error` device has its `unicode` option set to `false`. | |
263 | This prevents the usage text from being printed correctly when a default value | |
264 | is a Unicode binary or string. To fix this, enable Unicode on the device: | |
265 | ||
266 | ```erlang | |
267 | io:setopts(standard_error, [{unicode, true}]). | |
268 | ``` | |
257 | 269 | |
258 | 270 | |
259 | 271 | Implicit Arguments |
12 | 12 | |
13 | 13 | -export([parse/2, check/2, parse_and_check/2, format_error/2, |
14 | 14 | usage/2, usage/3, usage/4, usage/6, tokenize/1]). |
15 | -export([usage_cmd_line/2]). | |
15 | -export([usage_cmd_line/2, usage_options/1]). | |
16 | 16 | |
17 | 17 | -define(LINE_LENGTH, 75). |
18 | 18 | -define(MIN_USAGE_COMMAND_LINE_OPTION_LENGTH, 25). |
29 | 29 | (Char) =:= $\n orelse (Char) =:= $\r)). |
30 | 30 | |
31 | 31 | %% Atom indicating the data type that an argument can be converted to. |
32 | -type arg_type() :: 'atom' | 'binary' | 'boolean' | 'float' | 'integer' | 'string'. | |
32 | -type arg_type() :: 'atom' | 'binary' | 'utf8_binary' | 'boolean' | 'float' | 'integer' | 'string'. | |
33 | 33 | %% Data type that an argument can be converted to. |
34 | 34 | -type arg_value() :: atom() | binary() | boolean() | float() | integer() | string(). |
35 | 35 | %% Argument specification. |
434 | 434 | to_type(Type, Arg); |
435 | 435 | to_type(binary, Arg) -> |
436 | 436 | list_to_binary(Arg); |
437 | to_type(utf8_binary, Arg) -> | |
438 | unicode:characters_to_nfc_binary(Arg); | |
437 | 439 | to_type(atom, Arg) -> |
438 | 440 | list_to_atom(Arg); |
439 | 441 | to_type(integer, Arg) -> |
729 | 731 | |
730 | 732 | |
731 | 733 | -spec usage_help_text(option_spec()) -> string(). |
732 | usage_help_text({_Name, _Short, _Long, {_ArgType, ArgValue}, [_ | _] = Help}) -> | |
733 | Help ++ " [default: " ++ default_arg_value_to_string(ArgValue) ++ "]"; | |
734 | usage_help_text({_Name, _Short, _Long, {ArgType, ArgValue}, [_ | _] = Help}) -> | |
735 | Help ++ " [default: " ++ default_arg_value_to_string(ArgType, ArgValue) ++ "]"; | |
734 | 736 | usage_help_text({_Name, _Short, _Long, _ArgSpec, Help}) -> |
735 | 737 | Help. |
736 | 738 | |
803 | 805 | lists:reverse(Acc). |
804 | 806 | |
805 | 807 | |
806 | default_arg_value_to_string(Value) when is_atom(Value) -> | |
808 | default_arg_value_to_string(_, Value) when is_atom(Value) -> | |
807 | 809 | atom_to_list(Value); |
808 | default_arg_value_to_string(Value) when is_binary(Value) -> | |
810 | default_arg_value_to_string(binary, Value) when is_binary(Value) -> | |
809 | 811 | binary_to_list(Value); |
810 | default_arg_value_to_string(Value) when is_integer(Value) -> | |
812 | default_arg_value_to_string(utf8_binary, Value) when is_binary(Value) -> | |
813 | unicode:characters_to_list(Value); | |
814 | default_arg_value_to_string(_, Value) when is_integer(Value) -> | |
811 | 815 | integer_to_list(Value); |
812 | default_arg_value_to_string(Value) when is_float(Value) -> | |
816 | default_arg_value_to_string(_, Value) when is_float(Value) -> | |
813 | 817 | lists:flatten(io_lib:format("~w", [Value])); |
814 | default_arg_value_to_string(Value) -> | |
818 | default_arg_value_to_string(_, Value) -> | |
815 | 819 | Value. |
816 | 820 | |
817 | 821 |
322 | 322 | ?_assertEqual("option 'verbose' has invalid argument: 100", |
323 | 323 | format_error(OptSpecList, {error, {invalid_option_arg, {verbose, "100"}}}))} |
324 | 324 | ]. |
325 | ||
326 | utf8_binary_test_() -> | |
327 | OptSpecList = [{utf8, undefined, "utf8", utf8_binary, "UTF-8 arg"}], | |
328 | Unicode = [228, 220, 223, 1455], | |
329 | Utf8 = unicode:characters_to_binary(Unicode), | |
330 | io:setopts(standard_error, [{encoding, utf8}]), | |
331 | OptSpecsWithDefault = [{utf8, undefined, "utf8", {utf8_binary, Utf8}, "UTF-8 arg"}], | |
332 | [{"Empty utf8_binary argument", | |
333 | ?_assertEqual({ok, {[{utf8, <<>>}], []}}, parse(OptSpecList, ["--utf8", ""]))}, | |
334 | {"Non empty utf8_binary argument", | |
335 | ?_assertEqual({ok, {[{utf8, Utf8}], []}}, parse(OptSpecList, ["--utf8", Unicode]))}, | |
336 | {"Default utf8_binary argument", | |
337 | ?_assertEqual({ok, {[{utf8, Utf8}], []}}, parse(OptSpecsWithDefault, []))}, | |
338 | {"Default utf8_binary argument usage", | |
339 | ?_assert(is_list(string:find(getopt:usage_options(OptSpecsWithDefault), Unicode)))}]. |