diff --git a/README.md b/README.md index f4b477e..4e00596 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Ohcount does not support Windows. -Ohcount targets Ruby 1.9.3. The build script requires a bash shell. You +Ohcount targets Ruby 2.2.3. The build script requires a bash shell. You also need a C compiler to build the native extensions. Source Code @@ -63,8 +63,18 @@ Building Ohcount ---------------- +> Last updated: 2018-05-10 + You will need ragel 6.8 or higher, bash, gperf, libpcre3-dev, libmagic-dev, gcc (version 4.8.2 or greater) -and SWIG (2.0.11). Once you have them, go to the top directory of ohcount and run +and SWIG (2.0.11). To get these dependencies on Ubuntu/Debian you can run this command: + + sudo apt-get install libpcre3 libpcre3-dev libmagic-dev gperf gcc ragel swig + +Ruby 2.2.3 also requires the 'test-unit' gem. Install it with this command: + + gem install test-unit + +Once you have them, go to the top directory of ohcount and run ``` ./build diff --git a/ruby/x86_64-linux_ubuntu/ohcount.so b/ruby/x86_64-linux_ubuntu/ohcount.so index fffe1b6..6c650da 100755 Binary files a/ruby/x86_64-linux_ubuntu/ohcount.so and b/ruby/x86_64-linux_ubuntu/ohcount.so differ diff --git a/src/detector.c b/src/detector.c index d6b693d..1611c5b 100644 --- a/src/detector.c +++ b/src/detector.c @@ -438,6 +438,37 @@ } return NULL; // only blanks } + +const char *disambiguate_fs(SourceFile *sourcefile) { + /* .fs could be Forth or F# */ + char *contents = ohcount_sourcefile_get_contents(sourcefile); + if (contents == NULL) + return NULL; + char *p = contents; + char c = *p; + long forthcount=0; + long fsharpcount=0; + while (c != '\0') { + while (c == ' ' || c == '\t') + c = *++p; + if (strncmp(p,"\\ ",2)==0) forthcount++; + else if (strncmp(p,": ",2)==0) forthcount++; + else if (strncmp(p,"|",1)==0) fsharpcount++; + else if (strncmp(p,"let ",4)==0) fsharpcount++; + else if (strncmp(p,"type ",5)==0) fsharpcount++; + else if 
(strncmp(p,"//",2)==0) fsharpcount++; + while (c != '\0' && c != '\n' && c != '\r') + c = *++p; + while (c == '\n' || c == '\r') + c = *++p; + } + if (forthcount > fsharpcount) + return LANG_FORTH; + else if (forthcount < fsharpcount) + return LANG_FSHARP; + else + return NULL; +} const char *disambiguate_fortran(SourceFile *sourcefile) { char *p; diff --git a/src/hash/disambiguatefuncs.gperf b/src/hash/disambiguatefuncs.gperf index a805b7d..87b341e 100644 --- a/src/hash/disambiguatefuncs.gperf +++ b/src/hash/disambiguatefuncs.gperf @@ -10,6 +10,7 @@ const char *disambiguate_dat(SourceFile *sourcefile); const char *disambiguate_def(SourceFile *sourcefile); const char *disambiguate_fortran(SourceFile *sourcefile); +const char *disambiguate_fs(SourceFile *sourcefile); const char *disambiguate_h(SourceFile *sourcefile); const char *disambiguate_in(SourceFile *sourcefile); const char *disambiguate_inc(SourceFile *sourcefile); @@ -32,6 +33,7 @@ dat, disambiguate_dat def, disambiguate_def fortran, disambiguate_fortran +fs, disambiguate_fs h, disambiguate_h in, disambiguate_in inc, disambiguate_inc diff --git a/src/hash/extensions.gperf b/src/hash/extensions.gperf index 62923f9..bc014c9 100755 --- a/src/hash/extensions.gperf +++ b/src/hash/extensions.gperf @@ -43,6 +43,7 @@ coffee, LANG_COFFEESCRIPT com, LANG_DCL cpp, LANG_CPP +cr, LANG_CRYSTAL cs, DISAMBIGUATE("cs") csproj, LANG_XML css, LANG_CSS @@ -74,11 +75,14 @@ factor, LANG_FACTOR fr, LANG_FORTH frag, LANG_GLSL +frt, LANG_FORTH for, DISAMBIGUATE("fortran") +forth, LANG_FORTH fpp, DISAMBIGUATE("fortran") frm, LANG_VISUALBASIC frx, LANG_VISUALBASIC -fs, LANG_FSHARP +fs, DISAMBIGUATE("fs") +fth, LANG_FORTH ftn, DISAMBIGUATE("fortran") gemspec, LANG_RUBY gif, BINARY @@ -112,6 +116,8 @@ js, LANG_JAVASCRIPT jsp, LANG_JSP kdebuild-1, LANG_EBUILD +kt, LANG_KOTLIN +lc, LANG_LIVECODE latex, LANG_TEX lisp, LANG_LISP lsp, LANG_LISP @@ -128,6 +134,7 @@ ml4, LANG_OCAML mli, LANG_OCAML mm, LANG_OBJECTIVE_C +mo, LANG_MODELICA mod, 
DISAMBIGUATE("mod") mov, BINARY mp, LANG_METAPOST_WITH_TEX @@ -163,6 +170,7 @@ pp, DISAMBIGUATE("pp") ppt, BINARY pro, DISAMBIGUATE("pro") +ps, LANG_POSTSCRIPT py, LANG_PYTHON qml, LANG_QML qt, BINARY @@ -208,6 +216,7 @@ tif, BINARY tiff, BINARY tpl, LANG_HTML +uc, LANG_UNREALSCRIPT ts, LANG_TYPESCRIPT tsx, LANG_TYPESCRIPT vala, LANG_VALA diff --git a/src/hash/filenames.gperf b/src/hash/filenames.gperf index b149019..ede4fbc 100644 --- a/src/hash/filenames.gperf +++ b/src/hash/filenames.gperf @@ -18,3 +18,5 @@ rakefile, LANG_RUBY Gemfile, LANG_RUBY Vagrantfile, LANG_RUBY +PKGBUILD, LANG_SHELL +script.utf8, LANG_LIVECODE diff --git a/src/hash/languages.gperf b/src/hash/languages.gperf index 1cf037e..7c70f95 100755 --- a/src/hash/languages.gperf +++ b/src/hash/languages.gperf @@ -27,6 +27,7 @@ coffescript, LANG_COFFEESCRIPT, "CoffeeScript", 0 coq, LANG_COQ, "Coq", 0 cpp, LANG_CPP, "C++", 0 +crystal, LANG_CRYSTAL, "Crystal", 0 cs_aspx, LANG_CS_ASPX, "", 0 csharp, LANG_CSHARP, "C#", 0 css, LANG_CSS, "CSS", 1 @@ -60,8 +61,10 @@ java, LANG_JAVA, "Java", 0 javascript, LANG_JAVASCRIPT, "JavaScript", 0 jsp, LANG_JSP, "", 0 +kotlin, LANG_KOTLIN, "Kotlin", 0 limbo, LANG_LIMBO, "Limbo", 0 lisp, LANG_LISP, "Lisp", 0 +livecode, LANG_LIVECODE, "LiveCode", 0 logtalk, LANG_LOGTALK, "Logtalk", 0 lua, LANG_LUA, "Lua", 0 make, LANG_MAKE, "Make", 2 @@ -70,6 +73,7 @@ metafont, LANG_METAFONT, "MetaFont", 1 metapost, LANG_METAPOST, "MetaPost", 1 metapost_with_tex, LANG_METAPOST_WITH_TEX, "", 0 +modelica, LANG_MODELICA, "Modelica", 0 modula2, LANG_MODULA2, "Modula-2", 0 modula3, LANG_MODULA3, "Modula-3", 0 mxml, LANG_MXML, "MXML", 1 @@ -84,6 +88,7 @@ perl, LANG_PERL, "Perl", 0 php, LANG_PHP, "PHP", 0 pike, LANG_PIKE, "Pike", 0 +postscript, LANG_POSTSCRIPT, "PostScript", 1 prolog, LANG_PROLOG, "Prolog", 0 puppet, LANG_PUPPET, "Puppet", 0 python, LANG_PYTHON, "Python", 0 @@ -105,6 +110,7 @@ sql, LANG_SQL, "SQL", 0 tcl, LANG_TCL, "TCL", 0 tex, LANG_TEX, "TeX/LaTeX", 1 +unrealscript, 
LANG_UNREALSCRIPT, "UnrealScript", 0 typescript, LANG_TYPESCRIPT, "TypeScript", 0 vala, LANG_VALA, "Vala", 0 vb_aspx, LANG_VB_ASPX, "", 0 diff --git a/src/hash/parsers.gperf b/src/hash/parsers.gperf index 2446162..577002c 100644 --- a/src/hash/parsers.gperf +++ b/src/hash/parsers.gperf @@ -20,6 +20,7 @@ #include "../parsers/coffeescript.h" #include "../parsers/cmake.h" #include "../parsers/coq.h" +#include "../parsers/crystal.h" #include "../parsers/cs_aspx.h" #include "../parsers/css.h" #include "../parsers/d.h" @@ -47,8 +48,10 @@ #include "../parsers/java.h" #include "../parsers/javascript.h" #include "../parsers/jsp.h" +#include "../parsers/kotlin.h" #include "../parsers/lisp.h" #include "../parsers/limbo.h" +#include "../parsers/livecode.h" #include "../parsers/logtalk.h" #include "../parsers/lua.h" #include "../parsers/makefile.h" @@ -57,6 +60,7 @@ #include "../parsers/metafont.h" #include "../parsers/metapost.h" #include "../parsers/metapost_with_tex.h" +#include "../parsers/modelica.h" #include "../parsers/modula2.h" #include "../parsers/modula3.h" #include "../parsers/mxml.h" @@ -71,6 +75,7 @@ #include "../parsers/perl.h" #include "../parsers/phphtml.h" #include "../parsers/pike.h" +#include "../parsers/postscript.h" #include "../parsers/prolog.h" #include "../parsers/puppet.h" #include "../parsers/python.h" @@ -90,6 +95,7 @@ #include "../parsers/tcl.h" #include "../parsers/tex.h" #include "../parsers/tex_dtx.h" +#include "../parsers/unrealscript.h" #include "../parsers/vb_aspx.h" #include "../parsers/vhdl.h" #include "../parsers/vim.h" @@ -123,6 +129,7 @@ clojure, parse_clojure coffeescript, parse_coffeescript coq, parse_coq +crystal, parse_crystal cpp, parse_cpp cs_aspx, parse_cs_aspx csharp, parse_csharp @@ -156,8 +163,11 @@ java, parse_java javascript, parse_javascript jsp, parse_jsp +kotlin, parse_kotlin lisp, parse_lisp limbo, parse_limbo +lisp, parse_lisp +livecode, parse_livecode logtalk, parse_logtalk lua, parse_lua make, parse_makefile @@ -166,6 
+176,7 @@ metafont, parse_metafont metapost, parse_metapost metapost_with_tex, parse_mptex +modelica, parse_modelica modula2, parse_modula2 modula3, parse_modula3 mxml, parse_mxml @@ -180,6 +191,7 @@ perl, parse_perl php, parse_phtml pike, parse_pike +postscript, parse_postscript prolog, parse_prolog puppet, parse_puppet python, parse_python @@ -202,6 +214,7 @@ tcl, parse_tcl tex, parse_tex tex_dtx, parse_tex_dtx +unrealscript, parse_unrealscript typescript, parse_typescript vala, parse_vala vb_aspx, parse_vb_aspx diff --git a/src/languages.h b/src/languages.h index acdc5c0..69506e8 100755 --- a/src/languages.h +++ b/src/languages.h @@ -29,6 +29,7 @@ #define LANG_COFFEESCRIPT "coffeescript" #define LANG_COQ "coq" #define LANG_CPP "cpp" +#define LANG_CRYSTAL "crystal" #define LANG_CS_ASPX "cs_aspx" #define LANG_CSHARP "csharp" #define LANG_CSS "css" @@ -61,8 +62,10 @@ #define LANG_JAVA "java" #define LANG_JAVASCRIPT "javascript" #define LANG_JSP "jsp" +#define LANG_KOTLIN "kotlin" #define LANG_LIMBO "limbo" #define LANG_LISP "lisp" +#define LANG_LIVECODE "livecode" #define LANG_LOGTALK "logtalk" #define LANG_LUA "lua" #define LANG_MAKE "make" @@ -71,6 +74,7 @@ #define LANG_METAFONT "metafont" #define LANG_METAPOST "metapost" #define LANG_METAPOST_WITH_TEX "metapost_with_tex" +#define LANG_MODELICA "modelica" #define LANG_MODULA2 "modula2" #define LANG_MODULA3 "modula3" #define LANG_MXML "mxml" @@ -85,6 +89,7 @@ #define LANG_PERL "perl" #define LANG_PHP "php" #define LANG_PIKE "pike" +#define LANG_POSTSCRIPT "postscript" #define LANG_PROLOG "prolog" #define LANG_PUPPET "puppet" #define LANG_PYTHON "python" @@ -107,6 +112,7 @@ #define LANG_TCL "tcl" #define LANG_TEX "tex" #define LANG_TEX_DTX "tex_dtx" +#define LANG_UNREALSCRIPT "unrealscript" #define LANG_TYPESCRIPT "typescript" #define LANG_VALA "vala" #define LANG_VB_ASPX "vb_aspx" diff --git a/src/parsers/crystal.rl b/src/parsers/crystal.rl new file mode 100644 index 0000000..31b582f --- /dev/null +++ 
b/src/parsers/crystal.rl @@ -0,0 +1,177 @@ +// renamed copy of ruby.rl(without *_sq_str). + +/************************* Required for every parser *************************/ +#ifndef OHCOUNT_CRYSTAL_PARSER_H +#define OHCOUNT_CRYSTAL_PARSER_H + +#include "../parser_macros.h" + +// the name of the language +const char *CRYSTAL_LANG = LANG_CRYSTAL; + +// the languages entities +const char *crystal_entities[] = { + "space", "comment", "string", "any" +}; + +// constants associated with the entities +enum { + CRYSTAL_SPACE = 0, CRYSTAL_COMMENT, CRYSTAL_STRING, CRYSTAL_ANY +}; + +/*****************************************************************************/ + +%%{ + machine crystal; + write data; + include common "common.rl"; + + # Line counting machine + + action crystal_ccallback { + switch(entity) { + case CRYSTAL_SPACE: + ls + break; + case CRYSTAL_ANY: + code + break; + case INTERNAL_NL: + std_internal_newline(CRYSTAL_LANG) + break; + case NEWLINE: + std_newline(CRYSTAL_LANG) + } + } + + crystal_line_comment = '#' @comment nonnewline*; + # TODO: detect =begin and =end at start of their lines + # Can't do that now because using 'when starts_line' fails a Ragel assertion. + crystal_block_comment = + '=begin' @enqueue @comment ( + newline %{ entity = INTERNAL_NL; } %crystal_ccallback + | + ws + | + (nonnewline - ws) @comment + )* :>> '=end' @commit; + crystal_comment = crystal_line_comment | crystal_block_comment; + + crystal_dq_str = + '"' @enqueue @code ( + newline %{ entity = INTERNAL_NL; } %crystal_ccallback + | + ws + | + [^\r\n\f\t "\\] @code + | + '\\' nonnewline @code + )* '"' @commit @code; + # TODO: true literal string detection + # Turns out any non-alphanum char can be after the initial '%' for a literal + # string. I only have '(', '[', '{' for now because they are common(?). Their + # respective closing characters need to be escaped though, which is not + # accurate; only the single closing character needs to be escaped in a literal + # string. 
+ # We need to detect which non-alphanum char opens a literal string, somehow + # let Ragel know what it is (currently unsupported), and put its respective + # closing char in the literal string below. + crystal_lit_str = + '%' [qQ]? [(\[{] @enqueue @code ( + newline %{ entity = INTERNAL_NL; } %crystal_ccallback + | + ws + | + [^\r\n\f\t )\]}\\] @code + | + '\\' nonnewline @code + )* [)\]}] @commit @code; + crystal_cmd_str = + '`' @enqueue @code ( + newline %{ entity = INTERNAL_NL; } %crystal_ccallback + | + ws + | + [^\r\n\f\t `\\] @code + | + '\\' nonnewline @code + )* '`' @commit @code; + crystal_regex = '/' ([^\r\n\f/\\] | '\\' nonnewline)* '/' @code; + # TODO: true literal array and command detection + # See TODO above about literal string detection + crystal_lit_other = + '%' [wrx] [(\[{] @enqueue @code ( + newline %{ entity = INTERNAL_NL; } %crystal_ccallback + | + ws + | + [^\r\n\f\t )\]}\\] @code + | + '\\' nonnewline @code + )* [)\]}] @commit @code; + # TODO: heredoc detection + # This is impossible with current Ragel. We need to extract what the end + # delimiter should be from the heredoc and search up to it on a new line. 
+ # crystal_heredoc = + crystal_string = + crystal_dq_str | crystal_lit_str | crystal_cmd_str | crystal_regex | + crystal_lit_other; + + crystal_line := |* + spaces ${ entity = CRYSTAL_SPACE; } => crystal_ccallback; + crystal_comment; + crystal_string; + newline ${ entity = NEWLINE; } => crystal_ccallback; + ^space ${ entity = CRYSTAL_ANY; } => crystal_ccallback; + *|; + + # Entity machine + + action crystal_ecallback { + callback(CRYSTAL_LANG, crystal_entities[entity], cint(ts), cint(te), userdata); + } + + crystal_line_comment_entity = '#' nonnewline*; + crystal_block_comment_entity = ('=' when starts_line) 'begin' + any* :>> (('=' when starts_line) 'end'); + crystal_comment_entity = crystal_line_comment_entity | crystal_block_comment_entity; + + crystal_entity := |* + space+ ${ entity = CRYSTAL_SPACE; } => crystal_ecallback; + crystal_comment_entity ${ entity = CRYSTAL_COMMENT; } => crystal_ecallback; + # TODO: + ^space; + *|; +}%% + +/************************* Required for every parser *************************/ + +/* Parses a string buffer with crystal code. + * + * @param *buffer The string to parse. + * @param length The length of the string to parse. + * @param count Integer flag specifying whether or not to count lines. If yes, + * uses the Ragel machine optimized for counting. Otherwise uses the Ragel + * machine optimized for returning entity positions. + * @param *callback Callback function. If count is set, callback is called for + * every line of code, comment, or blank with 'lcode', 'lcomment', and + * 'lblank' respectively. Otherwise callback is called for each entity found. + */ +void parse_crystal(char *buffer, int length, int count, + void (*callback) (const char *lang, const char *entity, int s, + int e, void *udata), + void *userdata + ) { + init + + %% write init; + cs = (count) ? 
crystal_en_crystal_line : crystal_en_crystal_entity; + %% write exec; + + // if no newline at EOF; callback contents of last line + if (count) { process_last_line(CRYSTAL_LANG) } +} + +#endif + +/*****************************************************************************/ diff --git a/src/parsers/kotlin.rl b/src/parsers/kotlin.rl new file mode 100644 index 0000000..38ef112 --- /dev/null +++ b/src/parsers/kotlin.rl @@ -0,0 +1,177 @@ +// kotlin.rl written by Tuomas Tynkkynen +// Inspired by rust.rl, python.rl and haskell.rl + +/************************* Required for every parser *************************/ +#ifndef OHCOUNT_KOTLIN_PARSER_H +#define OHCOUNT_KOTLIN_PARSER_H + +#include "../parser_macros.h" + +// the name of the language +const char *KOTLIN_LANG = LANG_KOTLIN; + +// the languages entities +const char *kotlin_entities[] = { + "space", "comment", "string", "number", + "keyword", "identifier", "operator", "any" +}; + +// constants associated with the entities +enum { + KOTLIN_SPACE = 0, KOTLIN_COMMENT, KOTLIN_STRING, KOTLIN_NUMBER, + KOTLIN_KEYWORD, KOTLIN_IDENTIFIER, KOTLIN_OPERATOR, KOTLIN_ANY +}; + +/*****************************************************************************/ + +%%{ + machine kotlin; + write data; + include common "common.rl"; + + # Line counting machine + + action kotlin_ccallback { + switch(entity) { + case KOTLIN_SPACE: + ls + break; + case KOTLIN_ANY: + code + break; + case INTERNAL_NL: + std_internal_newline(KOTLIN_LANG) + break; + case NEWLINE: + std_newline(KOTLIN_LANG) + } + } + + action kotlin_comment_nc_res { kotlin_comment_nest_count = 0; } + action kotlin_comment_nc_inc { kotlin_comment_nest_count++; } + action kotlin_comment_nc_dec { kotlin_comment_nest_count--; } + + kotlin_line_comment = '//' @comment nonnewline*; + kotlin_block_comment = + '/*' >kotlin_comment_nc_res @comment ( + newline %{ entity = INTERNAL_NL; } %kotlin_ccallback + | + ws + | + '/*' @kotlin_comment_nc_inc @comment + | + '*/' @kotlin_comment_nc_dec 
@comment + | + (nonnewline - ws) @comment + )* :>> ('*/' when { kotlin_comment_nest_count == 0 }) @comment; + kotlin_comment = kotlin_line_comment | kotlin_block_comment; + + kotlin_dq_str = + '"' @code ([^"] | '"' [^"] @{ fhold; }) @{ fhold; } # make sure it's not """ + ([^\r\n\f"\\] | '\\' nonnewline)* '"'; + kotlin_raw_str = + '"""' @code ( + newline %{ entity = INTERNAL_NL; } %kotlin_ccallback + | + ws + | + [^\t ] @code + )* '"""'; + kotlin_string = kotlin_dq_str | kotlin_raw_str; + + kotlin_line := |* + spaces ${ entity = KOTLIN_SPACE; } => kotlin_ccallback; + kotlin_comment; + kotlin_string; + newline ${ entity = NEWLINE; } => kotlin_ccallback; + ^space ${ entity = KOTLIN_ANY; } => kotlin_ccallback; + *|; + + # Entity machine + + action kotlin_ecallback { + callback(KOTLIN_LANG, kotlin_entities[entity], cint(ts), cint(te), userdata); + } + + kotlin_line_comment_entity = '//' nonnewline*; + kotlin_block_comment_entity = '/*' any* :>> '*/'; + kotlin_comment_entity = kotlin_line_comment_entity | kotlin_block_comment_entity; + + kotlin_raw_string_entity = '"""' any* :>> '"""'; + kotlin_string_entity = dq_str_with_escapes | kotlin_raw_string_entity; + + kotlin_float_suffix_ty = [fF]; + kotlin_long_suffix = 'L'; + kotlin_hex_suffix = kotlin_long_suffix + | '.' [0-9A-Fa-f]* kotlin_float_suffix_ty?; + + kotlin_dec_lit = [0-9]+; + kotlin_exponent = [Ee] [\-+]? kotlin_dec_lit; + kotlin_float_suffix = (kotlin_exponent | '.' kotlin_dec_lit kotlin_exponent?)? + kotlin_float_suffix_ty?; + + kotlin_num_suffix = kotlin_long_suffix | kotlin_float_suffix; + + kotlin_number_entity = [1-9] [0-9]* kotlin_num_suffix? + | '0' ( [0-9]* kotlin_num_suffix? + | 'b' [01]+ kotlin_long_suffix? 
+ | 'x' [0-9A-Fa-f]+ kotlin_hex_suffix?); + + kotlin_plain_identifier = (alpha | '_') (alnum | '_')*; + kotlin_identifier_entity = kotlin_plain_identifier | '`' kotlin_plain_identifier '`'; + + kotlin_keyword_entity = + 'as' | 'break' | 'class' | 'continue' | 'do' | 'else' | 'false' | 'for' | + 'fun' | 'if' | 'in' | 'is' | 'null' | 'object' | 'package' | 'return' | + 'super' | 'this' | 'This' | 'throw' | 'trait' | 'true' | 'try' | 'type' | + 'val' | 'var' | 'when' | 'while'; + + kotlin_operator_entity = [+\-/*%<>!=^&|?~:;.,()\[\]{}]; + + kotlin_entity := |* + space+ ${ entity = KOTLIN_SPACE; } => kotlin_ecallback; + kotlin_comment_entity ${ entity = KOTLIN_COMMENT; } => kotlin_ecallback; + kotlin_string_entity ${ entity = KOTLIN_STRING; } => kotlin_ecallback; + kotlin_number_entity ${ entity = KOTLIN_NUMBER; } => kotlin_ecallback; + kotlin_identifier_entity ${ entity = KOTLIN_IDENTIFIER; } => kotlin_ecallback; + kotlin_keyword_entity ${ entity = KOTLIN_KEYWORD; } => kotlin_ecallback; + kotlin_operator_entity ${ entity = KOTLIN_OPERATOR; } => kotlin_ecallback; + ^(space | digit) ${ entity = KOTLIN_ANY; } => kotlin_ecallback; + *|; +}%% + +/************************* Required for every parser *************************/ + +/* Parses a string buffer with Kotlin code. + * + * @param *buffer The string to parse. + * @param length The length of the string to parse. + * @param count Integer flag specifying whether or not to count lines. If yes, + * uses the Ragel machine optimized for counting. Otherwise uses the Ragel + * machine optimized for returning entity positions. + * @param *callback Callback function. If count is set, callback is called for + * every line of code, comment, or blank with 'lcode', 'lcomment', and + * 'lblank' respectively. Otherwise callback is called for each entity found. 
+ */ +void parse_kotlin(char *buffer, int length, int count, + void (*callback) (const char *lang, const char *entity, int s, + int e, void *udata), + void *userdata + ) { + init + + int kotlin_comment_nest_count = 0; + + %% write init; + cs = (count) ? kotlin_en_kotlin_line : kotlin_en_kotlin_entity; + %% write exec; + + // if no newline at EOF; callback contents of last line + if (count) { process_last_line(KOTLIN_LANG) } +} + +const char *ORIG_KOTLIN_LANG = LANG_KOTLIN; + +#endif + +/*****************************************************************************/ diff --git a/src/parsers/livecode.rl b/src/parsers/livecode.rl new file mode 100644 index 0000000..eb0b257 --- /dev/null +++ b/src/parsers/livecode.rl @@ -0,0 +1,117 @@ +// livecode.rl written by Monte Goulding. montegouldingws. + +/************************* Required for every parser *************************/ +#ifndef OHCOUNT_LIVECODE_PARSER_H +#define OHCOUNT_LIVECODE_PARSER_H + +#include "../parser_macros.h" + +// the name of the language +const char *LIVECODE_LANG = LANG_LIVECODE; + +// the languages entities +const char *livecode_entities[] = { + "space", "comment", "string", "any", +}; + +// constants associated with the entities +enum { + LIVECODE_SPACE = 0, LIVECODE_COMMENT, LIVECODE_STRING, LIVECODE_ANY +}; + +/*****************************************************************************/ + +%%{ + machine livecode; + write data; + include common "common.rl"; + + # Line counting machine + + action livecode_ccallback { + switch(entity) { + case LIVECODE_SPACE: + ls + break; + case LIVECODE_ANY: + code + break; + case INTERNAL_NL: + std_internal_newline(LIVECODE_LANG) + break; + case NEWLINE: + std_newline(LIVECODE_LANG) + } + } + + livecode_line_comment = ('--' | '#' | '//') @comment nonnewline*; + livecode_block_comment = + '/*' @comment ( + newline %{ entity = INTERNAL_NL; } %livecode_ccallback + | + ws + | + (nonnewline - ws) @comment + )* :>> '*/'; + + livecode_comment = livecode_line_comment | 
livecode_block_comment; + + livecode_string = '"' @code ([^\r\n\f"\\] | '\\' nonnewline)* '"'; + + livecode_line := |* + spaces ${ entity = LIVECODE_SPACE; } => livecode_ccallback; + livecode_comment; + livecode_string; + newline ${ entity = NEWLINE; } => livecode_ccallback; + ^space ${ entity = LIVECODE_ANY; } => livecode_ccallback; + *|; + + # Entity machine + + action livecode_ecallback { + callback(LIVECODE_LANG, livecode_entities[entity], cint(ts), cint(te), userdata); + } + + livecode_line_comment_entity = ('--' | '#' | '//') nonnewline*; + livecode_block_comment_entity = '/*' any* :>> '*/'; + livecode_comment_entity = livecode_line_comment_entity | livecode_block_comment_entity; + + livecode_entity := |* + space+ ${ entity = LIVECODE_SPACE; } => livecode_ecallback; + livecode_comment_entity ${ entity = LIVECODE_COMMENT; } => livecode_ecallback; + # TODO: + ^space; + *|; +}%% + +/************************* Required for every parser *************************/ + +/* Parses a string buffer with LiveCode code. + * + * @param *buffer The string to parse. + * @param length The length of the string to parse. + * @param count Integer flag specifying whether or not to count lines. If yes, + * uses the Ragel machine optimized for counting. Otherwise uses the Ragel + * machine optimized for returning entity positions. + * @param *callback Callback function. If count is set, callback is called for + * every line of code, comment, or blank with 'lcode', 'lcomment', and + * 'lblank' respectively. Otherwise callback is called for each entity found. + */ +void parse_livecode(char *buffer, int length, int count, + void (*callback) (const char *lang, const char *entity, int s, + int e, void *udata), + void *userdata + ) { + init + + %% write init; + cs = (count) ? 
livecode_en_livecode_line : livecode_en_livecode_entity; + %% write exec; + + // if no newline at EOF; callback contents of last line + if (count) { process_last_line(LIVECODE_LANG) } +} + +#endif + +/*****************************************************************************/ diff --git a/src/parsers/modelica.rl b/src/parsers/modelica.rl new file mode 100644 index 0000000..49432f3 --- /dev/null +++ b/src/parsers/modelica.rl @@ -0,0 +1,133 @@ +// modelica.rl written by Martin Sjölund. martin.sjolundliuse + +/************************* Required for every parser *************************/ +#ifndef OHCOUNT_MODELICA_PARSER_H +#define OHCOUNT_MODELICA_PARSER_H + +#include "../parser_macros.h" + +// the name of the language +const char *MODELICA_LANG = LANG_MODELICA; + +// the languages entities +const char *modelica_entities[] = { + "space", "comment", "string", "any" +}; + +// constants associated with the entities +enum { + MODELICA_SPACE = 0, MODELICA_COMMENT, MODELICA_STRING, MODELICA_ANY +}; + +/*****************************************************************************/ + +%%{ + machine modelica; + write data; + include common "common.rl"; + + # Line counting machine + + action modelica_ccallback { + switch(entity) { + case MODELICA_SPACE: + ls + break; + case MODELICA_ANY: + code + break; + case INTERNAL_NL: + std_internal_newline(MODELICA_LANG) + break; + case NEWLINE: + std_newline(MODELICA_LANG) + } + } + + modelica_line_comment = '//' @comment nonnewline*; + + modelica_block_comment = + '/*' @comment ( + newline %{ entity = INTERNAL_NL; } %modelica_ccallback + | + ws + | + (nonnewline - ws) @comment + )* :>> '*/'; + + modelica_comment = modelica_line_comment | modelica_block_comment; + identifier = '\'' (([^'] - ws + ' ')*|'\\\'') '\'' @code; + string = '\"' @code + (newline %{ entity = INTERNAL_NL; } %modelica_ccallback + |ws + |[^ \t\n"\\] @code + |'\\\"' @code + )* '\"' @code; + + modelica_line := |* + spaces ${ entity = MODELICA_SPACE; } => 
modelica_ccallback; + modelica_comment; + newline ${ entity = NEWLINE; } => modelica_ccallback; + identifier ${ entity = MODELICA_ANY; } => modelica_ccallback; + string ${ entity = MODELICA_ANY; } => modelica_ccallback; + ^space ${ entity = MODELICA_ANY; } => modelica_ccallback; + *|; + + # Entity machine + + action modelica_ecallback { + callback(MODELICA_LANG, modelica_entities[entity], cint(ts), cint(te), userdata); + } + + modelica_eline_comment = '//' @comment nonnewline*; + + modelica_eblock_comment = + '/*' @comment ( + newline %{ entity = INTERNAL_NL; } %modelica_ccallback + | + ws + | + (nonnewline - ws) @comment + )* :>> '*/'; + + modelica_ecomment = modelica_line_comment | modelica_block_comment; + + modelica_entity := |* + space+ ${ entity = MODELICA_SPACE; } => modelica_ecallback; + modelica_ecomment ${ entity = MODELICA_COMMENT; } => modelica_ecallback; + identifier ${ entity = MODELICA_ANY; } => modelica_ecallback; + string ${ entity = MODELICA_ANY; } => modelica_ecallback; + ^space; + *|; +}%% + +/************************* Required for every parser *************************/ + +/* Parses a string buffer with Modelica code. + * + * @param *buffer The string to parse. + * @param length The length of the string to parse. + * @param count Integer flag specifying whether or not to count lines. If yes, + * uses the Ragel machine optimized for counting. Otherwise uses the Ragel + * machine optimized for returning entity positions. + * @param *callback Callback function. If count is set, callback is called for + * every line of code, comment, or blank with 'lcode', 'lcomment', and + * 'lblank' respectively. Otherwise callback is called for each entity found. + */ +void parse_modelica(char *buffer, int length, int count, + void (*callback) (const char *lang, const char *entity, int s, + int e, void *udata), + void *userdata + ) { + init + %% write init; + cs = (count) ? 
modelica_en_modelica_line : modelica_en_modelica_entity; + %% write exec; + + // if no newline at EOF; callback contents of last line + if (count) { process_last_line(MODELICA_LANG) } +} + +#endif + +/*****************************************************************************/ diff --git a/src/parsers/postscript.rl b/src/parsers/postscript.rl new file mode 100644 index 0000000..ac8a0a8 --- /dev/null +++ b/src/parsers/postscript.rl @@ -0,0 +1,104 @@ +/************************* Required for every parser *************************/ +#ifndef OHCOUNT_PS_PARSER_H +#define OHCOUNT_PS_PARSER_H + +#include "../parser_macros.h" + +// the name of the language +const char *POSTSCRIPT_LANG = LANG_POSTSCRIPT; + +// the languages entities +const char *postscript_entities[] = { + "space", "comment", "string", "any" +}; + +// constants associated with the entities +enum { + POSTSCRIPT_SPACE = 0, POSTSCRIPT_COMMENT, POSTSCRIPT_STRING, POSTSCRIPT_ANY +}; + +/*****************************************************************************/ + +%%{ + machine postscript; + write data; + include common "common.rl"; + + # Line counting + + action postscript_ccallback { + switch(entity) { + case POSTSCRIPT_SPACE: + ls + break; + case POSTSCRIPT_ANY: + code + break; + case INTERNAL_NL: + std_internal_newline(POSTSCRIPT_LANG) + break; + case NEWLINE: + std_newline(POSTSCRIPT_LANG) + } + } + + postscript_comment = '%' @comment nonnewline*; + + postscript_string = '(' @code ([^\r\n\f])* ')'; + + postscript_line := |* + spaces ${ entity = POSTSCRIPT_SPACE; } => postscript_ccallback; + postscript_comment; + postscript_string; + newline ${ entity = NEWLINE; } => postscript_ccallback; + ^space ${ entity = POSTSCRIPT_ANY; } => postscript_ccallback; + *|; + + # Entity Machine + + action postscript_ecallback { + callback(POSTSCRIPT_LANG, postscript_entities[entity], cint(ts), cint(te), + userdata); + } + + postscript_comment_entity = '%' nonnewline*; + + postscript_entity := |* + space+ ${ entity = 
POSTSCRIPT_SPACE; } => postscript_ecallback; + postscript_comment_entity ${ entity = POSTSCRIPT_COMMENT; } => postscript_ecallback; + # TODO: + ^space; + *|; +}%% + +/************************* Required for every parser *************************/ + +/* Parses a string buffer with PostScript code. + * + * @param *buffer The string to parse. + * @param length The length of the string to parse. + * @param count Integer flag specifying whether or not to count lines. If yes, + * uses the Ragel machine optimized for counting. Otherwise uses the Ragel + * machine optimized for returning entity positions. + * @param *callback Callback function. If count is set, callback is called for + * every line of code, comment, or blank with 'lcode', 'lcomment', and + * 'lblank' respectively. Otherwise callback is called for each entity found. + */ +void parse_postscript(char *buffer, int length, int count, + void (*callback) (const char *lang, const char *entity, int s, + int e, void *udata), + void *userdata + ) { + init + + %% write init; + cs = (count) ? postscript_en_postscript_line : postscript_en_postscript_entity; + %% write exec; + + // if no newline at EOF; callback contents of last line + if (count) { process_last_line(POSTSCRIPT_LANG) } +} + +#endif + +/*****************************************************************************/ diff --git a/src/parsers/unrealscript.rl b/src/parsers/unrealscript.rl new file mode 100644 index 0000000..7e6d8e4 --- /dev/null +++ b/src/parsers/unrealscript.rl @@ -0,0 +1,136 @@ +// unrealscript.rl derived from code written by Mitchell Foral. mitchellcaladbolgnet. 
+ +/************************* Required for every parser *************************/ +#ifndef OHCOUNT_UNREALSCRIPT_PARSER_H +#define OHCOUNT_UNREALSCRIPT_PARSER_H + +#include "../parser_macros.h" + +// the name of the language +const char *UNREALSCRIPT_LANG = LANG_UNREALSCRIPT; + +// the entity names of the language +const char *unrealscript_entities[] = { + "space", "comment", "string", "any" +}; + +// constants associated with the entities (indices into unrealscript_entities) +enum { + UNREALSCRIPT_SPACE = 0, UNREALSCRIPT_COMMENT, UNREALSCRIPT_STRING, UNREALSCRIPT_ANY +}; + +/*****************************************************************************/ + +%%{ + machine unrealscript; + write data; + include common "common.rl"; + + # Line counting machine + + action unrealscript_ccallback { /* Dispatches on the entity id set by the surrounding patterns and scanner rules. Comment and string text is marked through the @comment and @code actions in the patterns rather than through this switch. ls, code, std_internal_newline and std_newline are not defined in this file (presumably macros from parser_macros.h; confirm). */ + switch(entity) { + case UNREALSCRIPT_SPACE: + ls + break; + case UNREALSCRIPT_ANY: + code + break; + case INTERNAL_NL: + std_internal_newline(UNREALSCRIPT_LANG) + break; + case NEWLINE: + std_newline(UNREALSCRIPT_LANG) + } + } + + unrealscript_line_comment = '//' @comment nonnewline*; + unrealscript_block_comment = + '/*' @comment ( + newline %{ entity = INTERNAL_NL; } %unrealscript_ccallback + | + ws + | + (nonnewline - ws) @comment + )* :>> '*/'; + unrealscript_comment = unrealscript_line_comment | unrealscript_block_comment; + + unrealscript_sq_str = + '\'' @code ( + escaped_newline %{ entity = INTERNAL_NL; } %unrealscript_ccallback + | + ws + | + [^\t '\\] @code + | + '\\' nonnewline @code + )* '\''; + unrealscript_dq_str = + '"' @code ( + escaped_newline %{ entity = INTERNAL_NL; } %unrealscript_ccallback + | + ws + | + [^\t "\\] @code + | + '\\' nonnewline @code + )* '"'; + unrealscript_string = unrealscript_sq_str | unrealscript_dq_str; + + unrealscript_line := |* + spaces ${ entity = UNREALSCRIPT_SPACE; } => unrealscript_ccallback; + unrealscript_comment; + unrealscript_string; + newline ${ entity = NEWLINE; } => unrealscript_ccallback; + ^space ${ entity = UNREALSCRIPT_ANY; } => unrealscript_ccallback; + *|; + + #
Entity machine + + action unrealscript_ecallback { /* Reports one entity to callback: the name is looked up in unrealscript_entities and the span comes from the scanner token bounds ts and te, each passed through cint (not defined in this file). */ + callback(UNREALSCRIPT_LANG, unrealscript_entities[entity], cint(ts), cint(te), userdata); + } + + unrealscript_line_comment_entity = '//' nonnewline*; + unrealscript_block_comment_entity = '/*' any* :>> '*/'; + unrealscript_comment_entity = unrealscript_line_comment_entity | unrealscript_block_comment_entity; + + unrealscript_entity := |* + space+ ${ entity = UNREALSCRIPT_SPACE; } => unrealscript_ecallback; + unrealscript_comment_entity ${ entity = UNREALSCRIPT_COMMENT; } => unrealscript_ecallback; + # TODO: + ^space; + *|; +}%% + +/************************* Required for every parser *************************/ + +/* Parses a string buffer with UnrealScript code. + * + * @param *buffer The string to parse. + * @param length The length of the string to parse. + * @param count Integer flag specifying whether or not to count lines. If yes, + * uses the Ragel machine optimized for counting. Otherwise uses the Ragel + * machine optimized for returning entity positions. + * @param *callback Callback function. If count is set, callback is called for + * every line of code, comment, or blank with 'lcode', 'lcomment', and + * 'lblank' respectively. Otherwise callback is called for each entity found. + * @param *userdata Caller-supplied pointer. The entity action forwards it + * unchanged as the last argument of callback; it is not referenced directly + * in this function body. + */ +void parse_unrealscript(char *buffer, int length, int count, + void (*callback) (const char *lang, const char *entity, int s, + int e, void *udata), + void *userdata + ) { + init + /* The write init directive below makes Ragel emit its state setup. cs is then overridden: a nonzero count starts in the line-counting machine, zero starts in the entity machine. init and process_last_line are not defined in this file (presumably macros from parser_macros.h; confirm). */ + %% write init; + cs = (count) ?
unrealscript_en_unrealscript_line : unrealscript_en_unrealscript_entity; + %% write exec; + + // if no newline at EOF; callback contents of last line + if (count) { process_last_line(UNREALSCRIPT_LANG) } +} + +#endif + +/*****************************************************************************/ diff --git a/test/detect_files/foo.uc b/test/detect_files/foo.uc new file mode 100644 index 0000000..4f0b7d6 --- /dev/null +++ b/test/detect_files/foo.uc @@ -0,0 +1,19 @@ +class foo extends Actor; + +/** An UnrealScript 3 styled comment. */ +var bool bFoo; + +simulated function PostBeginPlay() +{ + // Comment + log(self@"Hello World! Foo is"@bFoo); // Another comment + /* A + block + comment */ + Super.PostBeginPlay(); +} + +defaultproperties +{ + bFoo = true +} diff --git a/test/detect_files/forth.fs b/test/detect_files/forth.fs new file mode 100644 index 0000000..75ce18d --- /dev/null +++ b/test/detect_files/forth.fs @@ -0,0 +1,7 @@ +\ Sample Forth code + +( This is a comment + spanning multiple lines ) + +: somedefinition ; + diff --git a/test/expected_dir/abstract_crystal.cr b/test/expected_dir/abstract_crystal.cr new file mode 100644 index 0000000..5c23c3c --- /dev/null +++ b/test/expected_dir/abstract_crystal.cr @@ -0,0 +1,4 @@ +crystal code abstract class Animal +crystal code abstract def talk +crystal code end +crystal code end diff --git a/test/expected_dir/c_binding_crystal.cr b/test/expected_dir/c_binding_crystal.cr new file mode 100644 index 0000000..7e04101 --- /dev/null +++ b/test/expected_dir/c_binding_crystal.cr @@ -0,0 +1,5 @@ +crystal code @[Link(ldflags: "/home/alex/projects/fisbot/lib/ohcount/ohcount.so")] +crystal blank +crystal code lib OhcountC +crystal code fun ohcount_sourcefile_new(filepath : LibC::Char*) : Int32* +crystal code end diff --git a/test/expected_dir/foo.lc b/test/expected_dir/foo.lc new file mode 100644 index 0000000..206e387 --- /dev/null +++ b/test/expected_dir/foo.lc @@ -0,0 +1,13 @@ +livecode comment # Comment +livecode comment
-- Comment +livecode comment // Comment +livecode blank +livecode comment /* +livecode blank +livecode comment Block Comment +livecode blank +livecode comment */ +livecode blank +livecode code on mouseUp +livecode code doSomething +livecode code end mouseUp diff --git a/test/expected_dir/forth.fs b/test/expected_dir/forth.fs new file mode 100644 index 0000000..e7f5ab5 --- /dev/null +++ b/test/expected_dir/forth.fs @@ -0,0 +1,7 @@ +forth comment \ Sample Forth code +forth blank +forth comment ( This is a comment +forth comment spanning multiple lines ) +forth blank +forth code : somedefinition ; +forth blank diff --git a/test/expected_dir/kotlin.kt b/test/expected_dir/kotlin.kt new file mode 100644 index 0000000..58dfcd2 --- /dev/null +++ b/test/expected_dir/kotlin.kt @@ -0,0 +1,24 @@ +kotlin comment // Line comment +kotlin code fun sum(a : Double, b : Double) : Double { +kotlin code return a + b +kotlin code } +kotlin blank +kotlin comment /* +kotlin comment * Block comment +kotlin comment */ +kotlin blank +kotlin code fun hello(place : String) : Unit { +kotlin code print("Hello, \"$place\"\n") +kotlin code } +kotlin blank +kotlin comment /* +kotlin comment * /* +kotlin comment * * Block comments nest +kotlin comment * */ +kotlin comment */ +kotlin blank +kotlin code fun main() : Unit { +kotlin code hello("""Very, very, very +kotlin code // long place +kotlin code somewhere""") +kotlin code } diff --git a/test/expected_dir/macro_crystal.cr b/test/expected_dir/macro_crystal.cr new file mode 100644 index 0000000..b526e62 --- /dev/null +++ b/test/expected_dir/macro_crystal.cr @@ -0,0 +1,5 @@ +crystal comment # This is a comment. 
+crystal blank +crystal code macro create_foo(name, &block) +crystal code {% name.id = "bar" %} +crystal code {{block}} diff --git a/test/expected_dir/modelica.mo b/test/expected_dir/modelica.mo new file mode 100644 index 0000000..1f2fd9c --- /dev/null +++ b/test/expected_dir/modelica.mo @@ -0,0 +1,14 @@ +modelica code within fooBar; +modelica code package Examples +modelica blank +modelica code annotation (Icon(graphics={ +modelica code Polygon( +modelica code points={{-48,50},{52,-10},{-48,-70},{-48,50}}, +modelica code lineColor={0,0,255}, +modelica code pattern=LinePattern.None, +modelica code fillColor={95,95,95}, +modelica code fillPattern=FillPattern.Solid)}), Documentation(revisions=" +modelica code +modelica code

Copyright 2015-2016 RTE (France), SmarTS Lab (Sweden), AIA (Spain) and DTU (Denmark)

+modelica code ")); +modelica code end Examples; diff --git a/test/expected_dir/postscript.ps b/test/expected_dir/postscript.ps new file mode 100644 index 0000000..ba3c1c8 --- /dev/null +++ b/test/expected_dir/postscript.ps @@ -0,0 +1,9 @@ +postscript comment %!PS +postscript comment % Postscript Directive at the top. Just like its supposed to be. +postscript code /Courier +postscript blank +postscript code 15 selectfont +postscript code 72 500 moveto +postscript blank +postscript code (Hello world) show +postscript code showpage diff --git a/test/expected_dir/standard_crystal.cr b/test/expected_dir/standard_crystal.cr new file mode 100644 index 0000000..e386f9f --- /dev/null +++ b/test/expected_dir/standard_crystal.cr @@ -0,0 +1,20 @@ +crystal comment #comment +crystal comment # comment with "string" +crystal blank +crystal code class Rest +crystal code def one +crystal code two do |c| +crystal code puts c +crystal code end +crystal code end +crystal blank +crystal code def two(&block : Int32 -> _) +crystal code three { |x| yield x } # yield is faster than passing blocks. +crystal code end +crystal blank +crystal code def three +crystal code yield 3 +crystal code end +crystal code end +crystal blank +crystal code Rest.new.one diff --git a/test/expected_dir/unrealscript.uc b/test/expected_dir/unrealscript.uc new file mode 100644 index 0000000..2f5787a --- /dev/null +++ b/test/expected_dir/unrealscript.uc @@ -0,0 +1,19 @@ +unrealscript code class foo extends Actor; +unrealscript blank +unrealscript comment /** An UnrealScript 3 styled comment. */ +unrealscript code var bool bFoo; +unrealscript blank +unrealscript code simulated function PostBeginPlay() +unrealscript code { +unrealscript comment // Comment +unrealscript code log(self@"Hello World!
Foo is"@bFoo); // Another comment +unrealscript comment /* A +unrealscript comment block +unrealscript comment comment */ +unrealscript code Super.PostBeginPlay(); +unrealscript code } +unrealscript blank +unrealscript code defaultproperties +unrealscript code { +unrealscript code bFoo = true +unrealscript code } diff --git a/test/src_dir/abstract_crystal.cr b/test/src_dir/abstract_crystal.cr new file mode 100644 index 0000000..897df7b --- /dev/null +++ b/test/src_dir/abstract_crystal.cr @@ -0,0 +1,4 @@ +abstract class Animal + abstract def talk + end +end diff --git a/test/src_dir/c_binding_crystal.cr b/test/src_dir/c_binding_crystal.cr new file mode 100644 index 0000000..5e3a7c4 --- /dev/null +++ b/test/src_dir/c_binding_crystal.cr @@ -0,0 +1,5 @@ +@[Link(ldflags: "/home/alex/projects/fisbot/lib/ohcount/ohcount.so")] + +lib OhcountC + fun ohcount_sourcefile_new(filepath : LibC::Char*) : Int32* +end diff --git a/test/src_dir/foo.lc b/test/src_dir/foo.lc new file mode 100644 index 0000000..6dca52c --- /dev/null +++ b/test/src_dir/foo.lc @@ -0,0 +1,13 @@ +# Comment +-- Comment +// Comment + +/* + +Block Comment + +*/ + +on mouseUp + doSomething +end mouseUp diff --git a/test/src_dir/forth.fs b/test/src_dir/forth.fs new file mode 100644 index 0000000..75ce18d --- /dev/null +++ b/test/src_dir/forth.fs @@ -0,0 +1,7 @@ +\ Sample Forth code + +( This is a comment + spanning multiple lines ) + +: somedefinition ; + diff --git a/test/src_dir/kotlin.kt b/test/src_dir/kotlin.kt new file mode 100644 index 0000000..d825f7f --- /dev/null +++ b/test/src_dir/kotlin.kt @@ -0,0 +1,24 @@ +// Line comment +fun sum(a : Double, b : Double) : Double { + return a + b +} + +/* + * Block comment + */ + +fun hello(place : String) : Unit { + print("Hello, \"$place\"\n") +} + +/* + * /* + * * Block comments nest + * */ + */ + +fun main() : Unit { + hello("""Very, very, very + // long place + somewhere""") +} diff --git a/test/src_dir/macro_crystal.cr b/test/src_dir/macro_crystal.cr new file 
mode 100644 index 0000000..12b25c0 --- /dev/null +++ b/test/src_dir/macro_crystal.cr @@ -0,0 +1,5 @@ +# This is a comment. + +macro create_foo(name, &block) + {% name.id = "bar" %} + {{block}} diff --git a/test/src_dir/modelica.mo b/test/src_dir/modelica.mo new file mode 100644 index 0000000..4c3ce02 --- /dev/null +++ b/test/src_dir/modelica.mo @@ -0,0 +1,14 @@ +within fooBar; +package Examples + +annotation (Icon(graphics={ + Polygon( + points={{-48,50},{52,-10},{-48,-70},{-48,50}}, + lineColor={0,0,255}, + pattern=LinePattern.None, + fillColor={95,95,95}, + fillPattern=FillPattern.Solid)}), Documentation(revisions=" + +

Copyright 2015-2016 RTE (France), SmarTS Lab (Sweden), AIA (Spain) and DTU (Denmark)

+")); +end Examples; diff --git a/test/src_dir/postscript.ps b/test/src_dir/postscript.ps new file mode 100644 index 0000000..110817f --- /dev/null +++ b/test/src_dir/postscript.ps @@ -0,0 +1,9 @@ +%!PS +% Postscript Directive at the top. Just like its supposed to be. +/Courier + +15 selectfont +72 500 moveto + +(Hello world) show +showpage diff --git a/test/src_dir/standard_crystal.cr b/test/src_dir/standard_crystal.cr new file mode 100644 index 0000000..9f632da --- /dev/null +++ b/test/src_dir/standard_crystal.cr @@ -0,0 +1,20 @@ +#comment + # comment with "string" + +class Rest + def one + two do |c| + puts c + end + end + + def two(&block : Int32 -> _) + three { |x| yield x } # yield is faster than passing blocks. + end + + def three + yield 3 + end +end + +Rest.new.one diff --git a/test/src_dir/unrealscript.uc b/test/src_dir/unrealscript.uc new file mode 100644 index 0000000..4f0b7d6 --- /dev/null +++ b/test/src_dir/unrealscript.uc @@ -0,0 +1,19 @@ +class foo extends Actor; + +/** An UnrealScript 3 styled comment. */ +var bool bFoo; + +simulated function PostBeginPlay() +{ + // Comment + log(self@"Hello World! 
Foo is"@bFoo); // Another comment + /* A + block + comment */ + Super.PostBeginPlay(); +} + +defaultproperties +{ + bFoo = true +} diff --git a/test/unit/detector_test.h b/test/unit/detector_test.h index f320aac..ff544ff 100755 --- a/test/unit/detector_test.h +++ b/test/unit/detector_test.h @@ -125,10 +125,12 @@ ASSERT_DETECT(LANG_CPP, "uses_cpp_stdlib_headers.h"); ASSERT_DETECT(LANG_CPP, "uses_cpp_keywords.h"); ASSERT_DETECT(LANG_RUBY, "foo.rb"); + ASSERT_DETECT(LANG_CRYSTAL, "foo.cr"); ASSERT_DETECT(LANG_MAKE, "foo.mk"); ASSERT_DETECT(LANG_MATHEMATICA, "foo.mt"); ASSERT_DETECT(LANG_MATHEMATICA, "foo.wl"); ASSERT_DETECT(LANG_MATHEMATICA, "foo.wlt"); + ASSERT_DETECT(LANG_MODELICA, "foo.mo"); ASSERT_DETECT(LANG_OBJECTIVE_C, "foo_objective_c.h"); ASSERT_DETECT(LANG_PHP, "upper_case_php"); ASSERT_DETECT(LANG_SMALLTALK, "example.st"); @@ -163,13 +165,18 @@ ASSERT_DETECT(LANG_FSHARP, "fs1.fs"); ASSERT_DETECT(LANG_GRACE, "grace1.grace"); ASSERT_DETECT(LANG_GRACE, "grace2.grc"); + ASSERT_DETECT(LANG_FORTH, "forth.fs"); ASSERT_DETECT(LANG_AUTOCONF, "m4.m4"); ASSERT_DETECT(LANG_NSIS, "foo.nsi"); ASSERT_DETECT(LANG_NSIS, "foo.nsh"); ASSERT_DETECT(LANG_COFFEESCRIPT, "foo.coffee"); ASSERT_DETECT(LANG_QML, "foo.qml"); ASSERT_DETECT(LANG_COQ, "coq.v"); + ASSERT_DETECT(LANG_UNREALSCRIPT, "foo.uc"); ASSERT_DETECT(LANG_AMPL, "foo.run"); + ASSERT_DETECT(LANG_LIVECODE, "foo.lc"); + ASSERT_DETECT(LANG_LIVECODE, "script.utf8"); + ASSERT_DETECT(LANG_POSTSCRIPT, "foo.ps"); ASSERT_NODETECT("empty.inc"); } @@ -243,7 +250,7 @@ } void test_non_existent_file(){ - ASSERT_NODETECT("xxx_non_exists_xxxi.pp"); + ASSERT_NODETECT("xxx_non_exists_xxxi.pp"); } void all_detector_tests() { diff --git a/test/unit/parser_test.h b/test/unit/parser_test.h index 6a622ac..4c9c588 100644 --- a/test/unit/parser_test.h +++ b/test/unit/parser_test.h @@ -93,6 +93,7 @@ #include "parsers/test_clearsilver.h" #include "parsers/test_clojure.h" #include "parsers/test_coq.h" +#include "parsers/test_crystal.h" #include 
"parsers/test_cs_aspx.h" #include "parsers/test_csharp.h" #include "parsers/test_css.h" @@ -121,6 +122,7 @@ #include "parsers/test_javascript.h" #include "parsers/test_jsp.h" #include "parsers/test_lisp.h" +#include "parsers/test_livecode.h" #include "parsers/test_logtalk.h" #include "parsers/test_lua.h" #include "parsers/test_make.h" @@ -137,6 +139,7 @@ #include "parsers/test_pascal.h" #include "parsers/test_perl.h" #include "parsers/test_pike.h" +#include "parsers/test_postscript.h" #include "parsers/test_puppet.h" #include "parsers/test_python.h" #include "parsers/test_qml.h" @@ -155,6 +158,7 @@ #include "parsers/test_stratego.h" #include "parsers/test_tcl.h" #include "parsers/test_tex.h" +#include "parsers/test_unrealscript.h" #include "parsers/test_typescript.h" #include "parsers/test_vala.h" #include "parsers/test_vb_aspx.h" @@ -280,6 +284,7 @@ all_clearsilver_tests(); all_clojure_tests(); all_coq_tests(); + all_crystal_tests(); all_cs_aspx_tests(); all_csharp_tests(); all_css_tests(); @@ -307,6 +312,7 @@ all_javascript_tests(); all_jsp_tests(); all_lisp_tests(); + all_livecode_tests(); all_logtalk_tests(); all_lua_tests(); all_make_tests(); @@ -323,6 +329,7 @@ all_pascal_tests(); all_perl_tests(); all_pike_tests(); + all_postscript_tests(); all_python_tests(); all_r_tests(); all_racket_tests(); @@ -339,6 +346,7 @@ all_stratego_tests(); all_tcl_tests(); all_tex_tests(); + all_unrealscript_tests(); all_vala_tests(); all_vb_aspx_tests(); all_vhdl_tests(); diff --git a/test/unit/parsers/test_crystal.h b/test/unit/parsers/test_crystal.h new file mode 100644 index 0000000..689054d --- /dev/null +++ b/test/unit/parsers/test_crystal.h @@ -0,0 +1,23 @@ + /* Line-count check: a hash comment preceded by one space. The expected values are an empty code string, the comment text without the leading space, and 0 as the final argument (presumably the blank-line count; confirm against test_parser_verify_parse, which is not defined in this chunk). */ +void test_crystal_comments() { + test_parser_verify_parse( + test_parser_sourcefile("crystal", " #comment"), + "crystal", "", "#comment", 0 + ); +} + /* Entity check: the hash line form and the =begin ... =end block form must each be reported as one comment entity. */ +void test_crystal_comment_entities() { + test_parser_verify_entity( + test_parser_sourcefile("crystal", " #comment"), + "comment", "#comment" + ); +
test_parser_verify_entity( + test_parser_sourcefile("crystal", "=begin\ncomment\n=end"), + "comment", "=begin\ncomment\n=end" + ); +} + /* Runs both Crystal parser tests defined in this header. */ +void all_crystal_tests() { + test_crystal_comments(); + test_crystal_comment_entities(); +} diff --git a/test/unit/parsers/test_livecode.h b/test/unit/parsers/test_livecode.h new file mode 100644 index 0000000..1f78a15 --- /dev/null +++ b/test/unit/parsers/test_livecode.h @@ -0,0 +1,47 @@ + /* Line-count check: a double-dash comment preceded by one space; expects an empty code string and the comment text without the leading space. */ +void test_livecode_comments() { + test_parser_verify_parse( + test_parser_sourcefile("livecode", " --comment"), + "livecode", "", "--comment", 0 + ); +} + /* Line-count check: a double-dash comment with no text after it; the expected comment string keeps the trailing newline. */ +void test_livecode_empty_comments() { + test_parser_verify_parse( + test_parser_sourcefile("livecode", " --\n"), + "livecode", "", "--\n", 0 + ); +} + /* Line-count check: a one-line slash-star block comment is expected in the comment argument, with the code argument left empty. */ +void test_livecode_block_comment() { + test_parser_verify_parse( + test_parser_sourcefile("livecode", " /*livecode*/"), + "livecode", "", "/*livecode*/", 0 + ); +} + /* Entity check: the double-dash, hash, double-slash and slash-star forms must each be reported as a comment entity without the leading space. */ +void test_livecode_comment_entities() { + test_parser_verify_entity( + test_parser_sourcefile("livecode", " --comment"), + "comment", "--comment" + ); + test_parser_verify_entity( + test_parser_sourcefile("livecode", " #comment"), + "comment", "#comment" + ); + test_parser_verify_entity( + test_parser_sourcefile("livecode", " //comment"), + "comment", "//comment" + ); + test_parser_verify_entity( + test_parser_sourcefile("livecode", " /*comment*/"), + "comment", "/*comment*/" + ); +} + /* Runs the four LiveCode parser tests defined in this header. */ +void all_livecode_tests() { + test_livecode_comments(); + test_livecode_empty_comments(); + test_livecode_block_comment(); + test_livecode_comment_entities(); +} diff --git a/test/unit/parsers/test_postscript.h b/test/unit/parsers/test_postscript.h new file mode 100644 index 0000000..acaebb3 --- /dev/null +++ b/test/unit/parsers/test_postscript.h @@ -0,0 +1,11 @@ + /* Entity check: a percent comment starting at the first character must be reported as a comment entity. */ +void test_postscript_comment_entities(){ + test_parser_verify_entity( + test_parser_sourcefile("postscript", "%comment"), + "comment", "%comment" + ); +} + /* Runs the single PostScript parser test defined in this header. */ +void all_postscript_tests(){ + test_postscript_comment_entities(); +} diff
--git a/test/unit/parsers/test_unrealscript.h b/test/unit/parsers/test_unrealscript.h new file mode 100644 index 0000000..20479dc --- /dev/null +++ b/test/unit/parsers/test_unrealscript.h @@ -0,0 +1,23 @@ + /* Line-count check: a double-slash comment preceded by one space. The expected values handed to test_parser_verify_parse are an empty code string, the comment text without the leading space, and 0 as the final argument (presumably the blank-line count; confirm against the helper, which is not defined in this chunk). */ +void test_unrealscript_comments() { + test_parser_verify_parse( + test_parser_sourcefile("unrealscript", " //comment"), + "unrealscript", "", "//comment", 0 + ); +} + /* Entity check: both the double-slash line form and the slash-star block form must be reported as a comment entity whose text excludes the leading space. */ +void test_unrealscript_comment_entities() { + test_parser_verify_entity( + test_parser_sourcefile("unrealscript", " //comment"), + "comment", "//comment" + ); + test_parser_verify_entity( + test_parser_sourcefile("unrealscript", " /*comment*/"), + "comment", "/*comment*/" + ); +} + /* Runs both UnrealScript parser tests defined in this header. */ +void all_unrealscript_tests() { + test_unrealscript_comments(); + test_unrealscript_comment_entities(); +}