Codebase list ohcount / 3581673
Initial commit Sylvestre Ledru 5 years ago
879 changed file(s) with 59132 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 pkg
1 lib/i[3-9]86*
2 lib/x86*
3 *.swp
4 tags
5 .DS_Store
6 src/hash/*_hash.h
7 src/hash/language_hash.c
8 src/parsers/*.h
9 src/parser.o
10 test/unit/run_tests
11 bin/
12 ruby/ohcount.so
13 ruby/ohcount_wrap.c
14 test/unit/run_tests.dSYM/
0 debian/patches
0 fix_null_dereference_2.patch
1 fix_null_dereference.patch
2 txx_support.patch
3 disabled_test_suite.patch
4 rbconfig.patch
#!/usr/bin/env bash
# Build script for Ohcount.
# Written by Mitchell Foral. mitchell<att>caladbolg.net.

# Options
# Change these for your system configuration.

# Query the platform once; every branch below keys off this value.
platform=$(uname)

if [ "$platform" != "Darwin" ]
then
  # Linux: no extra include/library directories by default.
  INC_DIR=
  LIB_DIR=

  if [ "$platform" == "FreeBSD" ]
  then
    INC_DIR=/usr/local/include
    LIB_DIR=/usr/local/lib
  fi

  # You shouldn't have to change the following.
  CFLAGS=-O3
  CFLAGS="$CFLAGS -DTMP_FILES_ARE_DT_UNKNOWN" # workaround bug on centos/SF servers
  WARN="-Wall -Wno-pointer-to-int-cast -Wno-parentheses"
  SHARED=-shared
  SHARED_NAME=libohcount.so
  RB_SHARED=-shared
  RB_SHARED_NAME=ohcount.so
else
  # Mac OSX
  INC_DIR=/opt/local/include
  LIB_DIR=/opt/local/lib
  # You shouldn't have to change the following.
  CFLAGS="-fno-common -g"
  WARN="-Wall -Wno-parentheses"
  SHARED="-dynamiclib -L$LIB_DIR -lpcre"
  SHARED_NAME=libohcount.dylib
  RB_SHARED="-dynamic -bundle -lruby"
  RB_SHARED_NAME=ohcount.bundle
fi

# C compiler and flags.
# -I/-L are only emitted when a directory is configured: a bare "-I" would
# consume the next command-line word as its directory operand.
cc="gcc -fPIC -g $CFLAGS $WARN ${INC_DIR:+-I$INC_DIR} ${LIB_DIR:+-L$LIB_DIR}"

# Ohcount source files (src/parser.o is prebuilt by build_parser_o).
files="src/sourcefile.c \
       src/detector.c \
       src/licenses.c \
       src/parser.o \
       src/loc.c \
       src/log.c \
       src/diff.c \
       src/parsed_language.c \
       src/hash/language_hash.c"
52
# Regenerate the hash headers when src/hash/ holds no *_hash.h file at all, or
# when some src/hash/*.gperf file is newer than src/hash/parser_hash.h (the
# headers are presumably generated together, so one of them is used as the
# reference timestamp). Exits the script with status 1 if generation fails.
build_hash_headers()
{
  if [[ -z $(ls src/hash/ | grep "_hash.h$") ]] ||
     [[ -n $(find src/hash/*.gperf -newer src/hash/parser_hash.h) ]]
  then
    echo "Generating hash headers"
    if ! sh -c "cd src/hash/ && ./generate_headers"
    then
      exit 1
    fi
  fi
}
64
# Rebuild src/parser.o when it is missing, or when any Ragel parser (*.rl) or
# parser header (*.h) under src/parsers/ is newer than the existing object.
# Exits the script with status 1 if either build step fails.
build_parser_o()
{
  if [[ ! -f src/parser.o ]] ||
     [[ -n $(find src/parsers/*.{h,rl} -newer src/parser.o) ]]
  then
    if ! bash -c "cd src/parsers/ && bash ./compile"
    then
      exit 1
    fi
    echo "Building src/parser.c (will take a while)"
    if ! bash -c "$cc -c src/parser.c -o src/parser.o"
    then
      exit 1
    fi
  fi
}
77
# Build the shared library src/$SHARED_NAME. The link step is skipped when the
# library already exists and no src/*.h or src/*.c file is newer than it.
# Exits the script with status 1 if the link fails.
build_shared()
{
  build_hash_headers
  build_parser_o
  if [[ ! -f src/$SHARED_NAME ]] ||
     [[ -n $(find src/*.{h,c} -newer src/$SHARED_NAME) ]]
  then
    echo "Building shared library"
    if ! sh -c "$cc $SHARED $files -o src/$SHARED_NAME"
    then
      exit 1
    fi
  fi
}
89
# Build the command-line executable bin/ohcount (always relinks).
# Exits the script with status 1 if compilation fails.
build_ohcount()
{
  build_hash_headers
  build_parser_o
  echo "Building Ohcount"
  mkdir -p bin/
  if ! sh -c "$cc src/ohcount.c $files -o bin/ohcount -lpcre"
  then
    exit 1
  fi
}
98
# Compile the unit-test runner test/unit/run_tests.
# Exits the script with status 1 if compilation fails.
build_test_suite()
{
  build_hash_headers
  build_parser_o
  echo "Building test suite"
  if ! sh -c "$cc test/unit/all_tests.c $files -o test/unit/run_tests -lpcre"
  then
    exit 1
  fi
}
107
# Run the compiled unit tests from inside test/unit/.
# The function's status is the status of the test run.
run_test_suite()
{
  echo "Running test suite"
  sh -c "cd test/unit/ && ./run_tests"
}
113
# Generate the SWIG wrapper, build the Ruby extension into ruby/<arch>/ and
# run the Ruby unit tests. Sets the global $arch, which the caller prints.
# Exits the script with status 1 if any step fails.
build_ruby_bindings()
{
  # RbConfig replaces the top-level Config constant, which newer Ruby
  # releases no longer provide. CONFIG is supplied by mkmf.
  arch=$(ruby -rmkmf -e 'print RbConfig::expand(CONFIG["arch"])')
  if [ -z "$arch" ]
  then
    # Without an architecture name the output would land directly in ruby/.
    echo "Unable to determine the Ruby architecture"
    exit 1
  fi
  archdir=$(ruby -rmkmf -e 'print RbConfig::expand(CONFIG["archdir"])')
  echo "Generating Ruby bindings for $arch"
  sh -c "swig -ruby -o ruby/ohcount_wrap.c ruby/ohcount.i" || exit 1
  mkdir -p "ruby/$arch"
  sh -c "$cc $RB_SHARED ruby/ohcount_wrap.c $files -o ruby/$arch/$RB_SHARED_NAME \
         -I$archdir \
         -lpcre" || exit 1
  sh -c "cd test/unit/ruby && ruby ruby_test.rb" || exit 1
}
125
# Command-line dispatch: the first argument selects the target. With no
# argument, "all" is assumed (build ohcount, build and run the unit tests).
target="${1:-all}"

case "$target" in
  all)
    build_ohcount
    build_test_suite
    # Propagate a failing test run instead of masking it.
    run_test_suite || exit 1
    ;;
  shared)
    build_shared
    echo "Build successful; $SHARED_NAME is in src/"
    ;;
  ohcount)
    build_ohcount
    echo "Build successful; ohcount is in bin/"
    ;;
  tests)
    build_test_suite
    run_test_suite
    ;;
  ruby)
    build_ruby_bindings
    echo "Build successful; $RB_SHARED_NAME is in ruby/$arch"
    ;;
  clean)
    rm -f bin/ohcount
    rm -f test/unit/run_tests
    rm -f src/parser.o
    rm -f src/parsers/*.h
    rm -f src/hash/*.h
    rm -f src/hash/*.c
    rm -f "src/$SHARED_NAME"
    rm -f "ruby/$RB_SHARED_NAME"
    # Only remove the per-architecture build directory when its name is
    # known; an empty name would expand to ruby//* and wipe all of ruby/.
    ruby_arch=$(ruby -rmkmf -e 'print RbConfig::expand(CONFIG["arch"])')
    if [ -n "$ruby_arch" ]
    then
      rm -rf "ruby/$ruby_arch"/*
    fi
    ;;
  *)
    echo "Usage: build [all|ohcount|shared|tests|ruby|clean]"
    ;;
esac
0 // detector.c written by Mitchell Foral. mitchell<att>caladbolg.net.
1 // See COPYING for license information.
2
3 #include <ctype.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8
9 #include "detector.h"
10 #include "languages.h"
11 #include "log.h"
12
13 #include "hash/cppheader_hash.h"
14 #include "hash/disambiguatefunc_hash.h"
15 #include "hash/extension_hash.h"
16 #include "hash/filename_hash.h"
17
// Hash values use their first byte as a tag: '\1' marks a binary file type,
// '\2' marks an ambiguous one whose remaining bytes name the disambiguation
// function to call. Arguments are parenthesized so any pointer expression
// can be passed safely.
#define ISBINARY(x) ((x)[0] == '\1')
#define ISAMBIGUOUS(x) ((x)[0] == '\2')
#define DISAMBIGUATEWHAT(x) (&(x)[1])
21
22 const char *ohcount_detect_language(SourceFile *sourcefile) {
23 const char *language = NULL;
24 char *p, *pe;
25 int length;
26
27 // Attempt to detect based on file extension.
28 length = strlen(sourcefile->ext);
29 struct ExtensionMap *re = ohcount_hash_language_from_ext(sourcefile->ext,
30 length);
31 if (re) language = re->value;
32 if (language == NULL) {
33 // Try the lower-case version of this extension.
34 char lowerext[length + 1];
35 strncpy(lowerext, sourcefile->ext, length);
36 lowerext[length] = '\0';
37 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
38 struct ExtensionMap *re = ohcount_hash_language_from_ext(lowerext, length);
39 if (re) return re->value;
40 }
41 if (language) {
42 if (ISAMBIGUOUS(language)) {
43 // Call the appropriate function for disambiguation.
44 length = strlen(DISAMBIGUATEWHAT(language));
45 struct DisambiguateFuncsMap *rd =
46 ohcount_hash_disambiguate_func_from_id(DISAMBIGUATEWHAT(language),
47 length);
48 if (rd) return rd->value(sourcefile);
49 } else return ISBINARY(language) ? NULL : language;
50 }
51
52 // Attempt to detect based on filename.
53 length = strlen(sourcefile->filename);
54 struct FilenameMap *rf =
55 ohcount_hash_language_from_filename(sourcefile->filename, length);
56 if (rf) return rf->value;
57
58 char line[81] = { '\0' }, buf[81];
59
60 // Attempt to detect using Emacs mode line (/^-\*-\s*mode[\s:]*\w/i).
61 p = ohcount_sourcefile_get_contents(sourcefile);
62 pe = p;
63 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
64 while (pe < eof) {
65 // Get the contents of the first line.
66 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
67 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
68 strncpy(line, p, length);
69 line[length] = '\0';
70 if (*line == '#' && *(line + 1) == '!') {
71 // First line was sh-bang; loop to get contents of second line.
72 while (*pe == '\r' || *pe == '\n') pe++;
73 p = pe;
74 } else break;
75 }
76 char *eol = line + strlen(line);
77 for (p = line; p < eol; p++) *p = tolower(*p);
78 p = strstr(line, "-*-");
79 if (p) {
80 p += 3;
81 while (*p == ' ' || *p == '\t') p++;
82 if (strncmp(p, "mode", 4) == 0) {
83 p += 4;
84 while (*p == ' ' || *p == '\t' || *p == ':') p++;
85 }
86 pe = p;
87 while (isalnum(*pe)) pe++;
88 length = pe - p;
89 strncpy(buf, p, length);
90 buf[length] = '\0';
91 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
92 if (rl) return rl->name;
93 }
94
95 // Attempt to detect based on Unix 'file' command.
96 int tmpfile = 0;
97 char *path = sourcefile->filepath;
98 if (sourcefile->diskpath)
99 path = sourcefile->diskpath;
100 if (access(path, F_OK) != 0) { // create temporary file
101 path = malloc(21);
102 strncpy(path, "/tmp/ohcount_XXXXXXX", 20);
103 *(path + 21) = '\0';
104 int fd = mkstemp(path);
105 char *contents = ohcount_sourcefile_get_contents(sourcefile);
106 log_it("contents:");
107 log_it(contents);
108 length = contents ? strlen(contents) : 0;
109 write(fd, contents, length);
110 close(fd);
111 tmpfile = 1;
112 }
113 char command[strlen(path) + 11];
114 sprintf(command, "file -b '%s'", path);
115 FILE *f = popen(command, "r");
116 if (f) {
117 fgets(line, sizeof(line), f);
118 char *eol = line + strlen(line);
119 for (p = line; p < eol; p++) *p = tolower(*p);
120 p = strstr(line, "script text");
121 if (p && p == line) { // /^script text(?: executable)? for \w/
122 p = strstr(line, "for ");
123 if (p) {
124 p += 4;
125 pe = p;
126 while (isalnum(*pe)) pe++;
127 length = pe - p;
128 strncpy(buf, p, length);
129 buf[length] = '\0';
130 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
131 if (rl) language = rl->name;
132 }
133 } else if (p) { // /(\w+)(?: -\w+)* script text/
134 do {
135 p--;
136 pe = p;
137 while (*p == ' ') p--;
138 while (p != line && isalnum(*(p - 1))) p--;
139 if (p != line && *(p - 1) == '-') p--;
140 } while (*p == '-'); // Skip over any switches.
141 length = pe - p;
142 strncpy(buf, p, length);
143 buf[length] = '\0';
144 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
145 if (rl) language = rl->name;
146 } else if (strstr(line, "xml")) language = LANG_XML;
147 pclose(f);
148 if (tmpfile) {
149 remove(path);
150 free(path);
151 }
152 if (language) return language;
153 }
154
155 return NULL;
156 }
157
158 const char *disambiguate_aspx(SourceFile *sourcefile) {
159 char *p = ohcount_sourcefile_get_contents(sourcefile);
160 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
161 for (; p < eof; p++) {
162 // /<%@\s*Page[^>]+Language="VB"[^>]+%>/
163 p = strstr(p, "<%@");
164 if (!p)
165 break;
166 char *pe = strstr(p, "%>");
167 if (p && pe) {
168 p += 3;
169 const int length = pe - p;
170 char buf[length];
171 strncpy(buf, p, length);
172 buf[length] = '\0';
173 char *eol = buf + strlen(buf);
174 for (p = buf; p < eol; p++) *p = tolower(*p);
175 p = buf;
176 while (*p == ' ' || *p == '\t') p++;
177 if (strncmp(p, "page", 4) == 0) {
178 p += 4;
179 if (strstr(p, "language=\"vb\""))
180 return LANG_VB_ASPX;
181 }
182 }
183 }
184 return LANG_CS_ASPX;
185 }
186
187 const char *disambiguate_b(SourceFile *sourcefile) {
188 char *p = ohcount_sourcefile_get_contents(sourcefile);
189 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
190 while (p < eof) {
191 // /(implement[ \t])|(include[ \t]+"[^"]*";)|
192 // ((return|break|continue).*;|(pick|case).*\{)/
193 if (strncmp(p, "implement", 9) == 0 &&
194 (*(p + 9) == ' ' || *(p + 9) == '\t'))
195 return LANG_LIMBO;
196 else if (strncmp(p, "include", 7) == 0 &&
197 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
198 p += 7;
199 while (*p == ' ' || *p == '\t') p++;
200 if (*p == '"') {
201 while (*p != '"' && p < eof) p++;
202 if (*p == '"' && *(p + 1) == ';')
203 return LANG_LIMBO;
204 }
205 } else if (strncmp(p, "return", 6) == 0 ||
206 strncmp(p, "break", 5) == 0 ||
207 strncmp(p, "continue", 8) == 0) {
208 if (strstr(p, ";"))
209 return LANG_LIMBO;
210 } else if (strncmp(p, "pick", 4) == 0 ||
211 strncmp(p, "case", 4) == 0) {
212 if (strstr(p, "{"))
213 return LANG_LIMBO;
214 }
215 p++;
216 }
217 return disambiguate_basic(sourcefile);
218 }
219
220 const char *disambiguate_basic(SourceFile *sourcefile) {
221 char *p, *pe;
222 int length;
223
224 // Attempt to detect based on file contents.
225 char line[81];
226 p = ohcount_sourcefile_get_contents(sourcefile);
227 pe = p;
228 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
229 while (pe < eof) {
230 // Get a line at a time.
231 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
232 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
233 strncpy(line, p, length);
234 line[length] = '\0';
235 char *line_end = pe;
236
237 p = line;
238 if (isdigit(*p)) {
239 // /^\d+\s+\w/
240 p++;
241 while (isdigit(*p)) p++;
242 if (*p == ' ' || *p == '\t') {
243 p++;
244 while (*p == ' ' || *p == '\t') p++;
245 if (isalnum(*p))
246 return LANG_CLASSIC_BASIC;
247 }
248 }
249
250 // Next line.
251 pe = line_end;
252 while (*pe == '\r' || *pe == '\n') pe++;
253 p = pe;
254 }
255
256 // Attempt to detect from associated VB files in file context.
257 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
258 if (filenames) {
259 int i;
260 for (i = 0; filenames[i] != NULL; i++) {
261 pe = filenames[i] + strlen(filenames[i]);
262 p = pe;
263 while (p > filenames[i] && *(p - 1) != '.') p--;
264 length = pe - p;
265 if (length == 3 &&
266 (strncmp(p, "frm", length) == 0 ||
267 strncmp(p, "frx", length) == 0 ||
268 strncmp(p, "vba", length) == 0 ||
269 strncmp(p, "vbp", length) == 0 ||
270 strncmp(p, "vbs", length) == 0)) {
271 return LANG_VISUALBASIC;
272 }
273 }
274 }
275
276 return LANG_STRUCTURED_BASIC;
277 }
278
279 const char *disambiguate_cs(SourceFile *sourcefile) {
280 // Attempt to detect based on file contents.
281 char *contents = ohcount_sourcefile_get_contents(sourcefile);
282 if (contents && strstr(contents, "<?cs"))
283 return LANG_CLEARSILVER_TEMPLATE;
284 else
285 return LANG_CSHARP;
286 }
287
288 const char *disambiguate_fortran(SourceFile *sourcefile) {
289 char *p, *pe;
290
291 p = ohcount_sourcefile_get_contents(sourcefile);
292 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
293 while (p < eof) {
294 if (*p == ' ' && p + 5 < eof) {
295 int i;
296 for (i = 1; i <= 5; i++)
297 if (!isdigit(*(p + i)) && *(p + i) != ' ')
298 return LANG_FORTRANFIXED; // definately not f77
299 // Possibly fixed (doesn't match /^\s*\d+\s*$/).
300 pe = p;
301 while (*pe == ' ' || *pe == '\t') pe++;
302 if (pe - p <= 5) {
303 if (!isdigit(*pe))
304 return LANG_FORTRANFIXED;
305 while (isdigit(*pe)) pe++;
306 while (*pe == ' ' || *pe == '\t') pe++;
307 if (*pe != '\r' && *pe != '\n' && pe - p == 5)
308 return LANG_FORTRANFIXED;
309 }
310 }
311 while (*p != '\r' && *p != '\n' && *p != '&' && p < eof) p++;
312 if (*p == '&') {
313 p++;
314 // Look for free-form continuation.
315 while (*p == ' ' || *p == '\t') p++;
316 if (*p == '\r' || *p == '\n') {
317 pe = p;
318 while (*pe == '\r' || *pe == '\n' || *pe == ' ' || *pe == '\t') pe++;
319 if (*pe == '&')
320 return LANG_FORTRANFREE;
321 }
322 }
323 while (*p == '\r' || *p == '\n') p++;
324 }
325 return LANG_FORTRANFREE; // might as well be free-form
326 }
327
328 const char *disambiguate_h(SourceFile *sourcefile) {
329 char *p, *pe;
330 int length;
331
332 // If the directory contains a matching *.m file, likely Objective C.
333 length = strlen(sourcefile->filename);
334 if (strcmp(sourcefile->ext, "h") == 0) {
335 char path[length];
336 strncpy(path, sourcefile->filename, length);
337 path[length] = '\0';
338 *(path + length - 1) = 'm';
339 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
340 if (filenames) {
341 int i;
342 for (i = 0; filenames[i] != NULL; i++)
343 if (strcmp(path, filenames[i]) == 0)
344 return LANG_OBJECTIVE_C;
345 }
346 }
347
348 // Attempt to detect based on file contents.
349 char line[81], buf[81];
350 p = ohcount_sourcefile_get_contents(sourcefile);
351 pe = p;
352 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
353 while (pe < eof) {
354 // Get a line at a time.
355 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
356 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
357 strncpy(line, p, length);
358 line[length] = '\0';
359 char *eol = line + strlen(line);
360 char *line_end = pe;
361
362 // Look for C++ headers.
363 if (*line == '#') {
364 p = line + 1;
365 while (*p == ' ' || *p == '\t') p++;
366 if (strncmp(p, "include", 7) == 0 &&
367 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
368 // /^#\s*include\s+[<"][^>"]+[>"]/
369 p += 8;
370 while (*p == ' ' || *p == '\t') p++;
371 if (*p == '<' || *p == '"') {
372 // Is the header file a C++ header file?
373 p++;
374 pe = p;
375 while (pe < eol && *pe != '>' && *pe != '"') pe++;
376 length = pe - p;
377 strncpy(buf, p, length);
378 buf[length] = '\0';
379 if (ohcount_hash_is_cppheader(buf, length))
380 return LANG_CPP;
381 // Is the extension for the header file a C++ file?
382 p = pe;
383 while (p > line && *(p - 1) != '.') p--;
384 length = pe - p;
385 strncpy(buf, p, length);
386 buf[length] = '\0';
387 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
388 if (re && strcmp(re->value, LANG_CPP) == 0)
389 return LANG_CPP;
390 }
391 }
392 }
393
394 // Look for C++ keywords.
395 p = line;
396 while (p < eol) {
397 if (islower(*p) && p != line && !isalnum(*(p - 1)) && *(p - 1) != '_') {
398 pe = p;
399 while (islower(*pe)) pe++;
400 if (!isalnum(*pe) && *pe != '_') {
401 length = pe - p;
402 strncpy(buf, p, length);
403 buf[length] = '\0';
404 if (strcmp(buf, "class") == 0 ||
405 strcmp(buf, "namespace") == 0 ||
406 strcmp(buf, "template") == 0 ||
407 strcmp(buf, "typename") == 0)
408 return LANG_CPP;
409 }
410 p = pe + 1;
411 } else p++;
412 }
413
414 // Next line.
415 pe = line_end;
416 while (*pe == '\r' || *pe == '\n') pe++;
417 p = pe;
418 }
419
420 // Nothing to suggest C++.
421 return LANG_C;
422 }
423
424 const char *disambiguate_in(SourceFile *sourcefile) {
425 char *p, *pe;
426 int length;
427 const char *language = NULL;
428
429 p = sourcefile->filepath;
430 pe = p + strlen(p) - 3;
431 if (strstr(p, ".") <= pe) {
432 // Only if the filename has an extension prior to the .in
433 length = pe - p;
434 char buf[length];
435 strncpy(buf, p, length);
436 buf[length] = '\0';
437 SourceFile *undecorated = ohcount_sourcefile_new(buf);
438 p = ohcount_sourcefile_get_contents(sourcefile);
439 if (!p) {
440 return NULL;
441 }
442 // The filepath without the '.in' extension does not exist on disk. The
443 // sourcefile->diskpath field must be set incase the detector needs to run
444 // 'file -b' on the file.
445 ohcount_sourcefile_set_diskpath(undecorated, sourcefile->filepath);
446 ohcount_sourcefile_set_contents(undecorated, p);
447 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
448 ohcount_sourcefile_set_filenames(undecorated, filenames);
449 language = ohcount_sourcefile_get_language(undecorated);
450 ohcount_sourcefile_free(undecorated);
451 }
452 return language;
453 }
454
455 const char *disambiguate_inc(SourceFile *sourcefile) {
456 char *p = ohcount_sourcefile_get_contents(sourcefile);
457 char *eof = p + strlen(p);
458 while (p < eof) {
459 if (*p == '\0')
460 return BINARY;
461 else if (*p == '?' && strncmp(p + 1, "php", 3) == 0)
462 return LANG_PHP;
463 p++;
464 }
465 return NULL;
466 }
467
468 const char *disambiguate_m(SourceFile *sourcefile) {
469 char *p, *pe;
470 int length;
471
472 // Attempt to detect based on a weighted heuristic of file contents.
473 int matlab_score = 0;
474 int objective_c_score = 0;
475 int limbo_score = 0;
476 int octave_syntax_detected = 0;
477
478 int i, has_h_headers = 0, has_c_files = 0;
479 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
480 if (filenames) {
481 for (i = 0; filenames[i] != NULL; i++) {
482 p = filenames[i];
483 pe = p + strlen(p);
484 if (pe - p >= 4) {
485 if (*(pe - 4) == '.' && *(pe - 3) == 'c' &&
486 ((*(pe - 2) == 'p' && *(pe - 1) == 'p') ||
487 (*(pe - 2) == '+' && *(pe - 1) == '+') ||
488 (*(pe - 2) == 'x' && *(pe - 1) == 'x'))) {
489 has_c_files = 1;
490 break; // short circuit
491 }
492 } else if (pe - p >= 3) {
493 if (*(pe - 3) == '.' && *(pe - 2) == 'c' && *(pe - 1) == 'c') {
494 has_c_files = 1;
495 break; // short circuit
496 }
497 } else if (pe - p >= 2) {
498 if (*(pe - 2) == '.') {
499 if (*(pe - 1) == 'h')
500 has_h_headers = 1;
501 else if (*(pe - 1) == 'c' || *(pe - 1) == 'C') {
502 has_c_files = 1;
503 break; // short circuit
504 }
505 }
506 }
507 }
508 }
509 if (has_h_headers && !has_c_files)
510 objective_c_score += 5;
511
512 char line[81], buf[81];
513 p = ohcount_sourcefile_get_contents(sourcefile);
514 pe = p;
515 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
516 while (pe < eof) {
517 // Get a line at a time.
518 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
519 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
520 strncpy(line, p, length);
521 line[length] = '\0';
522 char *eol = line + strlen(line);
523 char *line_end = pe;
524
525 // Look for tell-tale lines.
526 p = line;
527 while (*p == ' ' || *p == '\t') p++;
528 if (*p == '%') { // Matlab comment
529 matlab_score++;
530 } else if (*p == '#' && strncmp(p, "#import", 7) == 0) { // Objective C
531 objective_c_score++;
532 } else if (*p == '#') { // Limbo or Octave comment
533 while (*p == '#') p++;
534 if (*p == ' ' || *p == '\t') {
535 limbo_score++;
536 matlab_score++;
537 octave_syntax_detected = 1;
538 }
539 } else if (*p == '/' && *(p + 1) == '/' || *(p + 1) == '*') {
540 objective_c_score++; // Objective C comment
541 } else if (*p == '+' || *p == '-') { // Objective C method signature
542 objective_c_score++;
543 } else if (*p == '@' || *p == '#') { // Objective C method signature
544 if (strncmp(p, "@implementation", 15) == 0 ||
545 strncmp(p, "@interface", 10) == 0)
546 objective_c_score++;
547 } else if (strncmp(p, "function", 8) == 0) { // Matlab or Octave function
548 p += 8;
549 while (*p == ' ' || *p == '\t') p++;
550 if (*p == '(')
551 matlab_score++;
552 } else if (strncmp(p, "include", 7) == 0) { // Limbo include
553 // /^include[ \t]+"[^"]+\.m";/
554 p += 7;
555 if (*p == ' ' || *p == '\t') {
556 while (*p == ' ' || *p == '\t') p++;
557 if (*p == '"') {
558 while (*p != '"' && p < eol) p++;
559 if (*p == '"' && *(p - 2) == '.' && *(p - 1) == 'm')
560 limbo_score++;
561 }
562 }
563 }
564
565 // Look for Octave keywords.
566 p = line;
567 while (p < eol) {
568 if (islower(*p) && p != line && !isalnum(*(p - 1))) {
569 pe = p;
570 while (islower(*pe) || *pe == '_') pe++;
571 if (!isalnum(*pe)) {
572 length = pe - p;
573 strncpy(buf, p, length);
574 buf[length] = '\0';
575 if (strcmp(buf, "end_try_catch") == 0 ||
576 strcmp(buf, "end_unwind_protect") == 0 ||
577 strcmp(buf, "endfunction") == 0 ||
578 strcmp(buf, "endwhile") == 0)
579 octave_syntax_detected = 1;
580 }
581 p = pe + 1;
582 } else p++;
583 }
584
585 // Look for Limbo declarations
586 p = line;
587 while (p < eol) {
588 if (*p == ':' && (*(p + 1) == ' ' || *(p + 1) == '\t')) {
589 // /:[ \t]+(module|adt|fn ?\(|con[ \t])/
590 p += 2;
591 if (strncmp(p, "module", 6) == 0 && !isalnum(*(p + 6)) ||
592 strncmp(p, "adt", 3) == 0 && !isalnum(*(p + 3)) ||
593 strncmp(p, "fn", 2) == 0 &&
594 (*(p + 2) == ' ' && *(p + 3) == '(' || *(p + 2) == '(') ||
595 strncmp(p, "con", 3) == 0 &&
596 (*(p + 3) == ' ' || *(p + 3) == '\t'))
597 limbo_score++;
598 } else p++;
599 }
600
601 // Next line.
602 pe = line_end;
603 while (*pe == '\r' || *pe == '\n') pe++;
604 p = pe;
605 }
606
607 if (limbo_score > objective_c_score && limbo_score > matlab_score)
608 return LANG_LIMBO;
609 else if (objective_c_score > matlab_score)
610 return LANG_OBJECTIVE_C;
611 else
612 return octave_syntax_detected ? LANG_OCTAVE : LANG_MATLAB;
613 }
614
615 #define QMAKE_SOURCES_SPACE "SOURCES +="
616 #define QMAKE_SOURCES "SOURCES+="
617 #define QMAKE_CONFIG_SPACE "CONFIG +="
618 #define QMAKE_CONFIG "CONFIG+="
619
620 const char *disambiguate_pro(SourceFile *sourcefile) {
621 char *p = ohcount_sourcefile_get_contents(sourcefile);
622 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
623 for (; p < eof; p++) {
624 if (strncmp(p, QMAKE_SOURCES_SPACE, strlen(QMAKE_SOURCES_SPACE)) == 0 ||
625 strncmp(p, QMAKE_SOURCES, strlen(QMAKE_SOURCES)) == 0 ||
626 strncmp(p, QMAKE_CONFIG_SPACE, strlen(QMAKE_CONFIG_SPACE)) == 0 ||
627 strncmp(p, QMAKE_CONFIG, strlen(QMAKE_CONFIG)) == 0)
628 return LANG_MAKE; // really QMAKE
629 }
630 return LANG_IDL_PVWAVE;
631 }
632
633 const char *disambiguate_st(SourceFile *sourcefile) {
634 char *p, *pe;
635 int length;
636
637 // Attempt to detect based on file contents.
638 int found_assignment = 0, found_block_start = 0, found_block_end = 0;
639
640 char line[81];
641 p = ohcount_sourcefile_get_contents(sourcefile);
642 pe = p;
643 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
644 while (pe < eof) {
645 // Get a line at a time.
646 while (p < eof && *pe != '\r' && *pe != '\n') pe++;
647 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
648 strncpy(line, p, length);
649 line[length] = '\0';
650 char *eol = line + strlen(line);
651 char *line_end = pe;
652
653 for (p = line; p < eol; p++) {
654 if (*p == ':') {
655 p++;
656 while (p < eol && (*p == ' ' || *p == '\t')) p++;
657 if (*p == '=')
658 found_assignment = 1;
659 else if (*p == '[')
660 found_block_start = 1;
661 } else if (*p == ']' && *(p + 1) == '.') found_block_end = 1;
662 if (found_assignment && found_block_start && found_block_end)
663 return LANG_SMALLTALK;
664 }
665
666 // Next line.
667 pe = line_end;
668 while (*pe == '\r' || *pe == '\n') pe++;
669 p = pe;
670 }
671
672 return NULL;
673 }
674
675 int ohcount_is_binary_filename(const char *filename) {
676 char *p = (char *)filename + strlen(filename);
677 while (p > filename && *(p - 1) != '.') p--;
678 if (p > filename) {
679 struct ExtensionMap *re;
680 int length = strlen(p);
681 re = ohcount_hash_language_from_ext(p, length);
682 if (re) return ISBINARY(re->value);
683 // Try the lower-case version of this extension.
684 char lowerext[length];
685 strncpy(lowerext, p, length);
686 lowerext[length] = '\0';
687 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
688 re = ohcount_hash_language_from_ext(lowerext, length);
689 if (re) return ISBINARY(re->value);
690 }
691 return 0;
692 }
0 // detector_test.h written by Mitchell Foral. mitchell<att>caladbolg.net.
1 // See COPYING for license information.
2
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #include "../../src/detector.h"
8 #include "../../src/languages.h"
9 #include "../../src/sourcefile.h"
10
// Asserts that the file y (a string literal naming a file under
// ../detect_files/) is detected as language x.
// Wrapped in do { } while (0) so the macro behaves as a single statement,
// including inside an unbraced if/else.
#define ASSERT_DETECT(x, y) do { \
  SourceFile *sf = ohcount_sourcefile_new("../detect_files/" y); \
  const char *lang = ohcount_detect_language(sf); \
  assert(lang); \
  assert(strcmp((x), lang) == 0); \
  ohcount_sourcefile_free(sf); \
} while (0)

// Asserts that no language is detected for the file x (a string literal
// naming a file under ../detect_files/).
#define ASSERT_NODETECT(x) do { \
  SourceFile *sf = ohcount_sourcefile_new("../detect_files/" x); \
  assert(ohcount_detect_language(sf) == NULL); \
  ohcount_sourcefile_free(sf); \
} while (0)
23
24 void test_detector_smalltalk() {
25 ASSERT_DETECT(LANG_SMALLTALK, "example.st");
26 ASSERT_NODETECT("english.st");
27 }
28
29 void test_detector_disambiguate_m() {
30 ASSERT_DETECT(LANG_OBJECTIVE_C, "t1.m");
31 ASSERT_DETECT(LANG_OBJECTIVE_C, "t2.m");
32 ASSERT_DETECT(LANG_OBJECTIVE_C, "TCPSocket.m");
33 ASSERT_DETECT(LANG_OBJECTIVE_C, "foo_objective_c.m");
34 ASSERT_DETECT(LANG_MATLAB, "foo_matlab.m");
35 ASSERT_DETECT(LANG_OCTAVE, "foo_octave.m");
36 }
37
// The empty.in fixture must not be assigned any language.
void test_detector_disambiguate_in(void) {
  ASSERT_NODETECT("empty.in");
}
41 void test_detector_disambiguate_pro() {
42 ASSERT_DETECT(LANG_IDL_PVWAVE, "foo.pro");
43 ASSERT_DETECT(LANG_MAKE, "qmake.pro");
44 }
45
46 void test_detector_fortran_fixedfree() {
47 ASSERT_DETECT(LANG_FORTRANFIXED, "fortranfixed.f");
48 ASSERT_DETECT(LANG_FORTRANFREE, "fortranfree.f");
49 }
50
51 void test_detector_detect_polyglot() {
52 ASSERT_DETECT(LANG_C, "foo.c");
53 ASSERT_DETECT(LANG_C, "uses_no_cpp.h");
54 ASSERT_DETECT(LANG_CPP, "uses_cpp_headers.h");
55 ASSERT_DETECT(LANG_CPP, "uses_cpp_stdlib_headers.h");
56 ASSERT_DETECT(LANG_CPP, "uses_cpp_keywords.h");
57 ASSERT_DETECT(LANG_RUBY, "foo.rb");
58 ASSERT_DETECT(LANG_MAKE, "foo.mk");
59 ASSERT_DETECT(LANG_OBJECTIVE_C, "foo_objective_c.h");
60 ASSERT_DETECT(LANG_PHP, "upper_case_php");
61 ASSERT_DETECT(LANG_SMALLTALK, "example.st");
62 ASSERT_DETECT(LANG_VALA, "foo.vala");
63 ASSERT_DETECT(LANG_TEX, "foo.tex");
64 ASSERT_DETECT(LANG_XSLT, "example.xsl");
65 ASSERT_DETECT(LANG_LISP, "core.lisp");
66 ASSERT_DETECT(LANG_DMD, "foo.d");
67 ASSERT_DETECT(LANG_VIM, "foo.vim");
68 ASSERT_DETECT(LANG_EBUILD, "foo.ebuild");
69 ASSERT_DETECT(LANG_EBUILD, "foo.eclass");
70 ASSERT_DETECT(LANG_EXHERES, "foo.exheres-0");
71 ASSERT_DETECT(LANG_EXHERES, "foo.exlib");
72 ASSERT_DETECT(LANG_EIFFEL, "eiffel.e");
73 ASSERT_DETECT(LANG_OCAML, "ocaml.ml");
74 ASSERT_DETECT(LANG_STRATEGO, "stratego.str");
75 ASSERT_DETECT(LANG_R, "foo.R");
76 ASSERT_DETECT(LANG_GLSL, "foo.glsl");
77 ASSERT_DETECT(LANG_GLSL, "foo_glsl.vert");
78 ASSERT_DETECT(LANG_GLSL, "foo_glsl.frag");
79 ASSERT_DETECT(LANG_IDL_PVWAVE, "foo.pro");
80 ASSERT_DETECT(LANG_ASSEMBLER, "foo.z80");
81 ASSERT_DETECT(LANG_PHP, "php.inc");
82 ASSERT_DETECT(LANG_FSHARP, "fs1.fs");
83 }
84
85 void test_detector_upper_case_extensions() {
86 ASSERT_DETECT(LANG_CPP, "foo_upper_case.C");
87 ASSERT_DETECT(LANG_RUBY, "foo_upper_case.RB");
88 }
89
90 void test_detector_no_extensions() {
91 ASSERT_DETECT(LANG_PYTHON, "py_script");
92 ASSERT_DETECT(LANG_RUBY, "ruby_script");
93 ASSERT_DETECT(LANG_SHELL, "bourne_again_script");
94 ASSERT_DETECT(LANG_SHELL, "bash_script");
95 ASSERT_DETECT(LANG_PERL, "perl_w");
96 ASSERT_DETECT(LANG_DMD, "d_script");
97 ASSERT_DETECT(LANG_TCL, "tcl_script");
98 ASSERT_DETECT(LANG_PYTHON, "python.data");
99 ASSERT_DETECT(LANG_PYTHON, "python2.data");
100 }
101
102 void test_detector_csharp_or_clearsilver() {
103 ASSERT_DETECT(LANG_CSHARP, "cs1.cs");
104 ASSERT_DETECT(LANG_CLEARSILVER_TEMPLATE, "clearsilver_template1.cs");
105 }
106
107 void test_detector_basic() {
108 ASSERT_DETECT(LANG_VISUALBASIC, "visual_basic.bas");
109 ASSERT_DETECT(LANG_CLASSIC_BASIC, "classic_basic.b");
110 system("mv ../detect_files/frx1.frx ../detect_files/frx1.frx2");
111 ASSERT_DETECT(LANG_STRUCTURED_BASIC, "visual_basic.bas");
112 ASSERT_DETECT(LANG_STRUCTURED_BASIC, "structured_basic.b");
113 system("mv ../detect_files/frx1.frx2 ../detect_files/frx1.frx");
114 }
115
116 void test_detector_xml_with_custom_extension() {
117 ASSERT_DETECT(LANG_XML, "xml.custom_ext");
118 }
119
120 void all_detector_tests() {
121 test_detector_smalltalk();
122 test_detector_disambiguate_m();
123 test_detector_disambiguate_in();
124 test_detector_disambiguate_pro();
125 test_detector_fortran_fixedfree();
126 test_detector_detect_polyglot();
127 test_detector_upper_case_extensions();
128 test_detector_no_extensions();
129 test_detector_csharp_or_clearsilver();
130 test_detector_basic();
131 test_detector_xml_with_custom_extension();
132 }
0 // detector.c written by Mitchell Foral. mitchell<att>caladbolg.net.
1 // See COPYING for license information.
2
3 #include <ctype.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8
9 #include "detector.h"
10 #include "languages.h"
11 #include "log.h"
12
13 #include "hash/cppheader_hash.h"
14 #include "hash/disambiguatefunc_hash.h"
15 #include "hash/extension_hash.h"
16 #include "hash/filename_hash.h"
17
// Hash values use their first byte as a tag: '\1' marks a binary file type,
// '\2' marks an ambiguous one whose remaining bytes name the disambiguation
// function to call. Arguments are parenthesized so any pointer expression
// can be passed safely.
#define ISBINARY(x) ((x)[0] == '\1')
#define ISAMBIGUOUS(x) ((x)[0] == '\2')
#define DISAMBIGUATEWHAT(x) (&(x)[1])
21
22 const char *ohcount_detect_language(SourceFile *sourcefile) {
23 const char *language = NULL;
24 char *p, *pe;
25 int length;
26
27 // Attempt to detect based on file extension.
28 length = strlen(sourcefile->ext);
29 struct ExtensionMap *re = ohcount_hash_language_from_ext(sourcefile->ext,
30 length);
31 if (re) language = re->value;
32 if (language == NULL) {
33 // Try the lower-case version of this extension.
34 char lowerext[length + 1];
35 strncpy(lowerext, sourcefile->ext, length);
36 lowerext[length] = '\0';
37 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
38 struct ExtensionMap *re = ohcount_hash_language_from_ext(lowerext, length);
39 if (re) return re->value;
40 }
41 if (language) {
42 if (ISAMBIGUOUS(language)) {
43 // Call the appropriate function for disambiguation.
44 length = strlen(DISAMBIGUATEWHAT(language));
45 struct DisambiguateFuncsMap *rd =
46 ohcount_hash_disambiguate_func_from_id(DISAMBIGUATEWHAT(language),
47 length);
48 if (rd) return rd->value(sourcefile);
49 } else return ISBINARY(language) ? NULL : language;
50 }
51
52 // Attempt to detect based on filename.
53 length = strlen(sourcefile->filename);
54 struct FilenameMap *rf =
55 ohcount_hash_language_from_filename(sourcefile->filename, length);
56 if (rf) return rf->value;
57
58 char line[81] = { '\0' }, buf[81];
59
60 // Attempt to detect using Emacs mode line (/^-\*-\s*mode[\s:]*\w/i).
61 p = ohcount_sourcefile_get_contents(sourcefile);
62 pe = p;
63 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
64 while (pe < eof) {
65 // Get the contents of the first line.
66 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
67 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
68 strncpy(line, p, length);
69 line[length] = '\0';
70 if (*line == '#' && *(line + 1) == '!') {
71 // First line was sh-bang; loop to get contents of second line.
72 while (*pe == '\r' || *pe == '\n') pe++;
73 p = pe;
74 } else break;
75 }
76 char *eol = line + strlen(line);
77 for (p = line; p < eol; p++) *p = tolower(*p);
78 p = strstr(line, "-*-");
79 if (p) {
80 p += 3;
81 while (*p == ' ' || *p == '\t') p++;
82 if (strncmp(p, "mode", 4) == 0) {
83 p += 4;
84 while (*p == ' ' || *p == '\t' || *p == ':') p++;
85 }
86 pe = p;
87 while (isalnum(*pe)) pe++;
88 length = pe - p;
89 strncpy(buf, p, length);
90 buf[length] = '\0';
91 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
92 if (rl) return rl->name;
93 }
94
95 // Attempt to detect based on Unix 'file' command.
96 int tmpfile = 0;
97 char *path = sourcefile->filepath;
98 if (sourcefile->diskpath)
99 path = sourcefile->diskpath;
100 if (access(path, F_OK) != 0) { // create temporary file
101 path = malloc(21);
102 strncpy(path, "/tmp/ohcount_XXXXXXX", 20);
103 *(path + 21) = '\0';
104 int fd = mkstemp(path);
105 char *contents = ohcount_sourcefile_get_contents(sourcefile);
106 log_it("contents:");
107 log_it(contents);
108 length = contents ? strlen(contents) : 0;
109 write(fd, contents, length);
110 close(fd);
111 tmpfile = 1;
112 }
113 char command[strlen(path) + 11];
114 sprintf(command, "file -b '%s'", path);
115 FILE *f = popen(command, "r");
116 if (f) {
117 fgets(line, sizeof(line), f);
118 char *eol = line + strlen(line);
119 for (p = line; p < eol; p++) *p = tolower(*p);
120 p = strstr(line, "script text");
121 if (p && p == line) { // /^script text(?: executable)? for \w/
122 p = strstr(line, "for ");
123 if (p) {
124 p += 4;
125 pe = p;
126 while (isalnum(*pe)) pe++;
127 length = pe - p;
128 strncpy(buf, p, length);
129 buf[length] = '\0';
130 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
131 if (rl) language = rl->name;
132 }
133 } else if (p) { // /(\w+)(?: -\w+)* script text/
134 do {
135 p--;
136 pe = p;
137 while (*p == ' ') p--;
138 while (p != line && isalnum(*(p - 1))) p--;
139 if (p != line && *(p - 1) == '-') p--;
140 } while (*p == '-'); // Skip over any switches.
141 length = pe - p;
142 strncpy(buf, p, length);
143 buf[length] = '\0';
144 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
145 if (rl) language = rl->name;
146 } else if (strstr(line, "xml")) language = LANG_XML;
147 pclose(f);
148 if (tmpfile) {
149 remove(path);
150 free(path);
151 }
152 if (language) return language;
153 }
154
155 return NULL;
156 }
157
158 const char *disambiguate_aspx(SourceFile *sourcefile) {
159 char *p = ohcount_sourcefile_get_contents(sourcefile);
160 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
161 for (; p < eof; p++) {
162 // /<%@\s*Page[^>]+Language="VB"[^>]+%>/
163 p = strstr(p, "<%@");
164 if (!p)
165 break;
166 char *pe = strstr(p, "%>");
167 if (p && pe) {
168 p += 3;
169 const int length = pe - p;
170 char buf[length];
171 strncpy(buf, p, length);
172 buf[length] = '\0';
173 char *eol = buf + strlen(buf);
174 for (p = buf; p < eol; p++) *p = tolower(*p);
175 p = buf;
176 while (*p == ' ' || *p == '\t') p++;
177 if (strncmp(p, "page", 4) == 0) {
178 p += 4;
179 if (strstr(p, "language=\"vb\""))
180 return LANG_VB_ASPX;
181 }
182 }
183 }
184 return LANG_CS_ASPX;
185 }
186
187 const char *disambiguate_b(SourceFile *sourcefile) {
188 char *p = ohcount_sourcefile_get_contents(sourcefile);
189 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
190 while (p < eof) {
191 // /(implement[ \t])|(include[ \t]+"[^"]*";)|
192 // ((return|break|continue).*;|(pick|case).*\{)/
193 if (strncmp(p, "implement", 9) == 0 &&
194 (*(p + 9) == ' ' || *(p + 9) == '\t'))
195 return LANG_LIMBO;
196 else if (strncmp(p, "include", 7) == 0 &&
197 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
198 p += 7;
199 while (*p == ' ' || *p == '\t') p++;
200 if (*p == '"') {
201 while (*p != '"' && p < eof) p++;
202 if (*p == '"' && *(p + 1) == ';')
203 return LANG_LIMBO;
204 }
205 } else if (strncmp(p, "return", 6) == 0 ||
206 strncmp(p, "break", 5) == 0 ||
207 strncmp(p, "continue", 8) == 0) {
208 if (strstr(p, ";"))
209 return LANG_LIMBO;
210 } else if (strncmp(p, "pick", 4) == 0 ||
211 strncmp(p, "case", 4) == 0) {
212 if (strstr(p, "{"))
213 return LANG_LIMBO;
214 }
215 p++;
216 }
217 return disambiguate_basic(sourcefile);
218 }
219
220 const char *disambiguate_basic(SourceFile *sourcefile) {
221 char *p, *pe;
222 int length;
223
224 // Attempt to detect based on file contents.
225 char line[81];
226 p = ohcount_sourcefile_get_contents(sourcefile);
227 pe = p;
228 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
229 while (pe < eof) {
230 // Get a line at a time.
231 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
232 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
233 strncpy(line, p, length);
234 line[length] = '\0';
235 char *line_end = pe;
236
237 p = line;
238 if (isdigit(*p)) {
239 // /^\d+\s+\w/
240 p++;
241 while (isdigit(*p)) p++;
242 if (*p == ' ' || *p == '\t') {
243 p++;
244 while (*p == ' ' || *p == '\t') p++;
245 if (isalnum(*p))
246 return LANG_CLASSIC_BASIC;
247 }
248 }
249
250 // Next line.
251 pe = line_end;
252 while (*pe == '\r' || *pe == '\n') pe++;
253 p = pe;
254 }
255
256 // Attempt to detect from associated VB files in file context.
257 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
258 if (filenames) {
259 int i;
260 for (i = 0; filenames[i] != NULL; i++) {
261 pe = filenames[i] + strlen(filenames[i]);
262 p = pe;
263 while (p > filenames[i] && *(p - 1) != '.') p--;
264 length = pe - p;
265 if (length == 3 &&
266 (strncmp(p, "frm", length) == 0 ||
267 strncmp(p, "frx", length) == 0 ||
268 strncmp(p, "vba", length) == 0 ||
269 strncmp(p, "vbp", length) == 0 ||
270 strncmp(p, "vbs", length) == 0)) {
271 return LANG_VISUALBASIC;
272 }
273 }
274 }
275
276 return LANG_STRUCTURED_BASIC;
277 }
278
279 const char *disambiguate_cs(SourceFile *sourcefile) {
280 // Attempt to detect based on file contents.
281 char *contents = ohcount_sourcefile_get_contents(sourcefile);
282 if (contents && strstr(contents, "<?cs"))
283 return LANG_CLEARSILVER_TEMPLATE;
284 else
285 return LANG_CSHARP;
286 }
287
288 const char *disambiguate_fortran(SourceFile *sourcefile) {
289 char *p, *pe;
290
291 p = ohcount_sourcefile_get_contents(sourcefile);
292 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
293 while (p < eof) {
294 if (*p == ' ' && p + 5 < eof) {
295 int i;
296 for (i = 1; i <= 5; i++)
297 if (!isdigit(*(p + i)) && *(p + i) != ' ')
298 return LANG_FORTRANFIXED; // definately not f77
299 // Possibly fixed (doesn't match /^\s*\d+\s*$/).
300 pe = p;
301 while (*pe == ' ' || *pe == '\t') pe++;
302 if (pe - p <= 5) {
303 if (!isdigit(*pe))
304 return LANG_FORTRANFIXED;
305 while (isdigit(*pe)) pe++;
306 while (*pe == ' ' || *pe == '\t') pe++;
307 if (*pe != '\r' && *pe != '\n' && pe - p == 5)
308 return LANG_FORTRANFIXED;
309 }
310 }
311 while (*p != '\r' && *p != '\n' && *p != '&' && p < eof) p++;
312 if (*p == '&') {
313 p++;
314 // Look for free-form continuation.
315 while (*p == ' ' || *p == '\t') p++;
316 if (*p == '\r' || *p == '\n') {
317 pe = p;
318 while (*pe == '\r' || *pe == '\n' || *pe == ' ' || *pe == '\t') pe++;
319 if (*pe == '&')
320 return LANG_FORTRANFREE;
321 }
322 }
323 while (*p == '\r' || *p == '\n') p++;
324 }
325 return LANG_FORTRANFREE; // might as well be free-form
326 }
327
328 const char *disambiguate_h(SourceFile *sourcefile) {
329 char *p, *pe;
330 int length;
331
332 // If the directory contains a matching *.m file, likely Objective C.
333 length = strlen(sourcefile->filename);
334 if (strcmp(sourcefile->ext, "h") == 0) {
335 char path[length];
336 strncpy(path, sourcefile->filename, length);
337 path[length] = '\0';
338 *(path + length - 1) = 'm';
339 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
340 if (filenames) {
341 int i;
342 for (i = 0; filenames[i] != NULL; i++)
343 if (strcmp(path, filenames[i]) == 0)
344 return LANG_OBJECTIVE_C;
345 }
346 }
347
348 // Attempt to detect based on file contents.
349 char line[81], buf[81];
350 p = ohcount_sourcefile_get_contents(sourcefile);
351 pe = p;
352 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
353 while (pe < eof) {
354 // Get a line at a time.
355 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
356 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
357 strncpy(line, p, length);
358 line[length] = '\0';
359 char *eol = line + strlen(line);
360 char *line_end = pe;
361
362 // Look for C++ headers.
363 if (*line == '#') {
364 p = line + 1;
365 while (*p == ' ' || *p == '\t') p++;
366 if (strncmp(p, "include", 7) == 0 &&
367 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
368 // /^#\s*include\s+[<"][^>"]+[>"]/
369 p += 8;
370 while (*p == ' ' || *p == '\t') p++;
371 if (*p == '<' || *p == '"') {
372 // Is the header file a C++ header file?
373 p++;
374 pe = p;
375 while (pe < eol && *pe != '>' && *pe != '"') pe++;
376 length = pe - p;
377 strncpy(buf, p, length);
378 buf[length] = '\0';
379 if (ohcount_hash_is_cppheader(buf, length))
380 return LANG_CPP;
381 // Is the extension for the header file a C++ file?
382 p = pe;
383 while (p > line && *(p - 1) != '.') p--;
384 length = pe - p;
385 strncpy(buf, p, length);
386 buf[length] = '\0';
387 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
388 if (re && strcmp(re->value, LANG_CPP) == 0)
389 return LANG_CPP;
390 }
391 }
392 }
393
394 // Look for C++ keywords.
395 p = line;
396 while (p < eol) {
397 if (islower(*p) && p != line && !isalnum(*(p - 1)) && *(p - 1) != '_') {
398 pe = p;
399 while (islower(*pe)) pe++;
400 if (!isalnum(*pe) && *pe != '_') {
401 length = pe - p;
402 strncpy(buf, p, length);
403 buf[length] = '\0';
404 if (strcmp(buf, "class") == 0 ||
405 strcmp(buf, "namespace") == 0 ||
406 strcmp(buf, "template") == 0 ||
407 strcmp(buf, "typename") == 0)
408 return LANG_CPP;
409 }
410 p = pe + 1;
411 } else p++;
412 }
413
414 // Next line.
415 pe = line_end;
416 while (*pe == '\r' || *pe == '\n') pe++;
417 p = pe;
418 }
419
420 // Nothing to suggest C++.
421 return LANG_C;
422 }
423
424 const char *disambiguate_in(SourceFile *sourcefile) {
425 char *p, *pe;
426 int length;
427 const char *language = NULL;
428
429 p = sourcefile->filepath;
430 pe = p + strlen(p) - 3;
431 if (strstr(p, ".") <= pe) {
432 // Only if the filename has an extension prior to the .in
433 length = pe - p;
434 char buf[length];
435 strncpy(buf, p, length);
436 buf[length] = '\0';
437 SourceFile *undecorated = ohcount_sourcefile_new(buf);
438 p = ohcount_sourcefile_get_contents(sourcefile);
439 // The filepath without the '.in' extension does not exist on disk. The
440 // sourcefile->diskpath field must be set incase the detector needs to run
441 // 'file -b' on the file.
442 ohcount_sourcefile_set_diskpath(undecorated, sourcefile->filepath);
443 ohcount_sourcefile_set_contents(undecorated, p);
444 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
445 ohcount_sourcefile_set_filenames(undecorated, filenames);
446 language = ohcount_sourcefile_get_language(undecorated);
447 ohcount_sourcefile_free(undecorated);
448 }
449 return language;
450 }
451
452 const char *disambiguate_inc(SourceFile *sourcefile) {
453 char *p = ohcount_sourcefile_get_contents(sourcefile);
454 char *eof = p + strlen(p);
455 while (p < eof) {
456 if (*p == '\0')
457 return BINARY;
458 else if (*p == '?' && strncmp(p + 1, "php", 3) == 0)
459 return LANG_PHP;
460 p++;
461 }
462 return NULL;
463 }
464
465 const char *disambiguate_m(SourceFile *sourcefile) {
466 char *p, *pe;
467 int length;
468
469 // Attempt to detect based on a weighted heuristic of file contents.
470 int matlab_score = 0;
471 int objective_c_score = 0;
472 int limbo_score = 0;
473 int octave_syntax_detected = 0;
474
475 int i, has_h_headers = 0, has_c_files = 0;
476 char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
477 if (filenames) {
478 for (i = 0; filenames[i] != NULL; i++) {
479 p = filenames[i];
480 pe = p + strlen(p);
481 if (pe - p >= 4) {
482 if (*(pe - 4) == '.' && *(pe - 3) == 'c' &&
483 ((*(pe - 2) == 'p' && *(pe - 1) == 'p') ||
484 (*(pe - 2) == '+' && *(pe - 1) == '+') ||
485 (*(pe - 2) == 'x' && *(pe - 1) == 'x'))) {
486 has_c_files = 1;
487 break; // short circuit
488 }
489 } else if (pe - p >= 3) {
490 if (*(pe - 3) == '.' && *(pe - 2) == 'c' && *(pe - 1) == 'c') {
491 has_c_files = 1;
492 break; // short circuit
493 }
494 } else if (pe - p >= 2) {
495 if (*(pe - 2) == '.') {
496 if (*(pe - 1) == 'h')
497 has_h_headers = 1;
498 else if (*(pe - 1) == 'c' || *(pe - 1) == 'C') {
499 has_c_files = 1;
500 break; // short circuit
501 }
502 }
503 }
504 }
505 }
506 if (has_h_headers && !has_c_files)
507 objective_c_score += 5;
508
509 char line[81], buf[81];
510 p = ohcount_sourcefile_get_contents(sourcefile);
511 pe = p;
512 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
513 while (pe < eof) {
514 // Get a line at a time.
515 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
516 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
517 strncpy(line, p, length);
518 line[length] = '\0';
519 char *eol = line + strlen(line);
520 char *line_end = pe;
521
522 // Look for tell-tale lines.
523 p = line;
524 while (*p == ' ' || *p == '\t') p++;
525 if (*p == '%') { // Matlab comment
526 matlab_score++;
527 } else if (*p == '#' && strncmp(p, "#import", 7) == 0) { // Objective C
528 objective_c_score++;
529 } else if (*p == '#') { // Limbo or Octave comment
530 while (*p == '#') p++;
531 if (*p == ' ' || *p == '\t') {
532 limbo_score++;
533 matlab_score++;
534 octave_syntax_detected = 1;
535 }
536 } else if (*p == '/' && *(p + 1) == '/' || *(p + 1) == '*') {
537 objective_c_score++; // Objective C comment
538 } else if (*p == '+' || *p == '-') { // Objective C method signature
539 objective_c_score++;
540 } else if (*p == '@' || *p == '#') { // Objective C method signature
541 if (strncmp(p, "@implementation", 15) == 0 ||
542 strncmp(p, "@interface", 10) == 0)
543 objective_c_score++;
544 } else if (strncmp(p, "function", 8) == 0) { // Matlab or Octave function
545 p += 8;
546 while (*p == ' ' || *p == '\t') p++;
547 if (*p == '(')
548 matlab_score++;
549 } else if (strncmp(p, "include", 7) == 0) { // Limbo include
550 // /^include[ \t]+"[^"]+\.m";/
551 p += 7;
552 if (*p == ' ' || *p == '\t') {
553 while (*p == ' ' || *p == '\t') p++;
554 if (*p == '"') {
555 while (*p != '"' && p < eol) p++;
556 if (*p == '"' && *(p - 2) == '.' && *(p - 1) == 'm')
557 limbo_score++;
558 }
559 }
560 }
561
562 // Look for Octave keywords.
563 p = line;
564 while (p < eol) {
565 if (islower(*p) && p != line && !isalnum(*(p - 1))) {
566 pe = p;
567 while (islower(*pe) || *pe == '_') pe++;
568 if (!isalnum(*pe)) {
569 length = pe - p;
570 strncpy(buf, p, length);
571 buf[length] = '\0';
572 if (strcmp(buf, "end_try_catch") == 0 ||
573 strcmp(buf, "end_unwind_protect") == 0 ||
574 strcmp(buf, "endfunction") == 0 ||
575 strcmp(buf, "endwhile") == 0)
576 octave_syntax_detected = 1;
577 }
578 p = pe + 1;
579 } else p++;
580 }
581
582 // Look for Limbo declarations
583 p = line;
584 while (p < eol) {
585 if (*p == ':' && (*(p + 1) == ' ' || *(p + 1) == '\t')) {
586 // /:[ \t]+(module|adt|fn ?\(|con[ \t])/
587 p += 2;
588 if (strncmp(p, "module", 6) == 0 && !isalnum(*(p + 6)) ||
589 strncmp(p, "adt", 3) == 0 && !isalnum(*(p + 3)) ||
590 strncmp(p, "fn", 2) == 0 &&
591 (*(p + 2) == ' ' && *(p + 3) == '(' || *(p + 2) == '(') ||
592 strncmp(p, "con", 3) == 0 &&
593 (*(p + 3) == ' ' || *(p + 3) == '\t'))
594 limbo_score++;
595 } else p++;
596 }
597
598 // Next line.
599 pe = line_end;
600 while (*pe == '\r' || *pe == '\n') pe++;
601 p = pe;
602 }
603
604 if (limbo_score > objective_c_score && limbo_score > matlab_score)
605 return LANG_LIMBO;
606 else if (objective_c_score > matlab_score)
607 return LANG_OBJECTIVE_C;
608 else
609 return octave_syntax_detected ? LANG_OCTAVE : LANG_MATLAB;
610 }
611
612 #define QMAKE_SOURCES_SPACE "SOURCES +="
613 #define QMAKE_SOURCES "SOURCES+="
614 #define QMAKE_CONFIG_SPACE "CONFIG +="
615 #define QMAKE_CONFIG "CONFIG+="
616
617 const char *disambiguate_pro(SourceFile *sourcefile) {
618 char *p = ohcount_sourcefile_get_contents(sourcefile);
619 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
620 for (; p < eof; p++) {
621 if (strncmp(p, QMAKE_SOURCES_SPACE, strlen(QMAKE_SOURCES_SPACE)) == 0 ||
622 strncmp(p, QMAKE_SOURCES, strlen(QMAKE_SOURCES)) == 0 ||
623 strncmp(p, QMAKE_CONFIG_SPACE, strlen(QMAKE_CONFIG_SPACE)) == 0 ||
624 strncmp(p, QMAKE_CONFIG, strlen(QMAKE_CONFIG)) == 0)
625 return LANG_MAKE; // really QMAKE
626 }
627 return LANG_IDL_PVWAVE;
628 }
629
630 const char *disambiguate_st(SourceFile *sourcefile) {
631 char *p, *pe;
632 int length;
633
634 // Attempt to detect based on file contents.
635 int found_assignment = 0, found_block_start = 0, found_block_end = 0;
636
637 char line[81];
638 p = ohcount_sourcefile_get_contents(sourcefile);
639 pe = p;
640 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
641 while (pe < eof) {
642 // Get a line at a time.
643 while (p < eof && *pe != '\r' && *pe != '\n') pe++;
644 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
645 strncpy(line, p, length);
646 line[length] = '\0';
647 char *eol = line + strlen(line);
648 char *line_end = pe;
649
650 for (p = line; p < eol; p++) {
651 if (*p == ':') {
652 p++;
653 while (p < eol && (*p == ' ' || *p == '\t')) p++;
654 if (*p == '=')
655 found_assignment = 1;
656 else if (*p == '[')
657 found_block_start = 1;
658 } else if (*p == ']' && *(p + 1) == '.') found_block_end = 1;
659 if (found_assignment && found_block_start && found_block_end)
660 return LANG_SMALLTALK;
661 }
662
663 // Next line.
664 pe = line_end;
665 while (*pe == '\r' || *pe == '\n') pe++;
666 p = pe;
667 }
668
669 return NULL;
670 }
671
672 int ohcount_is_binary_filename(const char *filename) {
673 char *p = (char *)filename + strlen(filename);
674 while (p > filename && *(p - 1) != '.') p--;
675 if (p > filename) {
676 struct ExtensionMap *re;
677 int length = strlen(p);
678 re = ohcount_hash_language_from_ext(p, length);
679 if (re) return ISBINARY(re->value);
680 // Try the lower-case version of this extension.
681 char lowerext[length];
682 strncpy(lowerext, p, length);
683 lowerext[length] = '\0';
684 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
685 re = ohcount_hash_language_from_ext(lowerext, length);
686 if (re) return ISBINARY(re->value);
687 }
688 return 0;
689 }
0 // detector_test.h written by Mitchell Foral. mitchell<att>caladbolg.net.
1 // See COPYING for license information.
2
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #include "../../src/detector.h"
8 #include "../../src/languages.h"
9 #include "../../src/sourcefile.h"
10
// Asserts that the file "../detect_files/<y>" is detected as language x.
// y must be a string literal because it is concatenated with the directory.
// The SourceFile created for the check is freed afterwards.
11 #define ASSERT_DETECT(x, y) { \
12 SourceFile *sf = ohcount_sourcefile_new("../detect_files/" y); \
13 const char *lang = ohcount_detect_language(sf); \
14 assert(lang); \
15 assert(strcmp(x, lang) == 0); \
16 ohcount_sourcefile_free(sf); \
17 }
// Asserts that no language is detected (NULL result) for the file
// "../detect_files/<x>". x must be a string literal.
18 #define ASSERT_NODETECT(x) { \
19 SourceFile *sf = ohcount_sourcefile_new("../detect_files/" x); \
20 assert(ohcount_detect_language(sf) == NULL); \
21 ohcount_sourcefile_free(sf); \
22 }
23
// ".st" files: a real Smalltalk source is detected, while a ".st" file that
// is not Smalltalk yields no language at all.
24 void test_detector_smalltalk() {
25 ASSERT_DETECT(LANG_SMALLTALK, "example.st");
26 ASSERT_NODETECT("english.st");
27 }
28
// ".m" files are shared by several languages; checks that the sample files
// are told apart as Objective C, Matlab and Octave.
29 void test_detector_disambiguate_m() {
30 ASSERT_DETECT(LANG_OBJECTIVE_C, "t1.m");
31 ASSERT_DETECT(LANG_OBJECTIVE_C, "t2.m");
32 ASSERT_DETECT(LANG_OBJECTIVE_C, "TCPSocket.m");
33 ASSERT_DETECT(LANG_OBJECTIVE_C, "foo_objective_c.m");
34 ASSERT_DETECT(LANG_MATLAB, "foo_matlab.m");
35 ASSERT_DETECT(LANG_OCTAVE, "foo_octave.m");
36 }
37
// ".pro" files: an IDL/PV-WAVE source versus a qmake project file, which is
// reported as Make.
38 void test_detector_disambiguate_pro() {
39 ASSERT_DETECT(LANG_IDL_PVWAVE, "foo.pro");
40 ASSERT_DETECT(LANG_MAKE, "qmake.pro");
41 }
42
// ".f" files: fixed-form and free-form Fortran samples must be told apart.
43 void test_detector_fortran_fixedfree() {
44 ASSERT_DETECT(LANG_FORTRANFIXED, "fortranfixed.f");
45 ASSERT_DETECT(LANG_FORTRANFREE, "fortranfree.f");
46 }
47
// Broad sweep over many languages: one sample file per expected language,
// covering plain extensions, ambiguous ones (.h, .st, .pro, .inc) and a file
// without an extension ("upper_case_php").
48 void test_detector_detect_polyglot() {
49 ASSERT_DETECT(LANG_C, "foo.c");
50 ASSERT_DETECT(LANG_C, "uses_no_cpp.h");
51 ASSERT_DETECT(LANG_CPP, "uses_cpp_headers.h");
52 ASSERT_DETECT(LANG_CPP, "uses_cpp_stdlib_headers.h");
53 ASSERT_DETECT(LANG_CPP, "uses_cpp_keywords.h");
54 ASSERT_DETECT(LANG_RUBY, "foo.rb");
55 ASSERT_DETECT(LANG_MAKE, "foo.mk");
56 ASSERT_DETECT(LANG_OBJECTIVE_C, "foo_objective_c.h");
57 ASSERT_DETECT(LANG_PHP, "upper_case_php");
58 ASSERT_DETECT(LANG_SMALLTALK, "example.st");
59 ASSERT_DETECT(LANG_VALA, "foo.vala");
60 ASSERT_DETECT(LANG_TEX, "foo.tex");
61 ASSERT_DETECT(LANG_XSLT, "example.xsl");
62 ASSERT_DETECT(LANG_LISP, "core.lisp");
63 ASSERT_DETECT(LANG_DMD, "foo.d");
64 ASSERT_DETECT(LANG_VIM, "foo.vim");
65 ASSERT_DETECT(LANG_EBUILD, "foo.ebuild");
66 ASSERT_DETECT(LANG_EBUILD, "foo.eclass");
67 ASSERT_DETECT(LANG_EXHERES, "foo.exheres-0");
68 ASSERT_DETECT(LANG_EXHERES, "foo.exlib");
69 ASSERT_DETECT(LANG_EIFFEL, "eiffel.e");
70 ASSERT_DETECT(LANG_OCAML, "ocaml.ml");
71 ASSERT_DETECT(LANG_STRATEGO, "stratego.str");
72 ASSERT_DETECT(LANG_R, "foo.R");
73 ASSERT_DETECT(LANG_GLSL, "foo.glsl");
74 ASSERT_DETECT(LANG_GLSL, "foo_glsl.vert");
75 ASSERT_DETECT(LANG_GLSL, "foo_glsl.frag");
76 ASSERT_DETECT(LANG_IDL_PVWAVE, "foo.pro");
77 ASSERT_DETECT(LANG_ASSEMBLER, "foo.z80");
78 ASSERT_DETECT(LANG_PHP, "php.inc");
79 ASSERT_DETECT(LANG_FSHARP, "fs1.fs");
80 }
81
// Upper-case extensions: ".C" has its own table entry (C++), while ".RB" is
// only resolved through the lower-cased lookup.
82 void test_detector_upper_case_extensions() {
83 ASSERT_DETECT(LANG_CPP, "foo_upper_case.C");
84 ASSERT_DETECT(LANG_RUBY, "foo_upper_case.RB");
85 }
86
// Files whose name gives no usable hint; detection has to come from the
// contents.
// NOTE(review): presumably via mode line or `file` output -- confirm per file.
87 void test_detector_no_extensions() {
88 ASSERT_DETECT(LANG_PYTHON, "py_script");
89 ASSERT_DETECT(LANG_RUBY, "ruby_script");
90 ASSERT_DETECT(LANG_SHELL, "bourne_again_script");
91 ASSERT_DETECT(LANG_SHELL, "bash_script");
92 ASSERT_DETECT(LANG_PERL, "perl_w");
93 ASSERT_DETECT(LANG_DMD, "d_script");
94 ASSERT_DETECT(LANG_TCL, "tcl_script");
95 ASSERT_DETECT(LANG_PYTHON, "python.data");
96 ASSERT_DETECT(LANG_PYTHON, "python2.data");
97 }
98
// ".cs" files: a C# source versus a ClearSilver template.
99 void test_detector_csharp_or_clearsilver() {
100 ASSERT_DETECT(LANG_CSHARP, "cs1.cs");
101 ASSERT_DETECT(LANG_CLEARSILVER_TEMPLATE, "clearsilver_template1.cs");
102 }
103
// BASIC dialects. The first two checks run with frx1.frx present in the
// directory. The file is then renamed with `mv` so its name no longer ends in
// "frx", the same visual_basic.bas is expected to be reported as structured
// BASIC, and the file is renamed back at the end.
// NOTE(review): if an assert fails in between, the rename is not undone.
104 void test_detector_basic() {
105 ASSERT_DETECT(LANG_VISUALBASIC, "visual_basic.bas");
106 ASSERT_DETECT(LANG_CLASSIC_BASIC, "classic_basic.b");
107 system("mv ../detect_files/frx1.frx ../detect_files/frx1.frx2");
108 ASSERT_DETECT(LANG_STRUCTURED_BASIC, "visual_basic.bas");
109 ASSERT_DETECT(LANG_STRUCTURED_BASIC, "structured_basic.b");
110 system("mv ../detect_files/frx1.frx2 ../detect_files/frx1.frx");
111 }
112
// Checks that "xml.custom_ext" is detected as XML.
// NOTE(review): "custom_ext" is not an extension this chunk's table lists, so
// this presumably exercises the content-based fallbacks -- confirm.
113 void test_detector_xml_with_custom_extension() {
114 ASSERT_DETECT(LANG_XML, "xml.custom_ext");
115 }
116
// Entry point for the detector test group: runs every detector test in turn.
// Each test uses assert(), so the first failing expectation aborts the run.
117 void all_detector_tests() {
118 test_detector_smalltalk();
119 test_detector_disambiguate_m();
120 test_detector_disambiguate_pro();
121 test_detector_fortran_fixedfree();
122 test_detector_detect_polyglot();
123 test_detector_upper_case_extensions();
124 test_detector_no_extensions();
125 test_detector_csharp_or_clearsilver();
126 test_detector_basic();
127 test_detector_xml_with_custom_extension();
128 }
0 #!/usr/bin/env bash
1 # Build script for Ohcount.
2 # Written by Mitchell Foral. mitchell<att>caladbolg.net.
3
4 # Options
5 # Change these for your system configuration.
# Select compiler and linker settings for the current platform. `uname` is
# queried once and quoted so an unexpected value cannot break the tests.
platform=$(uname)
if [ "$platform" != "Darwin" ]
then
  # Linux: rely on the compiler's default search paths.
  INC_DIR=
  LIB_DIR=

  if [ "$platform" = "FreeBSD" ]
  then
    INC_DIR=/usr/local/include
    LIB_DIR=/usr/local/lib
  fi

  # You shouldn't have to change the following.
  CFLAGS=-O3
  CFLAGS="$CFLAGS -DTMP_FILES_ARE_DT_UNKNOWN" # workaround bug on centos/SF servers
  WARN="-Wall -Wno-pointer-to-int-cast -Wno-parentheses"
  SHARED=-shared
  SHARED_NAME=libohcount.so
  RB_SHARED=-shared
  RB_SHARED_NAME=ohcount.so
else
  # Mac OSX
  INC_DIR=/opt/local/include
  LIB_DIR=/opt/local/lib
  # You shouldn't have to change the following.
  CFLAGS="-fno-common -g"
  WARN="-Wall -Wno-parentheses"
  SHARED="-dynamiclib -L$LIB_DIR -lpcre"
  SHARED_NAME=libohcount.dylib
  RB_SHARED="-dynamic -bundle -lruby"
  RB_SHARED_NAME=ohcount.bundle
fi

# C compiler and flags
# Only pass -I/-L when a directory is configured: a bare "-I" would swallow
# the next argument on the command line as its directory.
cc="gcc -fPIC -g $CFLAGS $WARN ${INC_DIR:+-I$INC_DIR} ${LIB_DIR:+-L$LIB_DIR}"

# Ohcount source files
files="src/sourcefile.c \
       src/detector.c \
       src/licenses.c \
       src/parser.o \
       src/loc.c \
       src/log.c \
       src/diff.c \
       src/parsed_language.c \
       src/hash/language_hash.c"
52
53 # If any src/hash/*.gperf file is newer than the header files (which were
54 # presumably generated together), regenerate the headers.
# Regenerates the gperf hash headers when none exist yet, or when any
# src/hash/*.gperf file is newer than src/hash/parser_hash.h. Exits the script
# if generation fails.
build_hash_headers()
{
  local regenerate=no
  if [[ -z $(ls src/hash/ | grep "_hash.h$") ]]
  then
    regenerate=yes
  elif [[ -n $(find src/hash/*.gperf -newer src/hash/parser_hash.h) ]]
  then
    regenerate=yes
  fi

  if [[ $regenerate == yes ]]
  then
    echo "Generating hash headers"
    sh -c "cd src/hash/ && ./generate_headers" || exit 1
  fi
}
64
65 # If src/parser.o does not exist, or if there are Ragel parsers or parser
66 # header files newer than the existing parser.o, recompile parser.o.
# Recompiles src/parser.o when it is missing or older than any Ragel parser
# or generated parser header in src/parsers/. Exits the script on failure.
build_parser_o()
{
  local rebuild=no
  if [[ ! -f src/parser.o ]]
  then
    rebuild=yes
  elif [[ -n $(find src/parsers/*.{h,rl} -newer src/parser.o) ]]
  then
    rebuild=yes
  fi

  if [[ $rebuild == yes ]]
  then
    bash -c "cd src/parsers/ && bash ./compile" || exit 1
    echo "Building src/parser.c (will take a while)"
    bash -c "$cc -c src/parser.c -o src/parser.o" || exit 1
  fi
}
77
# Builds the shared library src/$SHARED_NAME after refreshing its generated
# prerequisites. The link step is skipped when the library exists and no
# src/*.h or src/*.c file is newer than it.
build_shared()
{
  build_hash_headers
  build_parser_o

  local relink=no
  if [[ ! -f src/$SHARED_NAME ]]
  then
    relink=yes
  elif [[ -n $(find src/*.{h,c} -newer src/$SHARED_NAME) ]]
  then
    relink=yes
  fi

  if [[ $relink == yes ]]
  then
    echo "Building shared library"
    sh -c "$cc $SHARED $files -o src/$SHARED_NAME" || exit 1
  fi
}
89
# Builds the command line executable bin/ohcount after refreshing the
# generated headers and the parser object. Exits the script on failure.
build_ohcount()
{
  build_hash_headers
  build_parser_o

  echo "Building Ohcount"
  mkdir -p bin/
  if ! sh -c "$cc src/ohcount.c $files -o bin/ohcount -lpcre"
  then
    exit 1
  fi
}
98
# Builds the unit test runner test/unit/run_tests after refreshing the
# generated headers and the parser object. Exits the script on failure.
build_test_suite()
{
  build_hash_headers
  build_parser_o

  echo "Building test suite"
  if ! sh -c "$cc test/unit/all_tests.c $files -o test/unit/run_tests -lpcre"
  then
    exit 1
  fi
}
107
# Placeholder for running the unit tests: the suite is disabled here, so this
# only reports that fact.
run_test_suite()
{
  printf '%s\n' "Running test suite"
  printf '%s\n' "disabled test suite, does not work"
}
113
# Generates the SWIG wrapper, builds the Ruby extension into ruby/<arch>/ and
# runs the Ruby unit tests. Exits the script when any step fails.
#
# Sets the global `arch`, which the caller uses in its success message.
build_ruby_bindings()
{
  local archdir

  # RbConfig replaces the Config constant, which modern Ruby no longer has.
  arch=$(ruby -rmkmf -e 'print RbConfig::expand(CONFIG["arch"])')
  archdir=$(ruby -rmkmf -e 'print RbConfig::expand(CONFIG["archdir"])')
  if [ -z "$arch" ] || [ -z "$archdir" ]
  then
    echo "Unable to query the Ruby configuration; is ruby installed?"
    exit 1
  fi

  echo "Generating Ruby bindings for $arch"
  sh -c "swig -ruby -o ruby/ohcount_wrap.c ruby/ohcount.i" || exit 1
  mkdir -p "ruby/$arch"
  sh -c "$cc $RB_SHARED ruby/ohcount_wrap.c $files -o ruby/$arch/$RB_SHARED_NAME \
         -I$archdir \
         -lpcre" || exit 1
  sh -c "cd test/unit/ruby && ruby ruby_test.rb" || exit 1
}
125
# Dispatch on the requested target; no argument at all means "all".
# "${1-all}" only substitutes when $1 is unset, so an explicitly empty
# argument still falls through to the usage message.
case "${1-all}" in
  all)
    build_ohcount
    build_test_suite
    run_test_suite
    ;;
  shared)
    build_shared
    echo "Build successful; $SHARED_NAME is in src/"
    ;;
  ohcount)
    build_ohcount
    echo "Build successful; ohcount is in bin/"
    ;;
  tests)
    build_test_suite
    run_test_suite
    ;;
  ruby)
    build_ruby_bindings
    echo "Build successful; $RB_SHARED_NAME is in ruby/$arch"
    ;;
  clean)
    rm -f bin/ohcount
    rm -f test/unit/run_tests
    rm -f src/parser.o
    rm -f src/parsers/*.h
    rm -f src/hash/*.h
    rm -f src/hash/*.c
    rm -f "src/$SHARED_NAME"
    rm -f "ruby/$RB_SHARED_NAME"
    # Only clean the per-architecture directory when ruby reports one; an
    # empty value would expand to ruby//* and wipe the whole ruby/ directory.
    ruby_arch=$(ruby -rmkmf -e 'print RbConfig::expand(CONFIG["arch"])' 2>/dev/null)
    if [ -n "$ruby_arch" ]
    then
      rm -rf "ruby/$ruby_arch"/*
    fi
    ;;
  *)
    echo "Usage: build [all|ohcount|shared|tests|ruby|clean]"
    ;;
esac
0 %{
1 #include "../languages.h"
2
3 #define BINARY "\1"
4 #define DISAMBIGUATE(x) ("\2" x)
5 %}
6 struct ExtensionMap { const char *key; const char *value; };
7 %%
8 C, LANG_CPP
9 H, LANG_CPP
10 ada, LANG_ADA
11 adb, LANG_ADA
12 ads, LANG_ADA
13 aiff, BINARY
14 as, LANG_ACTIONSCRIPT
15 ascx, DISAMBIGUATE("aspx")
16 asm, LANG_ASSEMBLER
17 aspx, DISAMBIGUATE("aspx")
18 au, BINARY
19 avi, BINARY
20 awk, LANG_AWK
21 b, DISAMBIGUATE("b")
22 bas, DISAMBIGUATE("basic")
23 bat, LANG_BAT
24 bi, DISAMBIGUATE("basic")
25 bmp, BINARY
26 bmx, LANG_BLITZMAX
27 boo, LANG_BOO
28 c, LANG_C
29 c++, LANG_CPP
30 cache, BINARY
31 cc, LANG_CPP
32 cmake, LANG_CMAKE
33 com, LANG_DCL
34 cpp, LANG_CPP
35 cs, DISAMBIGUATE("cs")
36 csproj, LANG_XML
37 css, LANG_CSS
38 ctp, LANG_PHP
39 cxx, LANG_CPP
40 d, LANG_DMD
41 dat, BINARY
42 di, LANG_DMD
43 doc, BINARY
44 dylan, LANG_DYLAN
45 e, LANG_EIFFEL
46 ebuild, LANG_EBUILD
47 eclass, LANG_EBUILD
48 el, LANG_EMACSLISP
49 erl, LANG_ERLANG
50 exheres-0, LANG_EXHERES
51 exlib, LANG_EXHERES
52 f, DISAMBIGUATE("fortran")
53 f03, DISAMBIGUATE("fortran")
54 f77, DISAMBIGUATE("fortran")
55 f90, DISAMBIGUATE("fortran")
56 f95, DISAMBIGUATE("fortran")
57 factor, LANG_FACTOR
58 frag, LANG_GLSL
59 frm, LANG_VISUALBASIC
60 frx, LANG_VISUALBASIC
61 fs, LANG_FSHARP
62 ftn, DISAMBIGUATE("fortran")
63 gif, BINARY
64 glsl, LANG_GLSL
65 groovy, LANG_GROOVY
66 gz, BINARY
67 h, DISAMBIGUATE("h")
68 h++, LANG_CPP
69 haml, LANG_HAML
70 hh, LANG_CPP
71 hpp, LANG_CPP
72 hrl, LANG_ERLANG
73 hs, LANG_HASKELL
74 htm, LANG_HTML
75 html, LANG_HTML
76 hx, LANG_HAXE
77 hxx, LANG_CPP
78 icns, BINARY
79 in, DISAMBIGUATE("in")
80 inc, DISAMBIGUATE("inc")
81 j, LANG_OBJECTIVE_J
82 jar, BINARY
83 java, LANG_JAVA
84 jpeg, BINARY
85 jpg, BINARY
86 js, LANG_JAVASCRIPT
87 jsp, LANG_JSP
88 kdebuild-1, LANG_EBUILD
89 latex, LANG_TEX
90 lisp, LANG_LISP
91 lsp, LANG_LISP
92 ltx, LANG_TEX
93 lua, LANG_LUA
94 m, DISAMBIGUATE("m")
95 m4a, BINARY
96 mf, LANG_METAFONT
97 mk, LANG_MAKE
98 ml, LANG_OCAML
99 ml4, LANG_OCAML
100 mli, LANG_OCAML
101 mm, LANG_OBJECTIVE_C
102 mov, BINARY
103 mp, LANG_METAPOST_WITH_TEX
104 mp3, BINARY
105 mpg, BINARY
106 mxml, LANG_MXML
107 nix, LANG_NIX
108 nse, LANG_LUA
109 ogg, BINARY
110 p6, LANG_PERL
111 pas, LANG_PASCAL
112 perl, LANG_PERL
113 pdf, BINARY
114 ph, LANG_PERL
115 php, LANG_PHP
116 php3, LANG_PHP
117 php4, LANG_PHP
118 php5, LANG_PHP
119 pike, LANG_PIKE
120 pl, LANG_PERL
121 pm, LANG_PERL
122 pmc, LANG_C
123 pmod, LANG_PIKE
124 png, BINARY
125 pnt, BINARY
126 pod, LANG_PERL
127 pp, LANG_PASCAL
128 ppt, BINARY
129 pro, DISAMBIGUATE("pro")
130 py, LANG_PYTHON
131 qt, BINARY
132 r, LANG_R
133 ra, BINARY
134 rb, LANG_RUBY
135 rex, LANG_REXX
136 rexx, LANG_REXX
137 rhtml, LANG_RHTML
138 s, LANG_ASSEMBLER
139 sc, LANG_SCHEME
140 scala, LANG_SCALA
141 sce, LANG_SCILAB
142 sci, LANG_SCILAB
143 scm, LANG_SCHEME
144 sh, LANG_SHELL
145 sls, LANG_SCHEME
146 sps, LANG_SCHEME
147 sql, LANG_SQL
148 ss, LANG_SCHEME
149 st, DISAMBIGUATE("st")
150 str, LANG_STRATEGO
151 svg, BINARY
152 svgz, BINARY
153 svn, BINARY
154 swf, BINARY
155 t, LANG_PERL
156 tar, BINARY
157 tcl, LANG_TCL
158 tex, LANG_TEX
159 tgz, BINARY
160 tif, BINARY
161 tiff, BINARY
162 tpl, LANG_HTML
163 vala, LANG_VALA
164 vb, LANG_VISUALBASIC
165 vba, LANG_VISUALBASIC
166 vbs, LANG_VISUALBASIC
167 vert, LANG_GLSL
168 vhd, LANG_VHDL
169 vhdl, LANG_VHDL
170 vim, LANG_VIM
171 wav, BINARY
172 xaml, LANG_XAML
173 xls, BINARY
174 xlw, BINARY
175 xml, LANG_XML
176 xs, LANG_C
177 xsd, LANG_XMLSCHEMA
178 xsl, LANG_XSLT
179 z80, LANG_ASSEMBLER
180 zip, BINARY
0 GNU GENERAL PUBLIC LICENSE
1 Version 2, June 1991
2
3 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
4 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
5 Everyone is permitted to copy and distribute verbatim copies
6 of this license document, but changing it is not allowed.
7
8 Preamble
9
10 The licenses for most software are designed to take away your
11 freedom to share and change it. By contrast, the GNU General Public
12 License is intended to guarantee your freedom to share and change free
13 software--to make sure the software is free for all its users. This
14 General Public License applies to most of the Free Software
15 Foundation's software and to any other program whose authors commit to
16 using it. (Some other Free Software Foundation software is covered by
17 the GNU Lesser General Public License instead.) You can apply it to
18 your programs, too.
19
20 When we speak of free software, we are referring to freedom, not
21 price. Our General Public Licenses are designed to make sure that you
22 have the freedom to distribute copies of free software (and charge for
23 this service if you wish), that you receive source code or can get it
24 if you want it, that you can change the software or use pieces of it
25 in new free programs; and that you know you can do these things.
26
27 To protect your rights, we need to make restrictions that forbid
28 anyone to deny you these rights or to ask you to surrender the rights.
29 These restrictions translate to certain responsibilities for you if you
30 distribute copies of the software, or if you modify it.
31
32 For example, if you distribute copies of such a program, whether
33 gratis or for a fee, you must give the recipients all the rights that
34 you have. You must make sure that they, too, receive or can get the
35 source code. And you must show them these terms so they know their
36 rights.
37
38 We protect your rights with two steps: (1) copyright the software, and
39 (2) offer you this license which gives you legal permission to copy,
40 distribute and/or modify the software.
41
42 Also, for each author's protection and ours, we want to make certain
43 that everyone understands that there is no warranty for this free
44 software. If the software is modified by someone else and passed on, we
45 want its recipients to know that what they have is not the original, so
46 that any problems introduced by others will not reflect on the original
47 authors' reputations.
48
49 Finally, any free program is threatened constantly by software
50 patents. We wish to avoid the danger that redistributors of a free
51 program will individually obtain patent licenses, in effect making the
52 program proprietary. To prevent this, we have made it clear that any
53 patent must be licensed for everyone's free use or not licensed at all.
54
55 The precise terms and conditions for copying, distribution and
56 modification follow.
57
58 GNU GENERAL PUBLIC LICENSE
59 TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
60
61 0. This License applies to any program or other work which contains
62 a notice placed by the copyright holder saying it may be distributed
63 under the terms of this General Public License. The "Program", below,
64 refers to any such program or work, and a "work based on the Program"
65 means either the Program or any derivative work under copyright law:
66 that is to say, a work containing the Program or a portion of it,
67 either verbatim or with modifications and/or translated into another
68 language. (Hereinafter, translation is included without limitation in
69 the term "modification".) Each licensee is addressed as "you".
70
71 Activities other than copying, distribution and modification are not
72 covered by this License; they are outside its scope. The act of
73 running the Program is not restricted, and the output from the Program
74 is covered only if its contents constitute a work based on the
75 Program (independent of having been made by running the Program).
76 Whether that is true depends on what the Program does.
77
78 1. You may copy and distribute verbatim copies of the Program's
79 source code as you receive it, in any medium, provided that you
80 conspicuously and appropriately publish on each copy an appropriate
81 copyright notice and disclaimer of warranty; keep intact all the
82 notices that refer to this License and to the absence of any warranty;
83 and give any other recipients of the Program a copy of this License
84 along with the Program.
85
86 You may charge a fee for the physical act of transferring a copy, and
87 you may at your option offer warranty protection in exchange for a fee.
88
89 2. You may modify your copy or copies of the Program or any portion
90 of it, thus forming a work based on the Program, and copy and
91 distribute such modifications or work under the terms of Section 1
92 above, provided that you also meet all of these conditions:
93
94 a) You must cause the modified files to carry prominent notices
95 stating that you changed the files and the date of any change.
96
97 b) You must cause any work that you distribute or publish, that in
98 whole or in part contains or is derived from the Program or any
99 part thereof, to be licensed as a whole at no charge to all third
100 parties under the terms of this License.
101
102 c) If the modified program normally reads commands interactively
103 when run, you must cause it, when started running for such
104 interactive use in the most ordinary way, to print or display an
105 announcement including an appropriate copyright notice and a
106 notice that there is no warranty (or else, saying that you provide
107 a warranty) and that users may redistribute the program under
108 these conditions, and telling the user how to view a copy of this
109 License. (Exception: if the Program itself is interactive but
110 does not normally print such an announcement, your work based on
111 the Program is not required to print an announcement.)
112
113 These requirements apply to the modified work as a whole. If
114 identifiable sections of that work are not derived from the Program,
115 and can be reasonably considered independent and separate works in
116 themselves, then this License, and its terms, do not apply to those
117 sections when you distribute them as separate works. But when you
118 distribute the same sections as part of a whole which is a work based
119 on the Program, the distribution of the whole must be on the terms of
120 this License, whose permissions for other licensees extend to the
121 entire whole, and thus to each and every part regardless of who wrote it.
122
123 Thus, it is not the intent of this section to claim rights or contest
124 your rights to work written entirely by you; rather, the intent is to
125 exercise the right to control the distribution of derivative or
126 collective works based on the Program.
127
128 In addition, mere aggregation of another work not based on the Program
129 with the Program (or with a work based on the Program) on a volume of
130 a storage or distribution medium does not bring the other work under
131 the scope of this License.
132
133 3. You may copy and distribute the Program (or a work based on it,
134 under Section 2) in object code or executable form under the terms of
135 Sections 1 and 2 above provided that you also do one of the following:
136
137 a) Accompany it with the complete corresponding machine-readable
138 source code, which must be distributed under the terms of Sections
139 1 and 2 above on a medium customarily used for software interchange; or,
140
141 b) Accompany it with a written offer, valid for at least three
142 years, to give any third party, for a charge no more than your
143 cost of physically performing source distribution, a complete
144 machine-readable copy of the corresponding source code, to be
145 distributed under the terms of Sections 1 and 2 above on a medium
146 customarily used for software interchange; or,
147
148 c) Accompany it with the information you received as to the offer
149 to distribute corresponding source code. (This alternative is
150 allowed only for noncommercial distribution and only if you
151 received the program in object code or executable form with such
152 an offer, in accord with Subsection b above.)
153
154 The source code for a work means the preferred form of the work for
155 making modifications to it. For an executable work, complete source
156 code means all the source code for all modules it contains, plus any
157 associated interface definition files, plus the scripts used to
158 control compilation and installation of the executable. However, as a
159 special exception, the source code distributed need not include
160 anything that is normally distributed (in either source or binary
161 form) with the major components (compiler, kernel, and so on) of the
162 operating system on which the executable runs, unless that component
163 itself accompanies the executable.
164
165 If distribution of executable or object code is made by offering
166 access to copy from a designated place, then offering equivalent
167 access to copy the source code from the same place counts as
168 distribution of the source code, even though third parties are not
169 compelled to copy the source along with the object code.
170
171 4. You may not copy, modify, sublicense, or distribute the Program
172 except as expressly provided under this License. Any attempt
173 otherwise to copy, modify, sublicense or distribute the Program is
174 void, and will automatically terminate your rights under this License.
175 However, parties who have received copies, or rights, from you under
176 this License will not have their licenses terminated so long as such
177 parties remain in full compliance.
178
179 5. You are not required to accept this License, since you have not
180 signed it. However, nothing else grants you permission to modify or
181 distribute the Program or its derivative works. These actions are
182 prohibited by law if you do not accept this License. Therefore, by
183 modifying or distributing the Program (or any work based on the
184 Program), you indicate your acceptance of this License to do so, and
185 all its terms and conditions for copying, distributing or modifying
186 the Program or works based on it.
187
188 6. Each time you redistribute the Program (or any work based on the
189 Program), the recipient automatically receives a license from the
190 original licensor to copy, distribute or modify the Program subject to
191 these terms and conditions. You may not impose any further
192 restrictions on the recipients' exercise of the rights granted herein.
193 You are not responsible for enforcing compliance by third parties to
194 this License.
195
196 7. If, as a consequence of a court judgment or allegation of patent
197 infringement or for any other reason (not limited to patent issues),
198 conditions are imposed on you (whether by court order, agreement or
199 otherwise) that contradict the conditions of this License, they do not
200 excuse you from the conditions of this License. If you cannot
201 distribute so as to satisfy simultaneously your obligations under this
202 License and any other pertinent obligations, then as a consequence you
203 may not distribute the Program at all. For example, if a patent
204 license would not permit royalty-free redistribution of the Program by
205 all those who receive copies directly or indirectly through you, then
206 the only way you could satisfy both it and this License would be to
207 refrain entirely from distribution of the Program.
208
209 If any portion of this section is held invalid or unenforceable under
210 any particular circumstance, the balance of the section is intended to
211 apply and the section as a whole is intended to apply in other
212 circumstances.
213
214 It is not the purpose of this section to induce you to infringe any
215 patents or other property right claims or to contest validity of any
216 such claims; this section has the sole purpose of protecting the
217 integrity of the free software distribution system, which is
218 implemented by public license practices. Many people have made
219 generous contributions to the wide range of software distributed
220 through that system in reliance on consistent application of that
221 system; it is up to the author/donor to decide if he or she is willing
222 to distribute software through any other system and a licensee cannot
223 impose that choice.
224
225 This section is intended to make thoroughly clear what is believed to
226 be a consequence of the rest of this License.
227
228 8. If the distribution and/or use of the Program is restricted in
229 certain countries either by patents or by copyrighted interfaces, the
230 original copyright holder who places the Program under this License
231 may add an explicit geographical distribution limitation excluding
232 those countries, so that distribution is permitted only in or among
233 countries not thus excluded. In such case, this License incorporates
234 the limitation as if written in the body of this License.
235
236 9. The Free Software Foundation may publish revised and/or new versions
237 of the General Public License from time to time. Such new versions will
238 be similar in spirit to the present version, but may differ in detail to
239 address new problems or concerns.
240
241 Each version is given a distinguishing version number. If the Program
242 specifies a version number of this License which applies to it and "any
243 later version", you have the option of following the terms and conditions
244 either of that version or of any later version published by the Free
245 Software Foundation. If the Program does not specify a version number of
246 this License, you may choose any version ever published by the Free Software
247 Foundation.
248
249 10. If you wish to incorporate parts of the Program into other free
250 programs whose distribution conditions are different, write to the author
251 to ask for permission. For software which is copyrighted by the Free
252 Software Foundation, write to the Free Software Foundation; we sometimes
253 make exceptions for this. Our decision will be guided by the two goals
254 of preserving the free status of all derivatives of our free software and
255 of promoting the sharing and reuse of software generally.
256
257 NO WARRANTY
258
259 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
260 FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
261 OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
262 PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
263 OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
264 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
265 TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
266 PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
267 REPAIR OR CORRECTION.
268
269 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
270 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
271 REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
272 INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
273 OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
274 TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
275 YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
276 PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
277 POSSIBILITY OF SUCH DAMAGES.
278
279 END OF TERMS AND CONDITIONS
280
281 How to Apply These Terms to Your New Programs
282
283 If you develop a new program, and you want it to be of the greatest
284 possible use to the public, the best way to achieve this is to make it
285 free software which everyone can redistribute and change under these terms.
286
287 To do so, attach the following notices to the program. It is safest
288 to attach them to the start of each source file to most effectively
289 convey the exclusion of warranty; and each file should have at least
290 the "copyright" line and a pointer to where the full notice is found.
291
292 <one line to give the program's name and a brief idea of what it does.>
293 Copyright (C) <year> <name of author>
294
295 This program is free software; you can redistribute it and/or modify
296 it under the terms of the GNU General Public License as published by
297 the Free Software Foundation; either version 2 of the License, or
298 (at your option) any later version.
299
300 This program is distributed in the hope that it will be useful,
301 but WITHOUT ANY WARRANTY; without even the implied warranty of
302 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
303 GNU General Public License for more details.
304
305 You should have received a copy of the GNU General Public License along
306 with this program; if not, write to the Free Software Foundation, Inc.,
307 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
308
309 Also add information on how to contact you by electronic and paper mail.
310
311 If the program is interactive, make it output a short notice like this
312 when it starts in an interactive mode:
313
314 Gnomovision version 69, Copyright (C) year name of author
315 Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
316 This is free software, and you are welcome to redistribute it
317 under certain conditions; type `show c' for details.
318
319 The hypothetical commands `show w' and `show c' should show the appropriate
320 parts of the General Public License. Of course, the commands you use may
321 be called something other than `show w' and `show c'; they could even be
322 mouse-clicks or menu items--whatever suits your program.
323
324 You should also get your employer (if you work as a programmer) or your
325 school, if any, to sign a "copyright disclaimer" for the program, if
326 necessary. Here is a sample; alter the names:
327
328 Yoyodyne, Inc., hereby disclaims all copyright interest in the program
329 `Gnomovision' (which makes passes at compilers) written by James Hacker.
330
331 <signature of Ty Coon>, 1 April 1989
332 Ty Coon, President of Vice
333
334 This General Public License does not permit incorporating your program into
335 proprietary programs. If your program is a subroutine library, you may
336 consider it more useful to permit linking proprietary applications with the
337 library. If this is what you want to do, use the GNU Lesser General
338 Public License instead of this License.
0 == Ohcount
1
2 NOTE: THE PRIMARY DOCUMENTATION FOR OHCOUNT IS EXTRACTED FROM SOURCE CODE
3 BY DOXYGEN. FOR THE MOST UP-TO-DATE DOCS, PLEASE SEE BELOW FOR INFO
4 ON BUILDING AND REFERRING TO THE DOXYGEN DOCS.
5
6 Ohloh/SourceForge's source code line counter.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License Version 2 as
10 published by the Free Software Foundation.
11
12 Ohcount is specifically licensed under GPL v2.0, and no later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 == Overview
23
24 Ohcount is a library for counting lines of source code.
25 It was originally developed at Ohloh, and is used to generate
26 the reports at www.ohloh.net.
27
28 Ohcount supports multiple languages within a single file: for example,
29 a complex HTML document might include regions of both CSS and JavaScript.
30
31 Ohcount has two main components: a detector which determines the primary
32 language family used by a particular source file, and a parser which
33 provides a line-by-line breakdown of the contents of a source file.
34
35 Ohcount includes a command line tool that allows you to count individual
36 files or whole directory trees. It also allows you to find source code
37 files by language family, or to create a detailed annotation of an
38 individual source file.
39
40 Ohcount includes a Ruby binding which allows you to directly access its
41 language detection features from a Ruby application.
42
43 == System Requirements
44
45 Ohcount is supported on Mac OS X 10.4 and 10.5 and Ubuntu 8.04 LTS. Other Linux
46 environments should also work, but your mileage may vary.
47
48 Ohcount does not support Windows.
49
50 Ohcount targets Ruby 1.8.6. The build script requires a bash shell. You
51 also need a C compiler to build the native extensions.
52
53 == Source Code ==
54
55 Ohcount source code is available as a Git repository:
56
57 git clone git://github.com/andyverprauskus/ohcount.git
58
59 == Doc files ==
60
61 To build the more extensive Doxygen docs, do
62 > cd doc
63 > doxygen Doxyfile
64
65 After building the docs, view them with a browser by opening doc/html/index.html.
66 On a mac, you can install Doxygen with "sudo port install Doxygen".
67 On Debian/Ubuntu, install with "sudo apt-get install doxygen".
68
69 == Building Ohcount ==
70
71 You will need ragel 6.3 or higher, bash, pcre, gcc (version 4.1.2 or greater) and SWIG to build ohcount. Once you have them, go to the top directory of ohcount and type:
72
73 > bash build
74 or > ./build
75
76 == Using Ohcount ==
77
78 Once you've built ohcount, the executable program will be at bin/ohcount. The most basic use is to count lines of code in a directory tree; run:
79 "ohcount" to count the current directory and source code in any child directories
80
81 == For additional docs, including how to add a new language, see the Doxygen docs ==
82
83 Particularly, for instructions on adding a new language, follow the instructions at doc/html/detector_doc.html
84 Read http://labs.ohloh.net/ohcount/wiki/HowToSubmitPatches for information about having your patch accepted.
85
86
87 DEPENDENCIES
88 ============
89 SWIG, pcre, ragel, bash
0 task :default => [:build_all]
1
2 task :build_all => [:build, :build_ruby]
3
4 task :build do
5 system "./build"
6 end
7
8 task :build_ruby do
9 system "./build ruby"
10 end
+162
-0
build less more
0 #!/usr/bin/env bash
1 # Build script for Ohcount.
2 # Written by Mitchell Foral. mitchell<att>caladbolg.net.
3
4 # Platform options
5 # Adjust the directories below to match your system.
6 if [ "$(uname)" != "Darwin" ]
7 then
8   # Linux and every other non-Darwin system
9   INC_DIR=
10   LIB_DIR=
11
12   if [ "$(uname)" == "FreeBSD" ]
13   then
14     INC_DIR=/usr/local/include
15     LIB_DIR=/usr/local/lib
16   fi
17
18   # Nothing below normally needs editing.
19   CFLAGS=-O3
20   CFLAGS="$CFLAGS -DTMP_FILES_ARE_DT_UNKNOWN" # workaround bug on centos/SF servers
21   WARN="-Wall -Wno-pointer-to-int-cast -Wno-parentheses"
22   SHARED=-shared
23   SHARED_NAME=libohcount.so
24   RB_SHARED=-shared
25   RB_SHARED_NAME=ohcount.so
26 else
27   # Mac OS X
28   INC_DIR=/opt/local/include
29   LIB_DIR=/opt/local/lib
30   # Nothing below normally needs editing.
31   CFLAGS="-fno-common -g"
32   WARN="-Wall -Wno-parentheses"
33   SHARED="-dynamiclib -L$LIB_DIR -lpcre"
34   SHARED_NAME=libohcount.dylib
35   RB_SHARED="-dynamic -bundle -lruby"
36   RB_SHARED_NAME=ohcount.bundle
37 fi
38
39 # C compiler and flags; -I/-L are added only when their directory is set, so a bare -I never swallows the next flag.
40 cc="gcc -fPIC -g $CFLAGS $WARN ${INC_DIR:+-I$INC_DIR} ${LIB_DIR:+-L$LIB_DIR}"
41
42 # Ohcount source files (src/parser.o is the prebuilt object produced by build_parser_o)
43 files="src/sourcefile.c \
44 src/detector.c \
45 src/licenses.c \
46 src/parser.o \
47 src/loc.c \
48 src/log.c \
49 src/diff.c \
50 src/parsed_language.c \
51 src/hash/language_hash.c"
52
53 # Regenerate the hash headers when src/hash/ holds no *_hash.h file yet, or
54 # when any src/hash/*.gperf file is newer than src/hash/parser_hash.h.
55 build_hash_headers()
56 {
57   if [[ -z $(ls src/hash/ | grep "_hash.h$") ||
58         -n $(find src/hash/*.gperf -newer src/hash/parser_hash.h) ]]
59   then
60     echo "Generating hash headers"
61     sh -c "cd src/hash/ && ./generate_headers" || exit 1   # abort the whole build on failure
62   fi
63 }
64
65 # Rebuild src/parser.o when it is missing, or when any Ragel parser (*.rl) or
66 # parser header (*.h) under src/parsers/ is newer than the existing object.
67 build_parser_o()
68 {
69   if [[ ! -f src/parser.o ||
70         -n $(find src/parsers/*.{h,rl} -newer src/parser.o) ]]
71   then
72     bash -c "cd src/parsers/ && bash ./compile" || exit 1   # run the parsers' own compile script first
73     echo "Building src/parser.c (will take a while)"
74     bash -c "$cc -c src/parser.c -o src/parser.o" || exit 1
75   fi
76 }
77
78 build_shared()   # builds src/$SHARED_NAME unless it is already up to date
79 {
80   build_hash_headers
81   build_parser_o
82   if [[ ! -f src/$SHARED_NAME ||
83         -n $(find src/*.{h,c} -newer src/$SHARED_NAME) ]]   # any source newer than the library?
84   then
85     echo "Building shared library"
86     sh -c "$cc $SHARED $files -o src/$SHARED_NAME" || exit 1
87   fi
88 }
89
90 build_ohcount()   # compiles the command line tool into bin/ohcount
91 {
92   build_hash_headers
93   build_parser_o
94   echo "Building Ohcount"
95   mkdir -p bin/   # output directory may not exist yet
96   sh -c "$cc src/ohcount.c $files -o bin/ohcount -lpcre" || exit 1
97 }
98
99 build_test_suite()   # compiles test/unit/all_tests.c into test/unit/run_tests
100 {
101   build_hash_headers
102   build_parser_o
103   echo "Building test suite"
104   sh -c "$cc test/unit/all_tests.c $files -o test/unit/run_tests -lpcre" ||
105     exit 1
106 }
107
108 run_test_suite()   # only prints messages; no test binary is invoked here
109 {
110   echo 'Running test suite'
111   echo 'disabled test suite, does not work'
112 }
113
114 build_ruby_bindings()   # generates the SWIG wrapper, builds ruby/$arch/$RB_SHARED_NAME, runs the Ruby tests
115 {
116   arch=$(ruby -rmkmf -e 'print RbConfig::expand(RbConfig::CONFIG["arch"])')   # left global: the caller prints $arch
117   echo "Generating Ruby bindings for $arch"
118   sh -c "swig -ruby -o ruby/ohcount_wrap.c ruby/ohcount.i" || exit 1
119   mkdir -p ruby/$arch
120   local archdir=$(ruby -rmkmf -e 'print RbConfig::expand(RbConfig::CONFIG["archdir"])')   # directory passed to -I
121   sh -c "$cc $RB_SHARED ruby/ohcount_wrap.c $files -o ruby/$arch/$RB_SHARED_NAME \
122     -I$archdir -lpcre" || exit 1
123   sh -c "cd test/unit/ruby && ruby ruby_test.rb" || exit 1
124 }
125
126 if [ $# -eq 0 ] || [ $1 == "all" ]
127 then
128 build_ohcount
129 build_test_suite
130 run_test_suite
131 echo $success
132 elif [ $1 == "shared" ]
133 then
134 build_shared
135 echo "Build successful; $SHARED_NAME is in src/"
136 elif [ $1 == "ohcount" ]
137 then
138 build_ohcount
139 echo "Build successful; ohcount is in bin/"
140 elif [ $1 == "tests" ]
141 then
142 build_test_suite
143 run_test_suite
144 elif [ $1 == "ruby" ]
145 then
146 build_ruby_bindings
147 echo "Build successful; $RB_SHARED_NAME is in ruby/$arch"
148 elif [ $1 == "clean" ]
149 then
150 rm -f bin/ohcount
151 rm -f test/unit/run_tests
152 rm -f src/parser.o
153 rm -f src/parsers/*.h
154 rm -f src/hash/*.h
155 rm -f src/hash/*.c
156 rm -f src/$SHARED_NAME
157 rm -f ruby/$RB_SHARED_NAME
158 rm -rf ruby/`ruby -rmkmf -e 'print RbConfig::expand(RbConfig::CONFIG["arch"])'`/*
159 else
160 echo "Usage: build [all|ohcount|shared|tests|ruby|clean]"
161 fi
0 ohcount (3.0.0-8.3) unstable; urgency=medium
1
2 * Non-maintainer upload.
3 * Remove ruby files from ohcount-doc, which only get installed
4 when building for Arch: all. (Closes: #818239)
5
6 -- Christian Hofstaedtler <zeha@debian.org> Thu, 22 Dec 2016 06:54:28 +0000
7
8 ohcount (3.0.0-8.2) unstable; urgency=medium
9
10 * Non-maintainer upload.
11 * Source-ful, binary-less upload to get rid of /ruby directory.
12 Probably somehow my fault? (Closes: #818239)
13
14 -- Christian Hofstaedtler <zeha@debian.org> Sun, 17 Jul 2016 19:58:32 +0000
15
16 ohcount (3.0.0-8.1) unstable; urgency=medium
17
18 * Non-maintainer upload.
19 * Fix build system being broken by removal of deprecated "Config"
20 object in Ruby 2.2. (Closes: #805674)
21
22 -- Christian Hofstaedtler <zeha@debian.org> Tue, 01 Mar 2016 22:02:31 +0100
23
24 ohcount (3.0.0-8) unstable; urgency=low
25
26 * Remove the explicit dependency on ruby 1.8
27 Thanks to Jonas Genannt (Closes: #733724)
28 * Switch from cdbs to dh. Thanks to Jonas Genannt
29 * Standards-Version updated to 3.9.5
30
31 -- Sylvestre Ledru <sylvestre@debian.org> Thu, 16 Jan 2014 16:26:45 +0100
32
33 ohcount (3.0.0-7) unstable; urgency=low
34
35 * Standards-Version updated to 3.9.4
36 * libdifflcs-ruby dep renamed to ruby-diff-lcs (Closes: #707792)
37 * Remove Torsten from the uploaders (I have been DD for a while :)
38 * ACK NMU (thanks btw)
39
40 -- Sylvestre Ledru <sylvestre@debian.org> Sat, 11 May 2013 19:17:57 +0200
41
42 ohcount (3.0.0-6.1) unstable; urgency=low
43
44 * Non-maintainer upload.
45 * Add dependency on file (Closes: #677494).
46
47 -- Luk Claes <luk@debian.org> Wed, 04 Jul 2012 17:18:12 +0000
48
49 ohcount (3.0.0-6) unstable; urgency=low
50
51 * Update some issues with the documentation (Closes: #650685)
52
53 -- Sylvestre Ledru <sylvestre@debian.org> Sat, 10 Dec 2011 19:38:15 +0100
54
55 ohcount (3.0.0-5) unstable; urgency=low
56
57 * Oups. Plug back the patches
58 * Manpage of ohcount added
59
60 -- Sylvestre Ledru <sylvestre@debian.org> Fri, 30 Sep 2011 21:37:34 +0200
61
62 ohcount (3.0.0-4) unstable; urgency=low
63
64 * Support of the txx extension (considered as C++). Thanks to Sébastien Dinot
65 for the patch.
66 * Switch to dpkg-source 3.0 (quilt) format
67 * Standards-Version updated to version 3.9.2
68 * Fix debian-rules-uses-deprecated-makefile lintian warning
69
70 -- Sylvestre Ledru <sylvestre@debian.org> Mon, 26 Sep 2011 13:42:49 +0200
71
72 ohcount (3.0.0-3) unstable; urgency=low
73
74 * Standards-Version updated to version 3.9.1
75 * Fix a seg fault when checking lintian source code. Thanks to
76 Raphael Geissert for investigating for me. (Closes: #608837)
77 (LP: #605631)
78 * Fix lintian warning copyright-refers-to-deprecated-bsd-license-file
79
80 -- Sylvestre Ledru <sylvestre@debian.org> Sat, 15 Jan 2011 09:34:05 +0100
81
82 ohcount (3.0.0-2) unstable; urgency=low
83
84 * Missing dependency (Closes: #558491)
85
86 -- Sylvestre Ledru <sylvestre@debian.org> Sun, 29 Nov 2009 18:19:46 +0100
87
88 ohcount (3.0.0-1) unstable; urgency=low
89
90 * New upstream release
91 * New package ohcount-doc added
92 * Homepage updated
93 * Vcs-* added
94 * Many changes on the debian/rules due to a refactoring from upstream which
95 has been done (patches are now obsolete or upstream)
96 * Upstream has redeveloped ohcount with C (instead of ruby). (Closes: #542892)
97 * Update of the watch file
98 * Repack script updated (upstream has a .so)
99 * Standards-Version updated to version 3.8.3
100 * Change of my email address since I am now DD
101 * Standards-Version updated to 3.8.3
102 * DM-Upload-Allowed removed
103
104 -- Sylvestre Ledru <sylvestre@debian.org> Fri, 27 Nov 2009 11:22:21 +0100
105
106 ohcount (2.0.1-1) unstable; urgency=low
107
108 * Initial release (Closes: #523006)
109
110 -- Sylvestre Ledru <sylvestre.ledru@inria.fr> Tue, 07 Apr 2009 20:18:38 +0200
0 Source: ohcount
1 Section: utils
2 Priority: optional
3 Maintainer: Sylvestre Ledru <sylvestre@debian.org>
4 Build-Depends: debhelper (>= 7), libpcre3-dev, gem2deb, rake,
5 ragel (>= 6.3), ruby-diff-lcs, doxygen, gperf, file
6 Standards-Version: 3.9.5
7 Homepage: http://sourceforge.net/projects/ohcount/
8 Vcs-Svn: https://bollin.googlecode.com/svn/ohcount/trunk
9 Vcs-Browser: http://bollin.googlecode.com/svn/ohcount/trunk
10 XS-Ruby-Versions: all
11
12 Package: ohcount
13 XB-Ruby-Versions: ${ruby:Versions}
14 Architecture: any
15 Depends: ruby | ruby-interpreter, ${shlibs:Depends}, ${misc:Depends},
16 ruby-diff-lcs, file
17 Suggests: ohcount-doc
18 Description: Source code line counter
19 Ohcount supports over 70 popular programming languages.
20 Ohcount does more than just count lines of code. It can also detect
21 popular open source licenses such as GPL within a large directory of source
22 code. It can also detect code that targets a particular programming API,
23 such as Win32 or KDE.
24 Ohcount is the line counter which powers http://www.ohloh.net/
25 .
26
27 Package: ohcount-doc
28 Section: doc
29 Architecture: all
30 Depends: ${shlibs:Depends}, ${misc:Depends}
31 Description: Source code line counter - Documentation
32 Ohcount supports over 70 popular programming languages.
33 Ohcount does more than just count lines of code. It can also detect
34 popular open source licenses such as GPL within a large directory of source
35 code. It can also detect code that targets a particular programming API,
36 such as Win32 or KDE.
37 Ohcount is the line counter which powers http://www.ohloh.net/
38 .
39 This package contains the documentation.
0 This package was debianized by Sylvestre Ledru <sylvestre.ledru@inria.fr> on
1 Tue, 07 Apr 2009 20:18:38 +0200.
2
3 It was downloaded from <http://labs.ohloh.net/ohcount/>
4
5 Upstream Author:
6
7 Ohloh <info@ohloh.net>
8
9 Copyright:
10
11 Copyright (C) 2007-2009 Ohloh
12
13 License:
14
15 This program is free software; you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation; either version 2 of the License, or
18 (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License along
26 with this program; if not, write to the Free Software Foundation, Inc.,
27 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28
29 The full text of the license can be found in
30 `/usr/share/common-licenses/GPL-2'.
31
32 The Debian packaging is (C) 2009, Sylvestre Ledru <sylvestre.ledru@inria.fr> and
33 is licensed under the GPL, see `/usr/share/common-licenses/GPL'.
34
35 ohcount incorporates some pieces of code in order to test its capabilities
36 at build time. These files are not included in the binary.
37
38
39 Files: test/expected_dir/haxe1.hx test/src_dir/haxe1.hx
40
41 Copyright:
42 Thomas Pfeiffer - kiroukou
43 Niel Drummond
44
45 License:
46 Copyright the original author or authors.
47 Licensed under the MOZILLA PUBLIC LICENSE, Version 1.1 (the "License");
48 you may not use this file except in compliance with the License.
49 You may obtain a copy of the License at
50 http://www.mozilla.org/MPL/MPL-1.1.html
51 Unless required by applicable law or agreed to in writing, software
52 distributed under the License is distributed on an "AS IS" BASIS,
53 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
54 See the License for the specific language governing permissions and
55 limitations under the License.
56
57 Files: test/src_dir/pascal2.pp test/expected_dir/pascal2.pp
58
59 Copyright:
60 Simon Steele 1998-2000
61
62 License:
63 BSD
64 Redistribution and use in source and binary forms, with or without
65 modification, are permitted provided that the following conditions are
66 met:
67
68 * Redistributions of source code must retain the above copyright
69 notice, this list of conditions and the following disclaimer.
70 * Redistributions in binary form must reproduce the above copyright
71 notice, this list of conditions and the following disclaimer in the
72 documentation and/or other materials provided with the distribution.
73 * Neither the name of the <ORGANIZATION> nor the names of its
74 contributors may be used to endorse or promote products derived from
75 this software without specific prior written permission.
76
77 Files: test/src_dir/js1.js test/expected_dir/js1.js
78
79 Copyright:
80 2005-2008 Sam Stephenson
81
82 License:
83 MIT
84
85 Permission is hereby granted, free of charge, to any person obtaining a copy
86 of this software and associated documentation files (the "Software"), to deal
87 in the Software without restriction, including without limitation the rights
88 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
89 copies of the Software, and to permit persons to whom the Software is
90 furnished to do so, subject to the following conditions:
91
92 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
93 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
94 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
95 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
96 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
97 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
98 SOFTWARE.
99
100
101 Files: test/src_dir/foo.ebuild test/detect_files/foo.ebuild
102
103 License:
104 GPL-2
105
106 Files: test/src_dir/as1.as test/expected_dir/as1.as
107
108 Copyright:
109 Sean Chatman and Garrett Woodworth 2008
110
111 License:
112 MIT
113
114 Files: test/src_dir/perl_module.pm test/expected_dir/perl_module.pm
115
116 Copyright:
117 Audrey Tang 2003-2007
118
119 License:
120 This program is free software; you can redistribute it and/or modify it
121 under the same terms as Perl itself, that is, under the terms of either:
122
123 a) the GNU General Public License as published by the Free Software
124 Foundation; either version 1, or (at your option) any later
125 version, or
126
127 b) the "Artistic License" which comes with Perl.
128
129 On Debian GNU/Linux systems, the complete text of the GNU General
130 Public License can be found in `/usr/share/common-licenses/GPL' and
131 the Artistic License in `/usr/share/common-licenses/Artistic'.
0 Document: ohcount
1 Title: Debian ohcount Manual
2 Author: Ohloh
3 Abstract: ohcount manual
4 Section: Programming/Ruby
5
6 Format: HTML
7 Index: /usr/share/doc/ohcount-doc/index.html
8 Files: /usr/share/doc/ohcount-doc/*