Codebase list halibut / e209471
Disallow the zero Unicode character in all input.

Halibut works internally with standard C-style null-terminated strings (or rather wide strings), so L'\0' appearing unexpectedly in the input can cause all kinds of havoc. It would be nice to redo all the string processing using (pointer, length) pairs and become robust against that, but I don't think it's realistic without a major rewrite.

Zero characters have no actual use that I can see, so a simpler fix is to just outlaw them completely. This applies to a direct \0 appearing in the input file, and also to any sneaky attempts to enter one via \u0000.

Simon Tatham, 2 years ago
3 changed file(s) with 23 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
8282 do_error(NULL, "no data in input files");
8383 }
8484
/*
 * Report that the Unicode zero character was found in the input.
 *
 * es:   error state; its `fatal' flag is set to true here.
 * fpos: file position passed through to do_error() together with
 *       the fixed diagnostic message below.
 */
85 void err_zerochar(errorstate *es, const filepos *fpos)
86 {
87 es->fatal = true;
88 do_error(fpos, "the Unicode zero character is not permitted in input");
89 }
90
8591 void err_brokencodepara(errorstate *es, const filepos *fpos)
8692 {
8793 es->fatal = true;
224224 void err_cantopen(errorstate *es, const char *sp);
225225 /* no data in input files */
226226 void err_nodata(errorstate *es);
227 /* unexpected zero character in input file */
228 void err_zerochar(errorstate *es, const filepos *fpos);
227229 /* line in codepara didn't begin `\c' */
228230 void err_brokencodepara(errorstate *es, const filepos *fpos);
229231 /* expected `}' after keyword */
169169 NULL, 0);
170170 assert(p == buf+1 && inlen == 0);
171171
172 for (int i = 0; i < in->nwc; i++) {
173 if (in->wc[i] == 0) {
174 /* The zero Unicode character is never legal */
175 err_zerochar(in->es, pos);
176 return EOF;
177 }
178 }
179
172180 in->wcpos = 0;
173181 }
174182 }
175183
176 return in->wc[in->wcpos++];
184 wchar_t wc = in->wc[in->wcpos++];
185
186 return wc;
177187
178188 } else
179189 return EOF;
15261536 }
15271537 case c_u:
15281538 uchr = t.aux;
1539 if (uchr == 0) {
1540 err_zerochar(in->es, &t.pos);
1541 break;
1542 }
15291543 utext[0] = uchr; utext[1] = 0;
15301544 wd.type = style;
15311545 wd.breaks = false;