Chore: make fuzzer produce minimal reproducible examples of bugs (#11700)
When the fuzzer for rules discovers a bug, it outputs source code that reproduces the bug. However, since the source code is autogenerated, it is often extremely complex and contains lots of parts that are irrelevant to the cause of the bug, making it tedious to figure out what the bug actually is.
This commit adds a "code sample minimizer" to the fuzzer that tries to remove irrelevant parts of the AST so that the resulting code sample is as small as possible while still reproducing the issue found by the fuzzer.
As a demonstration, the minimizer would have reduced the very large code sample from the fuzzing error in [this CI build](https://travis-ci.org/eslint/eslint/jobs/519526960) down to the following simplified code:
```js
($2 = $3) ?
$4 : ($5)
```
Teddy Katz authored 5 years ago
GitHub committed 5 years ago
121 | 121 | "nyc": "^13.3.0", |
122 | 122 | "proxyquire": "^2.0.1", |
123 | 123 | "puppeteer": "^1.14.0", |
124 | "recast": "^0.17.6", | |
124 | 125 | "shelljs": "^0.8.2", |
125 | 126 | "sinon": "^3.3.0", |
126 | 127 | "temp": "^0.9.0", |
0 | "use strict"; | |
1 | ||
2 | const { assert } = require("chai"); | |
3 | const reduceBadExampleSize = require("../../tools/code-sample-minimizer"); | |
4 | ||
describe("reduceBadExampleSize()", () => {

    /**
     * Runs the minimizer on a code sample and checks the minimized output.
     * @param {string} initialCode The code sample handed to the minimizer
     * @param {function(string): boolean} predicate Returns `true` while the sample is still "bad"
     * @param {string} expectedFinalCode The source text the minimizer is expected to produce
     * @returns {void}
     */
    function assertMinimizedTo(initialCode, predicate, expectedFinalCode) {
        const minimized = reduceBadExampleSize({ sourceText: initialCode, predicate });

        assert.strictEqual(minimized, expectedFinalCode);
    }

    // A sample counts as "bad" as long as it still mentions the marker identifier.
    const mentionsBuggyExpression = code => code.includes("THIS_EXPRESSION_CAUSES_A_BUG");

    it("extracts relevant part of deeply nested code", () => {
        assertMinimizedTo(
            `
            if (true) {
                while (false) {
                    for (let i = 1; i < 10; i++) {
                        let j = foo
                            ? bar
                            : THIS_EXPRESSION_CAUSES_A_BUG
                    }
                }
            }
        `,
            mentionsBuggyExpression,
            "THIS_EXPRESSION_CAUSES_A_BUG"
        );
    });

    it("removes irrelevant parts of AST nodes with many children", () => {
        assertMinimizedTo(
            `
            foo;
            bar;
            baz;
            let x = [
                1,
                2,
                ,
                3,
                THIS_EXPRESSION_CAUSES_A_BUG,
                4
            ]
            quux;
        `,
            mentionsBuggyExpression,
            "THIS_EXPRESSION_CAUSES_A_BUG"
        );
    });

    it("removes irrelevant comments from the source code", () => {
        assertMinimizedTo(
            `
            var /* aaa */foo = bar;
        `,
            code => code.includes("var") && code.includes("foo = bar"),
            "var foo = bar;"
        );
    });
});
43 | 43 | describe("when running in crash-only mode", () => { |
44 | 44 | describe("when a rule crashes on the given input", () => { |
45 | 45 | it("should report the crash with a minimal config", () => { |
46 | fakeRule = () => ({ | |
46 | fakeRule = context => ({ | |
47 | 47 | Program() { |
48 | throw CRASH_BUG; | |
48 | if (context.getSourceCode().text === "foo") { | |
49 | throw CRASH_BUG; | |
50 | } | |
49 | 51 | } |
50 | 52 | }); |
51 | 53 | |
80 | 82 | |
81 | 83 | describe("when a rule crashes on the given input", () => { |
82 | 84 | it("should report the crash with a minimal config", () => { |
83 | fakeRule = () => ({ | |
85 | fakeRule = context => ({ | |
84 | 86 | Program() { |
85 | throw CRASH_BUG; | |
87 | if (context.getSourceCode().text === "foo") { | |
88 | throw CRASH_BUG; | |
89 | } | |
86 | 90 | } |
87 | 91 | }); |
88 | 92 | |
102 | 106 | // Replaces programs that start with "foo" with "bar" |
103 | 107 | fakeRule = context => ({ |
104 | 108 | Program(node) { |
105 | if (context.getSourceCode().text.startsWith("foo")) { | |
109 | if (context.getSourceCode().text === `foo ${disableFixableRulesComment}`) { | |
106 | 110 | context.report({ |
107 | 111 | node, |
108 | 112 | message: "no foos allowed", |
136 | 140 | Program(node) { |
137 | 141 | const sourceCode = context.getSourceCode(); |
138 | 142 | |
139 | if (sourceCode.text.startsWith("foo")) { | |
143 | if (sourceCode.text === `foo ${disableFixableRulesComment}`) { | |
140 | 144 | context.report({ |
141 | 145 | node, |
142 | 146 | message: "no foos allowed", |
177 | 181 | Program(node) { |
178 | 182 | const sourceCode = context.getSourceCode(); |
179 | 183 | |
180 | if (sourceCode.text.startsWith("foo") || sourceCode.text.startsWith("bar")) { | |
184 | if (sourceCode.text.startsWith("foo") || sourceCode.text === intermediateCode) { | |
181 | 185 | context.report({ |
182 | 186 | node, |
183 | 187 | message: "no foos allowed", |
228 | 232 | message: "no foos allowed", |
229 | 233 | fix: fixer => fixer.replaceText(node, "bar") |
230 | 234 | }); |
231 | } else if (sourceCode.text.startsWith("bar")) { | |
235 | } else if (sourceCode.text === `bar ${disableFixableRulesComment}`) { | |
232 | 236 | throw CRASH_BUG; |
233 | 237 | } |
234 | 238 | } |
244 | 248 | assert.strictEqual(results.length, 1); |
245 | 249 | assert.strictEqual(results[0].type, "crash"); |
246 | 250 | |
247 | // TODO: (not-an-aardvark) It might be more useful to output the intermediate code here. | |
248 | assert.strictEqual(results[0].text, `foo ${disableFixableRulesComment}`); | |
251 | assert.strictEqual(results[0].text, `bar ${disableFixableRulesComment}`); | |
249 | 252 | assert.deepStrictEqual(results[0].config.rules, { "test-fuzzer-rule": 2 }); |
250 | 253 | assert.strictEqual(results[0].error, CRASH_BUG.stack); |
251 | 254 | }); |
0 | "use strict"; | |
1 | ||
2 | const evk = require("eslint-visitor-keys"); | |
3 | const recast = require("recast"); | |
4 | const espree = require("espree"); | |
5 | const assert = require("assert"); | |
6 | ||
/**
 * Checks, by looking only at the node's `type`, whether an AST node might be an expression.
 * Identifiers, meta properties, and any type ending in "Expression" or "Literal" qualify.
 * @param {ASTNode} node The node to classify
 * @returns {boolean} `true` if the node's type suggests it could be an expression
 */
function isMaybeExpression(node) {
    const { type } = node;

    if (type.endsWith("Expression")) {
        return true;
    }

    if (type === "Identifier" || type === "MetaProperty") {
        return true;
    }

    return type.endsWith("Literal");
}
18 | ||
/**
 * Checks, by looking only at the node's `type`, whether an AST node is a statement.
 * Any type ending in "Statement" or "Declaration" qualifies.
 * @param {ASTNode} node The node to classify
 * @returns {boolean} `true` if the node is a statement
 */
function isStatement(node) {
    const statementSuffixes = ["Statement", "Declaration"];

    return statementSuffixes.some(suffix => node.type.endsWith(suffix));
}
27 | ||
/**
 * Given "bad" source text (e.g. a code sample that causes a rule to crash), tries to return a smaller
 * piece of source text which is also "bad", to make it easier for a human to figure out where the
 * problem is.
 *
 * The reduction runs in three phases: irrelevant subtrees are pruned from the AST, then the smallest
 * descendant that is still "bad" on its own is extracted, and finally comments are removed or emptied.
 * @param {string} options.sourceText Initial piece of "bad" source text
 * @param {function(string): boolean} options.predicate A predicate that returns `true` for bad source text and `false` for good source text
 * @param {Parser} [options.parser] The parser used to parse the source text. Defaults to a modified
 * version of espree that uses recent parser options.
 * @param {Object} [options.visitorKeys] The visitor keys of the AST. Defaults to eslint-visitor-keys.
 * @returns {string} Another piece of "bad" source text, which may or may not be smaller than the original source text.
 * @throws {AssertionError} If the original source text does not parse or does not satisfy the predicate.
 */
function reduceBadExampleSize({
    sourceText,
    predicate,
    parser = {
        parse: (code, options) =>
            espree.parse(code, {
                ...options,
                loc: true,
                range: true,
                raw: true,
                tokens: true,
                comment: true,
                eslintVisitorKeys: true,
                eslintScopeManager: true,
                ecmaVersion: 2018,
                sourceType: "script"
            })
    },
    visitorKeys = evk.KEYS
}) {
    let counter = 0;

    /**
     * Returns a new unique identifier
     * @returns {string} A name for a new identifier
     */
    function generateNewIdentifierName() {
        return `$${(counter++)}`;
    }

    /**
     * Returns the child keys to traverse for a node
     * @param {ASTNode} node The node
     * @returns {string[]} The visitor keys for the node's type. Node types that are missing from
     * `visitorKeys` are treated as leaves instead of crashing the traversal.
     */
    function getChildKeys(node) {
        return visitorKeys[node.type] || [];
    }

    /**
     * Determines whether a source text sample is "bad"
     * @param {string} updatedSourceText The sample
     * @returns {boolean} `true` if the sample is "bad"
     */
    function reproducesBadCase(updatedSourceText) {

        // Text that no longer parses is never an acceptable reduction, regardless of the predicate.
        try {
            parser.parse(updatedSourceText);
        } catch (err) {
            return false;
        }

        return predicate(updatedSourceText);
    }

    assert(reproducesBadCase(sourceText), "Original source text should reproduce issue");
    const parseResult = recast.parse(sourceText, { parser });

    /**
     * Recursively removes descendant subtrees of the given AST node and replaces
     * them with simplified variants to produce a simplified AST which is still considered "bad".
     * @param {ASTNode} node An AST node to prune. May be mutated by this call, but the
     * resulting AST will still produce "bad" source code.
     * @returns {void}
     */
    function pruneIrrelevantSubtrees(node) {
        for (const key of getChildKeys(node)) {
            if (Array.isArray(node[key])) {

                // Iterate backwards so that removing an element does not shift the indices still to be visited.
                for (let index = node[key].length - 1; index >= 0; index--) {
                    const [childNode] = node[key].splice(index, 1);

                    if (!reproducesBadCase(recast.print(parseResult).code)) {

                        // The element is needed: put it back and try to simplify its contents instead.
                        node[key].splice(index, 0, childNode);
                        if (childNode) {
                            pruneIrrelevantSubtrees(childNode);
                        }
                    }
                }
            } else if (typeof node[key] === "object" && node[key] !== null) {

                const childNode = node[key];

                if (isMaybeExpression(childNode)) {

                    // Try replacing the expression with a fresh placeholder identifier.
                    node[key] = { type: "Identifier", name: generateNewIdentifierName(), range: childNode.range };
                    if (!reproducesBadCase(recast.print(parseResult).code)) {
                        node[key] = childNode;
                        pruneIrrelevantSubtrees(childNode);
                    }
                } else if (isStatement(childNode)) {

                    // Try replacing the statement with an empty statement.
                    node[key] = { type: "EmptyStatement", range: childNode.range };
                    if (!reproducesBadCase(recast.print(parseResult).code)) {
                        node[key] = childNode;
                        pruneIrrelevantSubtrees(childNode);
                    }
                }
            }
        }
    }

    /**
     * Recursively tries to extract a descendant node from the AST that is "bad" on its own
     * @param {ASTNode} node A node which produces "bad" source code
     * @returns {ASTNode} A descendant of `node` which is also bad, or `node` itself if no descendant is
     */
    function extractRelevantChild(node) {
        const childNodes = [].concat(
            ...getChildKeys(node)
                .map(key => (Array.isArray(node[key]) ? node[key] : [node[key]]))
        );

        for (const childNode of childNodes) {
            if (!childNode) {
                continue;
            }

            if (isMaybeExpression(childNode) || isStatement(childNode)) {

                // Expressions and statements can be printed as standalone programs.
                if (reproducesBadCase(recast.print(childNode).code)) {
                    return extractRelevantChild(childNode);
                }
            } else {

                // Other nodes are not checked directly; look for a "bad" descendant inside them.
                const childResult = extractRelevantChild(childNode);

                if (reproducesBadCase(recast.print(childResult).code)) {
                    return childResult;
                }
            }
        }
        return node;
    }

    /**
     * Removes and simplifies comments from the source text
     * @param {string} text A piece of "bad" source text
     * @returns {string} A piece of "bad" source text with fewer and/or simpler comments.
     */
    function removeIrrelevantComments(text) {
        const ast = parser.parse(text);

        if (ast.comments) {
            for (const comment of ast.comments) {
                for (const potentialSimplification of [

                    // Try deleting the comment
                    `${text.slice(0, comment.range[0])}${text.slice(comment.range[1])}`,

                    // Try replacing the comment with a space
                    `${text.slice(0, comment.range[0])} ${text.slice(comment.range[1])}`,

                    // Try deleting the contents of the comment
                    text.slice(0, comment.range[0] + 2) + text.slice(comment.type === "Block" ? comment.range[1] - 2 : comment.range[1])
                ]) {

                    /*
                     * Only accept candidates that are strictly shorter than the current text. When a comment
                     * that must be kept is already empty, "deleting its contents" yields the same text, and
                     * recursing on it would never terminate.
                     */
                    if (potentialSimplification.length < text.length && reproducesBadCase(potentialSimplification)) {
                        return removeIrrelevantComments(potentialSimplification);
                    }
                }
            }
        }

        return text;
    }

    pruneIrrelevantSubtrees(parseResult.program);
    const relevantChild = recast.print(extractRelevantChild(parseResult.program)).code;

    assert(reproducesBadCase(relevantChild), "Extracted relevant source text should reproduce issue");
    const result = removeIrrelevantComments(relevantChild);

    assert(reproducesBadCase(result), "Source text with irrelevant comments removed should reproduce issue");
    return result;
}
204 | ||
205 | module.exports = reduceBadExampleSize; |
11 | 11 | const assert = require("assert"); |
12 | 12 | const lodash = require("lodash"); |
13 | 13 | const eslump = require("eslump"); |
14 | const espree = require("espree"); | |
14 | 15 | const SourceCodeFixer = require("../lib/util/source-code-fixer"); |
15 | 16 | const ruleConfigs = require("../lib/config/config-rule").createCoreRuleConfigs(); |
17 | const sampleMinimizer = require("./code-sample-minimizer"); | |
16 | 18 | |
17 | 19 | //------------------------------------------------------------------------------ |
18 | 20 | // Public API |
80 | 82 | * @returns {string} A possibly-modified version of originalText that results in the same syntax error or crash after only one pass |
81 | 83 | */ |
82 | 84 | function isolateBadAutofixPass(originalText, config) { |
83 | let lastGoodText = originalText; | |
85 | let previousText = originalText; | |
84 | 86 | let currentText = originalText; |
85 | 87 | |
86 | 88 | do { |
89 | 91 | try { |
90 | 92 | messages = linter.verify(currentText, config); |
91 | 93 | } catch (err) { |
92 | return lastGoodText; | |
94 | return currentText; | |
93 | 95 | } |
94 | 96 | |
95 | 97 | if (messages.length === 1 && messages[0].fatal) { |
96 | return lastGoodText; | |
98 | return previousText; | |
97 | 99 | } |
98 | 100 | |
99 | lastGoodText = currentText; | |
101 | previousText = currentText; | |
100 | 102 | currentText = SourceCodeFixer.applyFixes(currentText, messages).output; |
101 | } while (lastGoodText !== currentText); | |
103 | } while (previousText !== currentText); | |
102 | 104 | |
103 | return lastGoodText; | |
105 | return currentText; | |
104 | 106 | } |
105 | 107 | |
106 | 108 | const problems = []; |
109 | ||
/**
 * Builds a parse function that wraps espree with a fixed set of options.
 * The config's `parserOptions` are applied first; `loc`, `range`, `raw`, `tokens` and `comment`
 * are then forced to `true`, overriding any values the config supplied for them.
 * @param {ConfigData} config The config used
 * @returns {Parser} a parser
 */
function getParser(config) {
    const { parserOptions } = config;
    const forcedOptions = {
        loc: true,
        range: true,
        raw: true,
        tokens: true,
        comment: true
    };

    return function parseWithConfig(sourceText) {
        return espree.parse(sourceText, { ...parserOptions, ...forcedOptions });
    };
}
107 | 125 | |
108 | 126 | for (let i = 0; i < options.count; progressCallback(problems.length), i++) { |
109 | 127 | const sourceType = lodash.sample(["script", "module"]); |
122 | 140 | linter.verify(text, config); |
123 | 141 | } |
124 | 142 | } catch (err) { |
125 | problems.push({ type: "crash", text, config: isolateBadConfig(text, config, "crash"), error: err.stack }); | |
143 | const lastGoodText = checkAutofixes ? isolateBadAutofixPass(text, config) : text; | |
144 | const smallConfig = isolateBadConfig(lastGoodText, config, "crash"); | |
145 | const smallText = sampleMinimizer({ | |
146 | sourceText: lastGoodText, | |
147 | parser: { parse: getParser(smallConfig) }, | |
148 | predicate(reducedText) { | |
149 | try { | |
150 | linter.verify(reducedText, smallConfig); | |
151 | return false; | |
152 | } catch (_) { | |
153 | return true; | |
154 | } | |
155 | } | |
156 | }); | |
157 | ||
158 | problems.push({ type: "crash", text: smallText, config: smallConfig, error: err.stack }); | |
159 | ||
126 | 160 | continue; |
127 | 161 | } |
128 | 162 | |
129 | 163 | if (checkAutofixes && autofixResult.fixed && autofixResult.messages.length === 1 && autofixResult.messages[0].fatal) { |
130 | 164 | const lastGoodText = isolateBadAutofixPass(text, config); |
165 | const smallConfig = isolateBadConfig(lastGoodText, config, "autofix"); | |
166 | const smallText = sampleMinimizer({ | |
167 | sourceText: lastGoodText, | |
168 | parser: { parse: getParser(smallConfig) }, | |
169 | predicate(reducedText) { | |
170 | try { | |
171 | const smallFixResult = linter.verifyAndFix(reducedText, smallConfig); | |
131 | 172 | |
132 | problems.push({ type: "autofix", text: lastGoodText, config: isolateBadConfig(lastGoodText, config, "autofix"), error: autofixResult.messages[0] }); | |
173 | return smallFixResult.fixed && smallFixResult.messages.length === 1 && smallFixResult.messages[0].fatal; | |
174 | } catch (_) { | |
175 | return false; | |
176 | } | |
177 | } | |
178 | }); | |
179 | ||
180 | problems.push({ | |
181 | type: "autofix", | |
182 | text: smallText, | |
183 | config: smallConfig, | |
184 | error: autofixResult.messages[0] | |
185 | }); | |
133 | 186 | } |
134 | 187 | } |
135 | 188 |