Codebase list apache-opennlp / cf86a31
New upstream version 2.1.0 Andrius Merkys 1 year, 5 months ago
270 changed file(s) with 7677 addition(s) and 6740 deletion(s). Raw diff Collapse all Expand all
00 Apache OpenNLP
1 Copyright 2021 The Apache Software Foundation
1 Copyright 2021-2022 The Apache Software Foundation
22
33 This product includes software developed at
44 The Apache Software Foundation (http://www.apache.org/).
1616 <parent>
1717 <groupId>org.apache.opennlp</groupId>
1818 <artifactId>opennlp</artifactId>
19 <version>2.0.0</version>
19 <version>2.1.0</version>
2020 <relativePath>../pom.xml</relativePath>
2121 </parent>
2222
4848 </dependency>
4949
5050 <dependency>
51 <groupId>junit</groupId>
52 <artifactId>junit</artifactId>
51 <groupId>org.junit.jupiter</groupId>
52 <artifactId>junit-jupiter-api</artifactId>
53 <scope>test</scope>
54 </dependency>
55
56 <dependency>
57 <groupId>org.junit.jupiter</groupId>
58 <artifactId>junit-jupiter-engine</artifactId>
5359 <scope>test</scope>
5460 </dependency>
5561 </dependencies>
2323 <parent>
2424 <groupId>org.apache.opennlp</groupId>
2525 <artifactId>opennlp</artifactId>
26 <version>2.0.0</version>
26 <version>2.1.0</version>
2727 <relativePath>../pom.xml</relativePath>
2828 </parent>
2929
00 Apache OpenNLP
1 Copyright 2017 The Apache Software Foundation
1 Copyright 2017-2022 The Apache Software Foundation
22
33 This product includes software developed at
44 The Apache Software Foundation (http://www.apache.org/).
2323 <parent>
2424 <groupId>org.apache.opennlp</groupId>
2525 <artifactId>opennlp</artifactId>
26 <version>2.0.0</version>
26 <version>2.1.0</version>
2727 <relativePath>../pom.xml</relativePath>
2828 </parent>
2929 <groupId>org.apache.opennlp</groupId>
3737 </dependency>
3838 <dependency>
3939 <groupId>com.microsoft.onnxruntime</groupId>
40 <artifactId>onnxruntime</artifactId>
40 <!-- This dependency supports CPU and GPU -->
41 <artifactId>onnxruntime_gpu</artifactId>
4142 <version>${onnxruntime.version}</version>
4243 </dependency>
4344 <dependency>
44 <groupId>junit</groupId>
45 <artifactId>junit</artifactId>
46 <version>${junit.version}</version>
45 <groupId>org.junit.jupiter</groupId>
46 <artifactId>junit-jupiter-api</artifactId>
47 <scope>test</scope>
48 </dependency>
49 <dependency>
50 <groupId>org.junit.jupiter</groupId>
51 <artifactId>junit-jupiter-engine</artifactId>
4752 <scope>test</scope>
4853 </dependency>
4954 </dependencies>
+0
-191
opennlp-dl/src/main/java/opennlp/dl/Inference.java less more
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl;
18
19 import java.io.BufferedReader;
20 import java.io.File;
21 import java.io.FileReader;
22 import java.io.IOException;
23 import java.util.Arrays;
24 import java.util.HashMap;
25 import java.util.Map;
26 import java.util.stream.IntStream;
27
28 import ai.onnxruntime.OrtEnvironment;
29 import ai.onnxruntime.OrtException;
30 import ai.onnxruntime.OrtSession;
31
32 import opennlp.tools.tokenize.Tokenizer;
33 import opennlp.tools.tokenize.WordpieceTokenizer;
34
35 /**
36 * An abstract class used by OpenNLP implementations using ONNX models.
37 */
38 public abstract class Inference {
39
40 public static final String INPUT_IDS = "input_ids";
41 public static final String ATTENTION_MASK = "attention_mask";
42 public static final String TOKEN_TYPE_IDS = "token_type_ids";
43
44 protected final OrtEnvironment env;
45 protected final OrtSession session;
46
47 private final Tokenizer tokenizer;
48 private final Map<String, Integer> vocabulary;
49
50 public abstract double[][] infer(String input) throws Exception;
51
52 /**
53 * Instantiates a new inference class.
54 * @param model The ONNX model file.
55 * @param vocab The model's vocabulary file.
56 * @throws OrtException Thrown if the ONNX model cannot be loaded.
57 * @throws IOException Thrown if the ONNX model or vocabulary files cannot be opened or read.
58 */
59 public Inference(File model, File vocab) throws OrtException, IOException {
60
61 this.env = OrtEnvironment.getEnvironment();
62 this.session = env.createSession(model.getPath(), new OrtSession.SessionOptions());
63 this.vocabulary = loadVocab(vocab);
64 this.tokenizer = new WordpieceTokenizer(vocabulary.keySet());
65
66 }
67
68 /**
69 * Tokenize the input text using the {@link WordpieceTokenizer}.
70 * @param text The input text.
71 * @return The input text's {@link Tokens}.
72 */
73 public Tokens tokenize(String text) {
74
75 final String[] tokens = tokenizer.tokenize(text);
76
77 final int[] ids = new int[tokens.length];
78
79 for (int x = 0; x < tokens.length; x++) {
80 ids[x] = vocabulary.get(tokens[x]);
81 }
82
83 final long[] lids = Arrays.stream(ids).mapToLong(i -> i).toArray();
84
85 final long[] mask = new long[ids.length];
86 Arrays.fill(mask, 1);
87
88 final long[] types = new long[ids.length];
89 Arrays.fill(types, 0);
90
91 return new Tokens(tokens, lids, mask, types);
92
93 }
94
95 /**
96 * Loads a vocabulary file from disk.
97 * @param vocab The vocabulary file.
98 * @return A map of vocabulary words to integer IDs.
99 * @throws IOException Thrown if the vocabulary file cannot be opened and read.
100 */
101 public Map<String, Integer> loadVocab(File vocab) throws IOException {
102
103 final Map<String, Integer> v = new HashMap<>();
104
105 BufferedReader br = new BufferedReader(new FileReader(vocab.getPath()));
106 String line = br.readLine();
107 int x = 0;
108
109 while (line != null) {
110
111 line = br.readLine();
112 x++;
113
114 v.put(line, x);
115
116 }
117
118 return v;
119
120 }
121
122 public static int maxIndex(double[] arr) {
123 return IntStream.range(0, arr.length)
124 .reduce((i, j) -> arr[i] > arr[j] ? i : j)
125 .orElse(-1);
126 }
127
128 /**
129 * Applies softmax to an array of values.
130 * @param input An array of values.
131 * @return The output array.
132 */
133 public double[] softmax(final double[] input) {
134
135 final double[] t = new double[input.length];
136 double sum = 0.0;
137
138 for (int x = 0; x < input.length; x++) {
139 double val = Math.exp(input[x]);
140 sum += val;
141 t[x] = val;
142 }
143
144 final double[] output = new double[input.length];
145
146 for (int x = 0; x < output.length; x++) {
147 output[x] = (float) (t[x] / sum);
148 }
149
150 return output;
151
152 }
153
154 /**
155 * Converts a two-dimensional float array to doubles.
156 * @param input The input array.
157 * @return The converted array.
158 */
159 public double[][] convertFloatsToDoubles(float[][] input) {
160
161 final double[][] outputs = new double[input.length][input[0].length];
162
163 for (int i = 0; i < input.length; i++) {
164 for (int j = 0; j < input[0].length; j++) {
165 outputs[i][j] = (double) input[i][j];
166 }
167 }
168
169 return outputs;
170
171 }
172
173 /**
174 * Converts a three-dimensional float array to doubles.
175 * @param input The input array.
176 * @return The converted array.
177 */
178 public double[] convertFloatsToDoubles(float[] input) {
179
180 final double[] output = new double[input.length];
181
182 for (int i = 0; i < input.length; i++) {
183 output[i] = input[i];
184 }
185
186 return output;
187
188 }
189
190 }
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl;
18
/**
 * Mutable settings that control how inference is performed.
 */
public class InferenceOptions {

  // Which optional model inputs are supplied; both are on by default.
  private boolean includeAttentionMask = true;
  private boolean includeTokenTypeIds = true;

  // GPU execution is off by default; device 0 is used when it is switched on.
  private boolean gpu = false;
  private int gpuDeviceId = 0;

  // Document chunking: chunk size and overlap between chunks.
  private int documentSplitSize = 250;
  private int splitOverlapSize = 50;

  /** @return Whether the attention mask input is included. Defaults to <code>true</code>. */
  public boolean isIncludeAttentionMask() {
    return this.includeAttentionMask;
  }

  /** @return Whether the token type IDs input is included. Defaults to <code>true</code>. */
  public boolean isIncludeTokenTypeIds() {
    return this.includeTokenTypeIds;
  }

  /** @return Whether GPU execution is requested. Defaults to <code>false</code>. */
  public boolean isGpu() {
    return this.gpu;
  }

  /** @return The GPU device ID. Defaults to <code>0</code>. */
  public int getGpuDeviceId() {
    return this.gpuDeviceId;
  }

  /** @return The document split size. Defaults to <code>250</code>. */
  public int getDocumentSplitSize() {
    return this.documentSplitSize;
  }

  /** @return The split overlap size. Defaults to <code>50</code>. */
  public int getSplitOverlapSize() {
    return this.splitOverlapSize;
  }

  /** @param includeAttentionMask Whether to include the attention mask input. */
  public void setIncludeAttentionMask(boolean includeAttentionMask) {
    this.includeAttentionMask = includeAttentionMask;
  }

  /** @param includeTokenTypeIds Whether to include the token type IDs input. */
  public void setIncludeTokenTypeIds(boolean includeTokenTypeIds) {
    this.includeTokenTypeIds = includeTokenTypeIds;
  }

  /** @param gpu Whether to request GPU execution. */
  public void setGpu(boolean gpu) {
    this.gpu = gpu;
  }

  /** @param gpuDeviceId The GPU device ID to use. */
  public void setGpuDeviceId(int gpuDeviceId) {
    this.gpuDeviceId = gpuDeviceId;
  }

  /** @param documentSplitSize The document split size. */
  public void setDocumentSplitSize(int documentSplitSize) {
    this.documentSplitSize = documentSplitSize;
  }

  /** @param splitOverlapSize The split overlap size. */
  public void setSplitOverlapSize(int splitOverlapSize) {
    this.splitOverlapSize = splitOverlapSize;
  }

}
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl;
18
/**
 * Immutable pair of an index and a character end position.
 */
public class SpanEnd {

  private final int index;
  private final int characterEnd;

  /**
   * @param index The index.
   * @param characterEnd The character end position.
   */
  public SpanEnd(int index, int characterEnd) {
    this.index = index;
    this.characterEnd = characterEnd;
  }

  /** @return The index. */
  public int getIndex() {
    return this.index;
  }

  /** @return The character end position. */
  public int getCharacterEnd() {
    return this.characterEnd;
  }

  @Override
  public String toString() {
    final StringBuilder description = new StringBuilder("index: ");
    description.append(this.index);
    description.append("; character end: ");
    description.append(this.characterEnd);
    return description.toString();
  }

}
1616
1717 package opennlp.dl.doccat;
1818
19 import java.io.BufferedReader;
1920 import java.io.File;
21 import java.io.FileReader;
22 import java.io.IOException;
23 import java.nio.LongBuffer;
24 import java.util.Arrays;
2025 import java.util.HashMap;
2126 import java.util.HashSet;
27 import java.util.LinkedList;
28 import java.util.List;
2229 import java.util.Map;
2330 import java.util.Set;
2431 import java.util.SortedMap;
2532 import java.util.TreeMap;
26
27 import opennlp.dl.Inference;
33 import java.util.stream.IntStream;
34
35 import ai.onnxruntime.OnnxTensor;
36 import ai.onnxruntime.OrtEnvironment;
37 import ai.onnxruntime.OrtException;
38 import ai.onnxruntime.OrtSession;
39
40 import opennlp.dl.InferenceOptions;
41 import opennlp.dl.Tokens;
42 import opennlp.dl.doccat.scoring.ClassificationScoringStrategy;
2843 import opennlp.tools.doccat.DocumentCategorizer;
44 import opennlp.tools.tokenize.Tokenizer;
45 import opennlp.tools.tokenize.WordpieceTokenizer;
2946
3047 /**
3148 * An implementation of {@link DocumentCategorizer} that performs document classification
3350 */
3451 public class DocumentCategorizerDL implements DocumentCategorizer {
3552
36 private final File model;
37 private final File vocab;
53 public static final String INPUT_IDS = "input_ids";
54 public static final String ATTENTION_MASK = "attention_mask";
55 public static final String TOKEN_TYPE_IDS = "token_type_ids";
56
57 private final Tokenizer tokenizer;
58 private final Map<String, Integer> vocabulary;
3859 private final Map<Integer, String> categories;
60 private final ClassificationScoringStrategy classificationScoringStrategy;
61 private final InferenceOptions inferenceOptions;
62 protected final OrtEnvironment env;
63 protected final OrtSession session;
3964
4065 /**
4166 * Creates a new document categorizer using ONNX models.
4267 * @param model The ONNX model file.
4368 * @param vocab The model's vocabulary file.
4469 * @param categories The categories.
70 * @param classificationScoringStrategy Implementation of {@link ClassificationScoringStrategy} used
71 * to calculate the classification scores given the score of each
72 * individual document part.
73 * @param inferenceOptions {@link InferenceOptions} to control the inference.
4574 */
46 public DocumentCategorizerDL(File model, File vocab, Map<Integer, String> categories) {
47
48 this.model = model;
49 this.vocab = vocab;
public DocumentCategorizerDL(File model, File vocab, Map<Integer, String> categories,
                             ClassificationScoringStrategy classificationScoringStrategy,
                             InferenceOptions inferenceOptions)
    throws IOException, OrtException {

  // Plain configuration first; these assignments have no side effects.
  this.categories = categories;
  this.classificationScoringStrategy = classificationScoringStrategy;
  this.inferenceOptions = inferenceOptions;

  // Create the ONNX session, adding CUDA on the configured device only when requested.
  this.env = OrtEnvironment.getEnvironment();
  final OrtSession.SessionOptions options = new OrtSession.SessionOptions();
  final boolean useGpu = inferenceOptions.isGpu();
  if (useGpu) {
    options.addCUDA(inferenceOptions.getGpuDeviceId());
  }
  this.session = this.env.createSession(model.getPath(), options);

  // The wordpiece tokenizer is built from the entries of the loaded vocabulary.
  final Map<String, Integer> loadedVocabulary = loadVocab(vocab);
  this.vocabulary = loadedVocabulary;
  this.tokenizer = new WordpieceTokenizer(loadedVocabulary.keySet());

}
5395
5698
5799 try {
58100
59 final DocumentCategorizerInference inference = new DocumentCategorizerInference(model, vocab);
60
61 final double[][] vectors = inference.infer(strings[0]);
62 final double[] results = inference.softmax(vectors[0]);
63
64 return results;
101 final List<Tokens> tokens = tokenize(strings[0]);
102
103 final List<double[]> scores = new LinkedList<>();
104
105 for (final Tokens t : tokens) {
106
107 final Map<String, OnnxTensor> inputs = new HashMap<>();
108
109 inputs.put(INPUT_IDS, OnnxTensor.createTensor(env,
110 LongBuffer.wrap(t.getIds()), new long[] {1, t.getIds().length}));
111
112 if (inferenceOptions.isIncludeAttentionMask()) {
113 inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
114 LongBuffer.wrap(t.getMask()), new long[] {1, t.getMask().length}));
115 }
116
117 if (inferenceOptions.isIncludeTokenTypeIds()) {
118 inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
119 LongBuffer.wrap(t.getTypes()), new long[] {1, t.getTypes().length}));
120 }
121
122 // The outputs from the model.
123 final float[][] v = (float[][]) session.run(inputs).get(0).getValue();
124
125 // Keep track of all scores.
126 final double[] categoryScoresForTokens = softmax(v[0]);
127 scores.add(categoryScoresForTokens);
128
129 }
130
131 return classificationScoringStrategy.score(scores);
65132
66133 } catch (Exception ex) {
67134 System.err.println("Unload to perform document classification inference: " + ex.getMessage());
78145
79146 @Override
80147 public String getBestCategory(double[] doubles) {
81 return categories.get(Inference.maxIndex(doubles));
148 return categories.get(maxIndex(doubles));
82149 }
83150
84151 @Override
152219
153220 }
154221
222 /**
223 * Loads a vocabulary file from disk.
224 * @param vocab The vocabulary file.
225 * @return A map of vocabulary words to integer IDs.
226 * @throws IOException Thrown if the vocabulary file cannot be opened and read.
227 */
228 private Map<String, Integer> loadVocab(File vocab) throws IOException {
229
230 final Map<String, Integer> v = new HashMap<>();
231
232 BufferedReader br = new BufferedReader(new FileReader(vocab.getPath()));
233 String line = br.readLine();
234 int x = 0;
235
236 while (line != null) {
237
238 line = br.readLine();
239 x++;
240
241 v.put(line, x);
242
243 }
244
245 return v;
246
247 }
248
249 private Tokens oldTokenize(String text) {
250
251 final String[] tokens = tokenizer.tokenize(text);
252
253 final int[] ids = new int[tokens.length];
254
255 for (int x = 0; x < tokens.length; x++) {
256 ids[x] = vocabulary.get(tokens[x]);
257 }
258
259 final long[] lids = Arrays.stream(ids).mapToLong(i -> i).toArray();
260
261 final long[] mask = new long[ids.length];
262 Arrays.fill(mask, 1);
263
264 final long[] types = new long[ids.length];
265 Arrays.fill(types, 0);
266
267 return new Tokens(tokens, lids, mask, types);
268
269 }
270
271 private List<Tokens> tokenize(final String text) {
272
273 final List<Tokens> t = new LinkedList<>();
274
275 // In this article as the paper suggests, we are going to segment the input into smaller text and feed
276 // each of them into BERT, it means for each row, we will split the text in order to have some
277 // smaller text (200 words long each)
278 // https://medium.com/analytics-vidhya/text-classification-with-bert-using-transformers-for-long-text-inputs-f54833994dfd
279
280 // Split the input text into 200 word chunks with 50 overlapping between chunks.
281 final String[] whitespaceTokenized = text.split("\\s+");
282
283 for (int start = 0; start < whitespaceTokenized.length;
284 start = start + inferenceOptions.getDocumentSplitSize()) {
285
286 // 200 word length chunk
287 // Check the end do don't go past and get a StringIndexOutOfBoundsException
288 int end = start + inferenceOptions.getDocumentSplitSize();
289 if (end > whitespaceTokenized.length) {
290 end = whitespaceTokenized.length;
291 }
292
293 // The group is that subsection of string.
294 final String group = String.join(" ", Arrays.copyOfRange(whitespaceTokenized, start, end));
295
296 // We want to overlap each chunk by 50 words so scoot back 50 words for the next iteration.
297 start = start - inferenceOptions.getSplitOverlapSize();
298
299 // Now we can tokenize the group and continue.
300 final String[] tokens = tokenizer.tokenize(group);
301
302 final int[] ids = new int[tokens.length];
303
304 for (int x = 0; x < tokens.length; x++) {
305 ids[x] = vocabulary.get(tokens[x]);
306 }
307
308 final long[] lids = Arrays.stream(ids).mapToLong(i -> i).toArray();
309
310 final long[] mask = new long[ids.length];
311 Arrays.fill(mask, 1);
312
313 final long[] types = new long[ids.length];
314 Arrays.fill(types, 0);
315
316 t.add(new Tokens(tokens, lids, mask, types));
317
318 }
319
320 return t;
321
322 }
323
324 /**
325 * Applies softmax to an array of values.
326 * @param input An array of values.
327 * @return The output array.
328 */
329 private double[] softmax(final float[] input) {
330
331 final double[] t = new double[input.length];
332 double sum = 0.0;
333
334 for (int x = 0; x < input.length; x++) {
335 double val = Math.exp(input[x]);
336 sum += val;
337 t[x] = val;
338 }
339
340 final double[] output = new double[input.length];
341
342 for (int x = 0; x < output.length; x++) {
343 output[x] = (float) (t[x] / sum);
344 }
345
346 return output;
347
348 }
349
350 private int maxIndex(double[] arr) {
351 return IntStream.range(0, arr.length)
352 .reduce((i, j) -> arr[i] > arr[j] ? i : j)
353 .orElse(-1);
354 }
355
155356 }
+0
-61
opennlp-dl/src/main/java/opennlp/dl/doccat/DocumentCategorizerInference.java less more
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl.doccat;
18
19 import java.io.File;
20 import java.nio.LongBuffer;
21 import java.util.HashMap;
22 import java.util.Map;
23
24 import ai.onnxruntime.OnnxTensor;
25
26 import opennlp.dl.Inference;
27 import opennlp.dl.Tokens;
28
29 public class DocumentCategorizerInference extends Inference {
30
31 private final Map<String, Integer> vocabulary;
32
33 public DocumentCategorizerInference(File model, File vocab) throws Exception {
34
35 super(model, vocab);
36
37 this.vocabulary = loadVocab(vocab);
38
39 }
40
41 @Override
42 public double[][] infer(String text) throws Exception {
43
44 final Tokens tokens = tokenize(text);
45
46 final Map<String, OnnxTensor> inputs = new HashMap<>();
47 inputs.put(INPUT_IDS, OnnxTensor.createTensor(env,
48 LongBuffer.wrap(tokens.getIds()), new long[]{1, tokens.getIds().length}));
49
50 inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
51 LongBuffer.wrap(tokens.getMask()), new long[]{1, tokens.getMask().length}));
52
53 inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
54 LongBuffer.wrap(tokens.getTypes()), new long[]{1, tokens.getTypes().length}));
55
56 return convertFloatsToDoubles((float[][]) session.run(inputs).get(0).getValue());
57
58 }
59
60 }
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl.doccat.scoring;
18
19 import java.util.List;
20
21 /**
22 * Calculates the document classification scores by averaging the scores for
23 * all individual parts of a document.
24 */
25 public class AverageClassificationScoringStrategy implements ClassificationScoringStrategy {
26
27 @Override
28 public double[] score(List<double[]> scores) {
29
30 final int values = scores.get(0).length;
31
32 final double[] averages = new double[values];
33
34 int j = 0;
35
36 for (int i = 0; i < values; i++) {
37
38 double sum = 0;
39
40 for (final double[] score : scores) {
41
42 sum += score[i];
43
44 }
45
46 averages[j++] = (sum / scores.size());
47
48 }
49
50 return averages;
51
52 }
53
54 }
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl.doccat.scoring;
18
19 import java.util.List;
20
21 /**
22 * Used to determine the scores of document classification. During classification,
23 * documents are separated into smaller parts. The classification is run on each part.
24 * Implementations of this interface are used to determine the score from the scores
25 * of each individual part.
26 */
@FunctionalInterface
public interface ClassificationScoringStrategy {

  /**
   * Combines the per-part scores of a document into its final scores.
   * @param scores One score array for every individual part of the document.
   * @return The final scores calculated from the parts.
   */
  double[] score(List<double[]> scores);

}
1616
1717 package opennlp.dl.namefinder;
1818
19 import java.io.BufferedReader;
1920 import java.io.File;
21 import java.io.FileReader;
22 import java.io.IOException;
23 import java.nio.LongBuffer;
2024 import java.util.Arrays;
25 import java.util.HashMap;
2126 import java.util.LinkedList;
2227 import java.util.List;
2328 import java.util.Map;
24
25 import opennlp.dl.Inference;
29 import java.util.regex.Matcher;
30 import java.util.regex.Pattern;
31
32 import ai.onnxruntime.OnnxTensor;
33 import ai.onnxruntime.OrtEnvironment;
34 import ai.onnxruntime.OrtException;
35 import ai.onnxruntime.OrtSession;
36
37 import opennlp.dl.InferenceOptions;
38 import opennlp.dl.SpanEnd;
39 import opennlp.dl.Tokens;
2640 import opennlp.tools.namefind.TokenNameFinder;
41 import opennlp.tools.tokenize.Tokenizer;
42 import opennlp.tools.tokenize.WordpieceTokenizer;
2743 import opennlp.tools.util.Span;
2844
2945 /**
3147 */
3248 public class NameFinderDL implements TokenNameFinder {
3349
50 public static final String INPUT_IDS = "input_ids";
51 public static final String ATTENTION_MASK = "attention_mask";
52 public static final String TOKEN_TYPE_IDS = "token_type_ids";
53
3454 public static final String I_PER = "I-PER";
3555 public static final String B_PER = "B-PER";
3656
37 private final TokenNameFinderInference inference;
57 protected final OrtSession session;
58
3859 private final Map<Integer, String> ids2Labels;
39
40 /**
41 * Creates a new NameFinderDL for entity recognition using ONNX models.
42 *
43 * @param model The ONNX model file.
44 * @param vocab The model's vocabulary file.
45 * @param doLowerCase Whether or not to lowercase the text prior to inference.
46 * @param ids2Labels A map of values and their assigned labels used to train the model.
47 * @throws Exception Thrown if the models cannot be loaded.
48 */
49 public NameFinderDL(File model, File vocab, boolean doLowerCase, Map<Integer, String> ids2Labels)
50 throws Exception {
51
60 private final Tokenizer tokenizer;
61 private final Map<String, Integer> vocab;
62 private final InferenceOptions inferenceOptions;
63 protected final OrtEnvironment env;
64
65 public NameFinderDL(File model, File vocabulary, Map<Integer, String> ids2Labels) throws Exception {
66
67 this(model, vocabulary, ids2Labels, new InferenceOptions());
68
69 }
70
71 public NameFinderDL(File model, File vocabulary, Map<Integer, String> ids2Labels,
72 InferenceOptions inferenceOptions) throws Exception {
73
74 this.env = OrtEnvironment.getEnvironment();
75
76 final OrtSession.SessionOptions sessionOptions = new OrtSession.SessionOptions();
77 if (inferenceOptions.isGpu()) {
78 sessionOptions.addCUDA(inferenceOptions.getGpuDeviceId());
79 }
80
81 this.session = env.createSession(model.getPath(), sessionOptions);
5282 this.ids2Labels = ids2Labels;
53 this.inference = new TokenNameFinderInference(model, vocab, doLowerCase);
83 this.vocab = loadVocab(vocabulary);
84 this.tokenizer = new WordpieceTokenizer(vocab.keySet());
85 this.inferenceOptions = inferenceOptions;
5486
5587 }
5688
5789 @Override
58 public Span[] find(String[] tokens) {
90 public Span[] find(String[] input) {
91
92
93
94 /**
95 * So, it looks like inference is being done on the wordpiece tokens but then
96 * spans are being created from the whitespace tokens.
97 */
5998
6099 final List<Span> spans = new LinkedList<>();
61 final String text = String.join(" ", tokens);
62
63 try {
64
65 final double[][] v = inference.infer(text);
66
67 // Find consecutive B-PER and I-PER labels and combine the spans where necessary.
68 // There are also B-LOC and I-LOC tags for locations that might be useful at some point.
69
70 // Keep track of where the last span was so when there are multiple/duplicate
71 // spans we can get the next one instead of the first one each time.
72 int characterStart = 0;
73
74 // We are looping over the vector for each word,
75 // finding the index of the array that has the maximum value,
76 // and then finding the token classification that corresponds to that index.
77 for (int x = 0; x < v.length; x++) {
78
79 final double[] arr = v[x];
80 final int maxIndex = Inference.maxIndex(arr);
81 final String label = ids2Labels.get(maxIndex);
82
83 final double probability = arr[maxIndex];
84
85 if (B_PER.equalsIgnoreCase(label)) {
86
87 // This is the start of a person entity.
88 final String spanText;
89
90 // Find the end index of the span in the array (where the label is not I-PER).
91 final int endIndex = findSpanEnd(v, x, ids2Labels);
92
93 // If the end is -1 it means this is a single-span token.
94 // If the end is != -1 it means this is a multi-span token.
95 if (endIndex != -1) {
96
97 // Subtract one for the beginning token not part of the text.
98 spanText = String.join(" ", Arrays.copyOfRange(tokens, x - 1, endIndex));
99
100 spans.add(new Span(x - 1, endIndex, spanText, probability));
101
102 x = endIndex;
103
104 } else {
105
106 // This is a single-token span so there is nothing else to do except grab the token.
107 spanText = tokens[x];
108
109 // Subtract one for the beginning token not part of the text.
110 spans.add(new Span(x - 1, endIndex, spanText, probability));
100
101 // Join the tokens here because they will be tokenized using Wordpiece during inference.
102 final String text = String.join(" ", input);
103
104 // The WordPiece tokenized text. This changes the spacing in the text.
105 final List<Tokens> wordpieceTokens = tokenize(text);
106
107 for (final Tokens tokens : wordpieceTokens) {
108
109 try {
110
111 // The inputs to the ONNX model.
112 final Map<String, OnnxTensor> inputs = new HashMap<>();
113 inputs.put(INPUT_IDS, OnnxTensor.createTensor(env, LongBuffer.wrap(tokens.getIds()),
114 new long[] {1, tokens.getIds().length}));
115
116 if (inferenceOptions.isIncludeAttentionMask()) {
117 inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
118 LongBuffer.wrap(tokens.getMask()), new long[] {1, tokens.getMask().length}));
119 }
120
121 if (inferenceOptions.isIncludeTokenTypeIds()) {
122 inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
123 LongBuffer.wrap(tokens.getTypes()), new long[] {1, tokens.getTypes().length}));
124 }
125
126 // The outputs from the model.
127 final float[][][] v = (float[][][]) session.run(inputs).get(0).getValue();
128
129 // Find consecutive B-PER and I-PER labels and combine the spans where necessary.
130 // There are also B-LOC and I-LOC tags for locations that might be useful at some point.
131
132 // Keep track of where the last span was so when there are multiple/duplicate
133 // spans we can get the next one instead of the first one each time.
134 int characterStart = 0;
135
136 // We are looping over the vector for each word,
137 // finding the index of the array that has the maximum value,
138 // and then finding the token classification that corresponds to that index.
139 for (int x = 0; x < v[0].length; x++) {
140
141 final float[] arr = v[0][x];
142 final int maxIndex = maxIndex(arr);
143 final String label = ids2Labels.get(maxIndex);
144
145 // TODO: Need to make sure this value is between 0 and 1?
146 // Can we do thresholding without it between 0 and 1?
147 final double confidence = arr[maxIndex]; // / 10;
148
149 // Show each token and its label per the model.
150 // System.out.println(tokens.getTokens()[x] + " : " + label);
151
152 // Is this the start of a person entity?
153 if (B_PER.equals(label)) {
154
155 final String spanText;
156
157 // Find the end index of the span in the array (where the label is not I-PER).
158 final SpanEnd spanEnd = findSpanEnd(v, x, ids2Labels, tokens.getTokens());
159
160 // If the end is -1 it means this is a single-token span.
161 // If the end is != -1 it means this is a multi-token span.
162 if (spanEnd.getIndex() != -1) {
163
164 final StringBuilder sb = new StringBuilder();
165
166 // We have to concatenate the tokens.
167 // Add each token in the array and separate them with a space.
168 // We'll separate each with a single space because later we'll find the original span
169 // in the text and ignore spacing between individual tokens in findByRegex().
170 int end = spanEnd.getIndex();
171 for (int i = x; i <= end; i++) {
172
173 // If the next token starts with ##, combine it with this token.
174 if (tokens.getTokens()[i + 1].startsWith("##")) {
175
176 sb.append(tokens.getTokens()[i] + tokens.getTokens()[i + 1].replaceAll("##", ""));
177
178 // Append a space unless the next (next) token starts with ##.
179 if (!tokens.getTokens()[i + 2].startsWith("##")) {
180 sb.append(" ");
181 }
182
183 // Skip the next token since we just included it in this iteration.
184 i++;
185
186 } else {
187
188 sb.append(tokens.getTokens()[i].replaceAll("##", ""));
189
190 // Append a space unless the next token is a period.
191 if (!".".equals(tokens.getTokens()[i + 1])) {
192 sb.append(" ");
193 }
194
195 }
196
197 }
198
199 // This is the text of the span. We use the whole original input text and not one
200 // of the splits. This gives us accurate character positions.
201 spanText = findByRegex(text, sb.toString().trim()).trim();
202
203 } else {
204
205 // This is a single-token span so there is nothing else to do except grab the token.
206 spanText = tokens.getTokens()[x];
207
208 }
209
210 // This ignores other potential matches in the same sentence
211 // by only taking the first occurrence.
212 characterStart = text.indexOf(spanText, characterStart);
213 final int characterEnd = characterStart + spanText.length();
214
215 spans.add(new Span(characterStart, characterEnd, spanText, confidence));
216
217 characterStart = characterEnd;
111218
112219 }
113220
114221 }
115222
116 }
117
118 } catch (Exception ex) {
119 System.err.println("Error performing namefinder inference: " + ex.getMessage());
223 } catch (OrtException ex) {
224 throw new RuntimeException("Error performing namefinder inference: " + ex.getMessage(), ex);
225 }
226
120227 }
121228
122229 return spans.toArray(new Span[0]);
125232
126233 @Override
127234 public void clearAdaptiveData() {
128 // No use for this in this implementation.
129 }
130
131 private int findSpanEnd(double[][] v, int startIndex, Map<Integer, String> id2Labels) {
132
133 // This will be the index of the last token in the span.
235 // No use in this implementation.
236 }
237
238 private SpanEnd findSpanEnd(float[][][] v, int startIndex, Map<Integer, String> id2Labels,
239 String[] tokens) {
240
134241 // -1 means there is no follow-up token, so it is a single-token span.
135242 int index = -1;
243 int characterEnd = 0;
136244
137245 // Starts at the span start in the vector.
138246 // Looks at the next token to see if it is an I-PER.
142250 for (int x = startIndex + 1; x < v[0].length; x++) {
143251
144252 // Get the next item.
145 final double[] arr = v[x];
253 final float[] arr = v[0][x];
146254
147255 // See if the next token has an I-PER label.
148 final String nextTokenClassification = id2Labels.get(Inference.maxIndex(arr));
149
150 if (!I_PER.equalsIgnoreCase(nextTokenClassification)) {
256 final String nextTokenClassification = id2Labels.get(maxIndex(arr));
257
258 if (!I_PER.equals(nextTokenClassification)) {
151259 index = x - 1;
152260 break;
153261 }
154262
155263 }
156264
265 // Find where the span ends based on the tokens.
266 for (int x = 1; x <= index && x < tokens.length; x++) {
267 characterEnd += tokens[x].length();
268 }
269
270 // Account for the number of spaces (that is the number of tokens).
271 // (One space per token.)
272 characterEnd += index - 1;
273
274 return new SpanEnd(index, characterEnd);
275
276 }
277
278 private int maxIndex(float[] arr) {
279
280 double max = Float.NEGATIVE_INFINITY;
281 int index = -1;
282
283 for (int x = 0; x < arr.length; x++) {
284 if (arr[x] > max) {
285 index = x;
286 max = arr[x];
287 }
288 }
289
157290 return index;
158291
159292 }
160293
294 private static String findByRegex(String text, String span) {
295
296 final String regex = span
297 .replaceAll(" ", "\\\\s+")
298 .replaceAll("\\)", "\\\\)")
299 .replaceAll("\\(", "\\\\(");
300
301 final Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
302 final Matcher matcher = pattern.matcher(text);
303
304 if (matcher.find()) {
305 return matcher.group(0);
306 }
307
308 // For some reason the regex match wasn't found. Just return the original span.
309 return span;
310
311 }
312
313 private List<Tokens> tokenize(final String text) {
314
315 final List<Tokens> t = new LinkedList<>();
316
317 // In this article as the paper suggests, we are going to segment the input into smaller text and feed
318 // each of them into BERT, it means for each row, we will split the text in order to have some
319 // smaller text (200 words long each)
320 // https://medium.com/analytics-vidhya/text-classification-with-bert-using-transformers-for-long-text-inputs-f54833994dfd
321
322 // Split the input text into 200 word chunks with 50 overlapping between chunks.
323 final String[] whitespaceTokenized = text.split("\\s+");
324
325 for (int start = 0; start < whitespaceTokenized.length;
326 start = start + inferenceOptions.getDocumentSplitSize()) {
327
328 // 200 word length chunk
329 // Check the end do don't go past and get a StringIndexOutOfBoundsException
330 int end = start + inferenceOptions.getDocumentSplitSize();
331 if (end > whitespaceTokenized.length) {
332 end = whitespaceTokenized.length;
333 }
334
335 // The group is that subsection of string.
336 final String group = String.join(" ", Arrays.copyOfRange(whitespaceTokenized, start, end));
337
338 // We want to overlap each chunk by 50 words so scoot back 50 words for the next iteration.
339 start = start - inferenceOptions.getSplitOverlapSize();
340
341 // Now we can tokenize the group and continue.
342 final String[] tokens = tokenizer.tokenize(group);
343
344 final int[] ids = new int[tokens.length];
345
346 for (int x = 0; x < tokens.length; x++) {
347 ids[x] = vocab.get(tokens[x]);
348 }
349
350 final long[] lids = Arrays.stream(ids).mapToLong(i -> i).toArray();
351
352 final long[] mask = new long[ids.length];
353 Arrays.fill(mask, 1);
354
355 final long[] types = new long[ids.length];
356 Arrays.fill(types, 0);
357
358 t.add(new Tokens(tokens, lids, mask, types));
359
360 }
361
362 return t;
363
364 }
365
366 /**
367 * Loads a vocabulary file from disk.
368 * @param vocab The vocabulary file.
369 * @return A map of vocabulary words to integer IDs.
370 * @throws IOException Thrown if the vocabulary file cannot be opened and read.
371 */
372 private Map<String, Integer> loadVocab(File vocab) throws IOException {
373
374 final Map<String, Integer> v = new HashMap<>();
375
376 try (final BufferedReader br = new BufferedReader(new FileReader(vocab.getPath()))) {
377
378 String line = br.readLine();
379 int x = 0;
380
381 while (line != null) {
382
383 line = br.readLine();
384 x++;
385
386 v.put(line, x);
387
388 }
389
390 }
391
392 return v;
393
394 }
395
161396 }
+0
-68
opennlp-dl/src/main/java/opennlp/dl/namefinder/TokenNameFinderInference.java less more
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl.namefinder;
18
19 import java.io.File;
20 import java.nio.LongBuffer;
21 import java.util.HashMap;
22 import java.util.Locale;
23 import java.util.Map;
24
25 import ai.onnxruntime.OnnxTensor;
26
27 import opennlp.dl.Inference;
28 import opennlp.dl.Tokens;
29
30 public class TokenNameFinderInference extends Inference {
31
32 private final boolean doLowerCase;
33
34 public TokenNameFinderInference(File model, File vocab, boolean doLowerCase) throws Exception {
35
36 super(model, vocab);
37
38 this.doLowerCase = doLowerCase;
39
40 }
41
42 @Override
43 public double[][] infer(String text) throws Exception {
44
45 if (doLowerCase) {
46 text = text.toLowerCase(Locale.ROOT);
47 }
48
49 final Tokens tokens = tokenize(text);
50
51 final Map<String, OnnxTensor> inputs = new HashMap<>();
52 inputs.put(INPUT_IDS, OnnxTensor.createTensor(env,
53 LongBuffer.wrap(tokens.getIds()), new long[]{1, tokens.getIds().length}));
54
55 inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
56 LongBuffer.wrap(tokens.getMask()), new long[]{1, tokens.getMask().length}));
57
58 inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
59 LongBuffer.wrap(tokens.getTypes()), new long[]{1, tokens.getTypes().length}));
60
61 final float[][][] v = (float[][][]) session.run(inputs).get(0).getValue();
62
63 return convertFloatsToDoubles(v[0]);
64
65 }
66
67 }
1717 package opennlp.dl.doccat;
1818
1919 import java.io.File;
20 import java.io.FileNotFoundException;
20 import java.io.IOException;
2121 import java.util.Arrays;
22 import java.util.Collections;
2223 import java.util.HashMap;
2324 import java.util.Map;
2425 import java.util.Set;
2526
26 import org.junit.Assert;
27 import org.junit.Test;
27 import ai.onnxruntime.OrtException;
28
29 import org.junit.jupiter.api.Assertions;
30 import org.junit.jupiter.api.Disabled;
31 import org.junit.jupiter.api.Test;
2832
2933 import opennlp.dl.AbstactDLTest;
34 import opennlp.dl.InferenceOptions;
35 import opennlp.dl.doccat.scoring.AverageClassificationScoringStrategy;
3036
3137 public class DocumentCategorizerDLEval extends AbstactDLTest {
3238
3339 @Test
34 public void categorize() throws FileNotFoundException {
35
36 // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
37 // You will need to update the assertions if you use a different model.
38
39 final File model = new File(getOpennlpDataDir(), "onnx/doccat/model.onnx");
40 final File vocab = new File(getOpennlpDataDir(), "onnx/doccat/vocab.txt");
41
42 final DocumentCategorizerDL documentCategorizerDL =
43 new DocumentCategorizerDL(model, vocab, getCategories());
40 public void categorize() throws IOException, OrtException {
41
42 final File model = new File(getOpennlpDataDir(),
43 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.onnx");
44 final File vocab = new File(getOpennlpDataDir(),
45 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.vocab");
46
47 final DocumentCategorizerDL documentCategorizerDL =
48 new DocumentCategorizerDL(model, vocab, getCategories(),
49 new AverageClassificationScoringStrategy(),
50 new InferenceOptions());
51
52 final String text = "We try hard to identify the sources and licenses of all media such as text, images" +
53 " or sounds used in our encyclopedia articles. Still, we cannot guarantee that all media are used " +
54 "or marked correctly: for example, if an image description page states that an image was in the " +
55 "public domain, you should still check yourself whether that claim appears correct and decide for " +
56 "yourself whether your use of the image would be fine under the laws applicable to you. Wikipedia " +
57 "is primarily subject to U.S. law; re-users outside the U.S. should be aware that they are subject " +
58 "to the laws of their country, which almost certainly are different. Images published under the " +
59 "GFDL or one of the Creative Commons Licenses are unlikely to pose problems, as these are specific " +
60 "licenses with precise terms worldwide. Public domain images may need to be re-evaluated by a " +
61 "re-user because it depends on each country's copyright laws what is in the public domain there. " +
62 "There is no guarantee that something in the public domain in the U.S. was also in the public " +
63 "domain in your country.";
64
65 final double[] result = documentCategorizerDL.categorize(new String[]{text});
66
67 // Sort the result for easier comparison.
68 final double[] sortedResult = Arrays.stream(result)
69 .boxed()
70 .sorted(Collections.reverseOrder()).mapToDouble(Double::doubleValue).toArray();
71
72 final double[] expected = new double[]
73 {0.3391093313694,
74 0.2611352801322937,
75 0.24420668184757233,
76 0.11939861625432968,
77 0.03615010157227516};
78
79 System.out.println("Actual: " + Arrays.toString(sortedResult));
80 System.out.println("Expected: " + Arrays.toString(expected));
81
82 Assertions.assertArrayEquals(expected, sortedResult, 0.0);
83 Assertions.assertEquals(5, result.length);
84
85 final String category = documentCategorizerDL.getBestCategory(result);
86 Assertions.assertEquals("bad", category);
87
88 }
89
90 @Disabled("This test will should only be run if a GPU device is present.")
91 @Test
92 public void categorizeWithGpu() throws Exception {
93
94 final File model = new File(getOpennlpDataDir(),
95 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.onnx");
96 final File vocab = new File(getOpennlpDataDir(),
97 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.vocab");
98
99 final InferenceOptions inferenceOptions = new InferenceOptions();
100 inferenceOptions.setGpu(true);
101 inferenceOptions.setGpuDeviceId(0);
102
103 final DocumentCategorizerDL documentCategorizerDL =
104 new DocumentCategorizerDL(model, vocab, getCategories(),
105 new AverageClassificationScoringStrategy(),
106 new InferenceOptions());
44107
45108 final double[] result = documentCategorizerDL.categorize(new String[]{"I am happy"});
46109 System.out.println(Arrays.toString(result));
47110
48111 final double[] expected = new double[]
49112 {0.007819971069693565,
50 0.006593209225684404,
51 0.04995147883892059,
52 0.3003573715686798,
53 0.6352779865264893};
54
55 Assert.assertTrue(Arrays.equals(expected, result));
56 Assert.assertEquals(5, result.length);
113 0.006593209225684404,
114 0.04995147883892059,
115 0.3003573715686798,
116 0.6352779865264893};
117
118 Assertions.assertArrayEquals(expected, result, 0.0);
119 Assertions.assertEquals(5, result.length);
57120
58121 final String category = documentCategorizerDL.getBestCategory(result);
59 Assert.assertEquals("very good", category);
60
61 }
62
63 @Test
64 public void scoreMap() throws FileNotFoundException {
65
66 // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
67 // You will need to update the assertions if you use a different model.
68
69 final File model = new File(getOpennlpDataDir(), "onnx/doccat/model.onnx");
70 final File vocab = new File(getOpennlpDataDir(), "onnx/doccat/vocab.txt");
71
72 final DocumentCategorizerDL documentCategorizerDL =
73 new DocumentCategorizerDL(model, vocab, getCategories());
122 Assertions.assertEquals("very good", category);
123
124 }
125
126 @Test
127 public void categorizeWithInferenceOptions() throws Exception {
128
129 final File model = new File(getOpennlpDataDir(),
130 "onnx/doccat/lvwerra_distilbert-imdb.onnx");
131 final File vocab = new File(getOpennlpDataDir(),
132 "onnx/doccat/lvwerra_distilbert-imdb.vocab");
133
134 final InferenceOptions inferenceOptions = new InferenceOptions();
135 inferenceOptions.setIncludeTokenTypeIds(false);
136
137 final Map<Integer, String> categories = new HashMap<>();
138 categories.put(0, "negative");
139 categories.put(1, "positive");
140
141 final DocumentCategorizerDL documentCategorizerDL =
142 new DocumentCategorizerDL(model, vocab, categories,
143 new AverageClassificationScoringStrategy(),
144 inferenceOptions);
145
146 final double[] result = documentCategorizerDL.categorize(new String[]{"I am angry"});
147
148 final double[] expected = new double[]{0.8851314783096313, 0.11486853659152985};
149
150 Assertions.assertArrayEquals(expected, result, 0.0);
151 Assertions.assertEquals(2, result.length);
152
153 final String category = documentCategorizerDL.getBestCategory(result);
154 Assertions.assertEquals("negative", category);
155
156 }
157
158 @Test
159 public void scoreMap() throws Exception {
160
161 final File model = new File(getOpennlpDataDir(),
162 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.onnx");
163 final File vocab = new File(getOpennlpDataDir(),
164 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.vocab");
165
166 final DocumentCategorizerDL documentCategorizerDL =
167 new DocumentCategorizerDL(model, vocab, getCategories(),
168 new AverageClassificationScoringStrategy(),
169 new InferenceOptions());
74170
75171 final Map<String, Double> result = documentCategorizerDL.scoreMap(new String[]{"I am happy"});
76172
77 Assert.assertEquals(0.6352779865264893, result.get("very good").doubleValue(), 0);
78 Assert.assertEquals(0.3003573715686798, result.get("good").doubleValue(), 0);
79 Assert.assertEquals(0.04995147883892059, result.get("neutral").doubleValue(), 0);
80 Assert.assertEquals(0.006593209225684404, result.get("bad").doubleValue(), 0);
81 Assert.assertEquals(0.007819971069693565, result.get("very bad").doubleValue(), 0);
82
83 }
84
85 @Test
86 public void sortedScoreMap() throws FileNotFoundException {
87
88 // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
89 // You will need to update the assertions if you use a different model.
90
91 final File model = new File(getOpennlpDataDir(), "onnx/doccat/model.onnx");
92 final File vocab = new File(getOpennlpDataDir(), "onnx/doccat/vocab.txt");
93
94 final DocumentCategorizerDL documentCategorizerDL =
95 new DocumentCategorizerDL(model, vocab, getCategories());
173 Assertions.assertEquals(0.6352779865264893, result.get("very good").doubleValue(), 0);
174 Assertions.assertEquals(0.3003573715686798, result.get("good").doubleValue(), 0);
175 Assertions.assertEquals(0.04995147883892059, result.get("neutral").doubleValue(), 0);
176 Assertions.assertEquals(0.006593209225684404, result.get("bad").doubleValue(), 0);
177 Assertions.assertEquals(0.007819971069693565, result.get("very bad").doubleValue(), 0);
178
179 }
180
181 @Test
182 public void sortedScoreMap() throws IOException, OrtException {
183
184 final File model = new File(getOpennlpDataDir(),
185 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.onnx");
186 final File vocab = new File(getOpennlpDataDir(),
187 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.vocab");
188
189 final DocumentCategorizerDL documentCategorizerDL =
190 new DocumentCategorizerDL(model, vocab, getCategories(),
191 new AverageClassificationScoringStrategy(),
192 new InferenceOptions());
96193
97194 final Map<Double, Set<String>> result = documentCategorizerDL.sortedScoreMap(new String[]{"I am happy"});
98195
99 Assert.assertEquals(result.get(0.6352779865264893).size(), 1);
100 Assert.assertEquals(result.get(0.3003573715686798).size(), 1);
101 Assert.assertEquals(result.get(0.04995147883892059).size(), 1);
102 Assert.assertEquals(result.get(0.006593209225684404).size(), 1);
103 Assert.assertEquals(result.get(0.007819971069693565).size(), 1);
104
105 }
106
107 @Test
108 public void doccat() throws FileNotFoundException {
109
110 // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
111 // You will need to update the assertions if you use a different model.
112
113 final File model = new File(getOpennlpDataDir(), "onnx/doccat/model.onnx");
114 final File vocab = new File(getOpennlpDataDir(), "onnx/doccat/vocab.txt");
115
116 final DocumentCategorizerDL documentCategorizerDL =
117 new DocumentCategorizerDL(model, vocab, getCategories());
196 Assertions.assertEquals(result.get(0.6352779865264893).size(), 1);
197 Assertions.assertEquals(result.get(0.3003573715686798).size(), 1);
198 Assertions.assertEquals(result.get(0.04995147883892059).size(), 1);
199 Assertions.assertEquals(result.get(0.006593209225684404).size(), 1);
200 Assertions.assertEquals(result.get(0.007819971069693565).size(), 1);
201
202 }
203
204 @Test
205 public void doccat() throws IOException, OrtException {
206
207 final File model = new File(getOpennlpDataDir(),
208 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.onnx");
209 final File vocab = new File(getOpennlpDataDir(),
210 "onnx/doccat/nlptown_bert-base-multilingual-uncased-sentiment.vocab");
211
212 final DocumentCategorizerDL documentCategorizerDL =
213 new DocumentCategorizerDL(model, vocab, getCategories(),
214 new AverageClassificationScoringStrategy(),
215 new InferenceOptions());
118216
119217 final int index = documentCategorizerDL.getIndex("bad");
120 Assert.assertEquals(1, index);
218 Assertions.assertEquals(1, index);
121219
122220 final String category = documentCategorizerDL.getCategory(3);
123 Assert.assertEquals("good", category);
221 Assertions.assertEquals("good", category);
124222
125223 final int number = documentCategorizerDL.getNumberOfCategories();
126 Assert.assertEquals(5, number);
224 Assertions.assertEquals(5, number);
127225
128226 }
129227
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.dl.doccat.scoring;
18
19 import java.util.LinkedList;
20 import java.util.List;
21
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
24
25 public class AverageClassificationScoringStrategyTest {
26
27 @Test
28 public void calculateAverage1() {
29
30 final List<double[]> scores = new LinkedList<>();
31 scores.add(new double[]{1, 2, 3, 4, 5});
32 scores.add(new double[]{1, 2, 3, 4, 5});
33 scores.add(new double[]{1, 2, 3, 4, 5});
34
35 final ClassificationScoringStrategy strategy = new AverageClassificationScoringStrategy();
36 final double[] results = strategy.score(scores);
37
38 Assertions.assertEquals(1.0, results[0], 0);
39 Assertions.assertEquals(2.0, results[1], 0);
40 Assertions.assertEquals(3.0, results[2], 0);
41 Assertions.assertEquals(4.0, results[3], 0);
42 Assertions.assertEquals(5.0, results[4], 0);
43
44 }
45
46 @Test
47 public void calculateAverage2() {
48
49 final List<double[]> scores = new LinkedList<>();
50 scores.add(new double[]{2, 1, 5});
51 scores.add(new double[]{4, 3, 10});
52 scores.add(new double[]{6, 5, 15});
53
54 final ClassificationScoringStrategy strategy = new AverageClassificationScoringStrategy();
55 final double[] results = strategy.score(scores);
56
57 Assertions.assertEquals(4.0, results[0], 0);
58 Assertions.assertEquals(3.0, results[1], 0);
59 Assertions.assertEquals(10.0, results[2], 0);
60
61 }
62
63 }
2222
2323 import ai.onnxruntime.OrtException;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.dl.AbstactDLTest;
29
3029 import opennlp.tools.util.Span;
3130
3231 public class NameFinderDLEval extends AbstactDLTest {
4342 final String[] tokens = new String[]
4443 {"George", "Washington", "was", "president", "of", "the", "United", "States", "."};
4544
46 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
47 final Span[] spans = nameFinderDL.find(tokens);
48
49 for (Span span : spans) {
50 System.out.println(span.toString());
51 }
52
53 Assert.assertEquals(1, spans.length);
54 Assert.assertEquals(0, spans[0].getStart());
55 Assert.assertEquals(2, spans[0].getEnd());
56 Assert.assertEquals(8.251646041870117, spans[0].getProb(), 0.0);
45 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, getIds2Labels());
46 final Span[] spans = nameFinderDL.find(tokens);
47
48 for (Span span : spans) {
49 System.out.println(span.toString());
50 }
51
52 Assertions.assertEquals(1, spans.length);
53 Assertions.assertEquals(0, spans[0].getStart());
54 Assertions.assertEquals(17, spans[0].getEnd());
55 Assertions.assertEquals(8.251646041870117, spans[0].getProb(), 0.0);
56 Assertions.assertEquals("George Washington", spans[0].getCoveredText(String.join(" ", tokens)));
5757
5858 }
5959
6868
6969 final String[] tokens = new String[]{"His", "name", "was", "George", "Washington"};
7070
71 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
72 final Span[] spans = nameFinderDL.find(tokens);
73
74 for (Span span : spans) {
75 System.out.println(span.toString());
76 }
77
78 Assert.assertEquals(1, spans.length);
79 Assert.assertEquals(3, spans[0].getStart());
80 Assert.assertEquals(5, spans[0].getEnd());
71 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, getIds2Labels());
72 final Span[] spans = nameFinderDL.find(tokens);
73
74 for (Span span : spans) {
75 System.out.println(span.toString());
76 }
77
78 Assertions.assertEquals(1, spans.length);
79 Assertions.assertEquals(13, spans[0].getStart());
80 Assertions.assertEquals(30, spans[0].getEnd());
8181
8282 }
8383
9292
9393 final String[] tokens = new String[]{"His", "name", "was", "George"};
9494
95 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
96 final Span[] spans = nameFinderDL.find(tokens);
97
98 for (Span span : spans) {
99 System.out.println(span.toString());
100 }
101
102 Assert.assertEquals(1, spans.length);
103 Assert.assertEquals(3, spans[0].getStart());
104 Assert.assertEquals(4, spans[0].getEnd());
95 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, getIds2Labels());
96 final Span[] spans = nameFinderDL.find(tokens);
97
98 for (Span span : spans) {
99 System.out.println(span.toString());
100 }
101
102 Assertions.assertEquals(1, spans.length);
103 Assertions.assertEquals(13, spans[0].getStart());
104 Assertions.assertEquals(19, spans[0].getEnd());
105105
106106 }
107107
116116
117117 final String[] tokens = new String[]{};
118118
119 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
120 final Span[] spans = nameFinderDL.find(tokens);
121
122 Assert.assertEquals(0, spans.length);
119 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, getIds2Labels());
120 final Span[] spans = nameFinderDL.find(tokens);
121
122 Assertions.assertEquals(0, spans.length);
123123
124124 }
125125
134134
135135 final String[] tokens = new String[]{"I", "went", "to", "the", "park"};
136136
137 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
138 final Span[] spans = nameFinderDL.find(tokens);
139
140 Assert.assertEquals(0, spans.length);
137 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, getIds2Labels());
138 final Span[] spans = nameFinderDL.find(tokens);
139
140 Assertions.assertEquals(0, spans.length);
141141
142142 }
143143
153153 final String[] tokens = new String[]{"George", "Washington", "and", "Abraham", "Lincoln",
154154 "were", "presidents"};
155155
156 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
157 final Span[] spans = nameFinderDL.find(tokens);
158
159 for (Span span : spans) {
160 System.out.println(span.toString());
161 }
162
163 Assert.assertEquals(2, spans.length);
164 Assert.assertEquals(0, spans[0].getStart());
165 Assert.assertEquals(2, spans[0].getEnd());
166 Assert.assertEquals(3, spans[1].getStart());
167 Assert.assertEquals(5, spans[1].getEnd());
168
169 }
170
171 @Test(expected = OrtException.class)
172 public void invalidModel() throws Exception {
173
174 // This test was written using the dslim/bert-base-NER model.
175 // You will need to update the ids2Labels and assertions if you use a different model.
176
177 final File model = new File("invalid.onnx");
178 final File vocab = new File("vocab.txt");
179
180 new NameFinderDL(model, vocab, true, getIds2Labels());
156 final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, getIds2Labels());
157 final Span[] spans = nameFinderDL.find(tokens);
158
159 for (Span span : spans) {
160 System.out.println(span.toString());
161 }
162
163 Assertions.assertEquals(2, spans.length);
164 Assertions.assertEquals(0, spans[0].getStart());
165 Assertions.assertEquals(17, spans[0].getEnd());
166 Assertions.assertEquals(22, spans[1].getStart());
167 Assertions.assertEquals(37, spans[1].getEnd());
168
169 }
170
171 @Test
172 public void invalidModel() {
173
174 Assertions.assertThrows(OrtException.class, () -> {
175 // This test was written using the dslim/bert-base-NER model.
176 // You will need to update the ids2Labels and assertions if you use a different model.
177
178 final File model = new File("invalid.onnx");
179 final File vocab = new File("vocab.txt");
180
181 new NameFinderDL(model, vocab, getIds2Labels());
182 });
181183
182184 }
183185
2323 <parent>
2424 <groupId>org.apache.opennlp</groupId>
2525 <artifactId>opennlp</artifactId>
26 <version>2.0.0</version>
26 <version>2.1.0</version>
2727 <relativePath>../pom.xml</relativePath>
2828 </parent>
2929
236236 ChunkerModel model;
237237
238238 try(ObjectStream<ChunkSample> sampleStream = new ChunkSampleStream(lineStream)) {
239 model = ChunkerME.train("en", sampleStream,
239 model = ChunkerME.train("eng", sampleStream,
240240 TrainingParameters.defaultParams(), new ChunkerFactory());
241241 }
242242
156156
157157 ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream);
158158
159 model = DocumentCategorizerME.train("en", sampleStream,
159 model = DocumentCategorizerME.train("eng", sampleStream,
160160 TrainingParameters.defaultParams(), new DoccatFactory());
161161 } catch (IOException e) {
162162 e.printStackTrace();
293293 TokenNameFinderModel model;
294294
295295 try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream)) {
296 model = NameFinderME.train("en", "person", sampleStream, TrainingParameters.defaultParams(), nameFinderFactory);
296 model = NameFinderME.train("eng", "person", sampleStream, TrainingParameters.defaultParams(), nameFinderFactory);
297297 }
298298
299299 try (OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(modelFile))) {
239239 params.getHeadRulesSerializerImpl());
240240 }
241241 else {
242 if ("en".equals(params.getLang())) {
242 if ("eng".equals(params.getLang())) {
243243 headRulesSerializer = new opennlp.tools.parser.lang.en.HeadRules.HeadRulesSerializer();
244244 }
245245 else if ("es".equals(params.getLang())) {
374374 InputStreamFactory inputStreamFactory = new MarkableFileInputStreamFactory(new File("parsing.train"));
375375 ObjectStream<String> stringStream = new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8);
376376 ObjectStream<Parse> sampleStream = new ParseSample(stringStream);
377 ParserCrossValidator evaluator = new ParserCrossValidator("en", trainParameters, headRules, \
377 ParserCrossValidator evaluator = new ParserCrossValidator("eng", trainParameters, headRules, \
378378 parserType, listeners.toArray(new ParserEvaluationMonitor[listeners.size()]));
379379 evaluator.evaluate(sampleStream, 10);
380380
210210
211211 ObjectStream<POSSample> sampleStream = new WordTagSampleStream(lineStream);
212212
213 model = POSTaggerME.train("en", sampleStream, TrainingParameters.defaultParams(), new POSTaggerFactory());
213 model = POSTaggerME.train("eng", sampleStream, TrainingParameters.defaultParams(), new POSTaggerFactory());
214214 } catch (IOException e) {
215215 e.printStackTrace();
216216 }]]>
257257 To train the english tokenizer use the following command:
258258 <screen>
259259 <![CDATA[
260 $ opennlp TokenizerTrainer -model en-token.bin -alphaNumOpt -lang en -data en-token.train -encoding UTF-8
261
262 Indexing events using cutoff of 5
263
264 Computing event counts... done. 262271 events
260 $ opennlp TokenizerTrainer -model en-token.bin -alphaNumOpt true -lang en -data en-token.train -encoding UTF-8
261
262 Indexing events with TwoPass using cutoff of 5
263
264 Computing event counts... done. 45 events
265265 Indexing... done.
266 Sorting and merging events... done. Reduced 262271 events to 59060.
267 Done indexing.
268 Incorporating indexed data for training...
266 Sorting and merging events... done. Reduced 45 events to 25.
267 Done indexing in 0,09 s.
268 Incorporating indexed data for training...
269269 done.
270 Number of Event Tokens: 59060
270 Number of Event Tokens: 25
271271 Number of Outcomes: 2
272 Number of Predicates: 15695
272 Number of Predicates: 18
273273 ...done.
274 Computing model parameters...
274 Computing model parameters ...
275275 Performing 100 iterations.
276 1: .. loglikelihood=-181792.40419263614 0.9614292087192255
277 2: .. loglikelihood=-34208.094253153664 0.9629238459456059
278 3: .. loglikelihood=-18784.123872910015 0.9729211388220581
279 4: .. loglikelihood=-13246.88162585859 0.9856103038460219
280 5: .. loglikelihood=-10209.262670265718 0.9894422181636552
276 1: ... loglikelihood=-31.191623125197527 0.8222222222222222
277 2: ... loglikelihood=-21.036561339080343 0.8666666666666667
278 3: ... loglikelihood=-16.397882721809086 0.9333333333333333
279 4: ... loglikelihood=-13.624159882595462 0.9333333333333333
280 5: ... loglikelihood=-11.762067054883842 0.9777777777777777
281281
282282 ...<skipping a bunch of iterations>...
283283
284 95: .. loglikelihood=-769.2107474529454 0.999511955191386
285 96: .. loglikelihood=-763.8891914534009 0.999511955191386
286 97: .. loglikelihood=-758.6685383254891 0.9995157680414533
287 98: .. loglikelihood=-753.5458314695236 0.9995157680414533
288 99: .. loglikelihood=-748.5182305519613 0.9995157680414533
289 100: .. loglikelihood=-743.5830058068038 0.9995157680414533
290 Wrote tokenizer model.
284 95: ... loglikelihood=-2.0234942537226366 1.0
285 96: ... loglikelihood=-2.0107265117555935 1.0
286 97: ... loglikelihood=-1.998139365828305 1.0
287 98: ... loglikelihood=-1.9857283791639697 1.0
288 99: ... loglikelihood=-1.9734892753591327 1.0
289 100: ... loglikelihood=-1.9614179307958106 1.0
290 Writing tokenizer model ... done (0,044s)
291
292 Wrote tokenizer model to
291293 Path: en-token.bin]]>
292294 </screen>
293295 </para>
2323 <parent>
2424 <groupId>org.apache.opennlp</groupId>
2525 <artifactId>opennlp</artifactId>
26 <version>2.0.0</version>
26 <version>2.1.0</version>
2727 <relativePath>../pom.xml</relativePath>
2828 </parent>
2929
6060 </dependency>
6161
6262 <dependency>
63 <groupId>junit</groupId>
64 <artifactId>junit</artifactId>
63 <groupId>org.junit.jupiter</groupId>
64 <artifactId>junit-jupiter-api</artifactId>
65 <scope>test</scope>
66 </dependency>
67
68 <dependency>
69 <groupId>org.junit.jupiter</groupId>
70 <artifactId>junit-jupiter-engine</artifactId>
6571 <scope>test</scope>
6672 </dependency>
6773 </dependencies>
2525
2626 import morfologik.stemming.DictionaryMetadata;
2727
28 import org.junit.Assert;
29 import org.junit.Test;
28 import org.junit.jupiter.api.Assertions;
29 import org.junit.jupiter.api.Test;
3030
3131 import opennlp.morfologik.lemmatizer.MorfologikLemmatizer;
3232
7474 public void testBuildDictionary() throws Exception {
7575 Path output = createMorfologikDictionary();
7676 MorfologikLemmatizer ml = new MorfologikLemmatizer(output);
77 Assert.assertNotNull(ml);
77 Assertions.assertNotNull(ml);
7878 output.toFile().deleteOnExit();
7979 }
8080
2020 import java.util.Arrays;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.morfologik.builder.POSDictionayBuilderTest;
2727 import opennlp.tools.lemmatizer.Lemmatizer;
3838
3939 String[] lemmas = dict.lemmatize(toks, tags);
4040
41 Assert.assertEquals("casar", lemmas[0]);
42 Assert.assertEquals("casa", lemmas[1]);
41 Assertions.assertEquals("casar", lemmas[0]);
42 Assertions.assertEquals("casa", lemmas[1]);
4343
4444 // lookup is case insensitive. There is no entry casa - prop
45 Assert.assertNull(lemmas[2]);
45 Assertions.assertNull(lemmas[2]);
4646 }
4747
4848 @Test
5454
5555 List<List<String>> lemmas = dict.lemmatize(Arrays.asList(toks), Arrays.asList(tags));
5656
57 Assert.assertTrue(lemmas.get(0).contains("ir"));
58 Assert.assertTrue(lemmas.get(0).contains("ser"));
57 Assertions.assertTrue(lemmas.get(0).contains("ir"));
58 Assertions.assertTrue(lemmas.get(0).contains("ser"));
5959 }
6060
6161 private MorfologikLemmatizer createDictionary(boolean caseSensitive)
2222
2323 import morfologik.stemming.Dictionary;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.morfologik.builder.POSDictionayBuilderTest;
2929 import opennlp.tools.postag.TagDictionary;
3535 MorfologikTagDictionary dict = createDictionary(false);
3636
3737 List<String> tags = Arrays.asList(dict.getTags("carro"));
38 Assert.assertEquals(1, tags.size());
39 Assert.assertTrue(tags.contains("NOUN"));
38 Assertions.assertEquals(1, tags.size());
39 Assertions.assertTrue(tags.contains("NOUN"));
4040 }
4141
4242 @Test
4444 TagDictionary dict = createDictionary(false);
4545
4646 List<String> tags = Arrays.asList(dict.getTags("casa"));
47 Assert.assertEquals(2, tags.size());
48 Assert.assertTrue(tags.contains("NOUN"));
49 Assert.assertTrue(tags.contains("V"));
47 Assertions.assertEquals(2, tags.size());
48 Assertions.assertTrue(tags.contains("NOUN"));
49 Assertions.assertTrue(tags.contains("V"));
5050
5151 // this is the behavior of case insensitive dictionary
5252 // if we search it using case insensitive, Casa as a proper noun
5353 // should be lower case in the dictionary
5454 tags = Arrays.asList(dict.getTags("Casa"));
55 Assert.assertEquals(2, tags.size());
56 Assert.assertTrue(tags.contains("NOUN"));
57 Assert.assertTrue(tags.contains("V"));
55 Assertions.assertEquals(2, tags.size());
56 Assertions.assertTrue(tags.contains("NOUN"));
57 Assertions.assertTrue(tags.contains("V"));
5858 }
5959
6060 @Test
6262 TagDictionary dict = createDictionary(true);
6363
6464 List<String> tags = Arrays.asList(dict.getTags("casa"));
65 Assert.assertEquals(2, tags.size());
66 Assert.assertTrue(tags.contains("NOUN"));
67 Assert.assertTrue(tags.contains("V"));
65 Assertions.assertEquals(2, tags.size());
66 Assertions.assertTrue(tags.contains("NOUN"));
67 Assertions.assertTrue(tags.contains("V"));
6868
6969 // this is the behavior of case insensitive dictionary
7070 // if we search it using case insensitive, Casa as a proper noun
7171 // should be lower case in the dictionary
7272 tags = Arrays.asList(dict.getTags("Casa"));
73 Assert.assertEquals(1, tags.size());
74 Assert.assertTrue(tags.contains("PROP"));
73 Assertions.assertEquals(1, tags.size());
74 Assertions.assertTrue(tags.contains("PROP"));
7575
7676 }
7777
2323 import java.nio.charset.StandardCharsets;
2424 import java.nio.file.Path;
2525
26 import org.junit.Assert;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.morfologik.builder.POSDictionayBuilderTest;
3030 import opennlp.tools.cmdline.CmdLineUtil;
7878 POSModel posModel = trainPOSModel(ModelType.MAXENT, inFactory);
7979
8080 POSTaggerFactory factory = posModel.getFactory();
81 Assert.assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
81 Assertions.assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
8282
8383 factory = null;
8484
8989 POSModel fromSerialized = new POSModel(in);
9090
9191 factory = fromSerialized.getFactory();
92 Assert.assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
92 Assertions.assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
9393
94 Assert.assertEquals(2, factory.getTagDictionary().getTags("casa").length);
94 Assertions.assertEquals(2, factory.getTagDictionary().getTags("casa").length);
9595 }
9696
9797 }
2424 <parent>
2525 <groupId>org.apache.opennlp</groupId>
2626 <artifactId>opennlp</artifactId>
27 <version>2.0.0</version>
27 <version>2.1.0</version>
2828 <relativePath>../pom.xml</relativePath>
2929 </parent>
3030
5050 </dependency>
5151
5252 <dependency>
53 <groupId>junit</groupId>
54 <artifactId>junit</artifactId>
53 <groupId>org.junit.jupiter</groupId>
54 <artifactId>junit-jupiter-api</artifactId>
55 <scope>test</scope>
56 </dependency>
57
58 <dependency>
59 <groupId>org.junit.jupiter</groupId>
60 <artifactId>junit-jupiter-engine</artifactId>
5561 <scope>test</scope>
5662 </dependency>
5763
5050 String[] fields = line.split("\t");
5151
5252 if (fields.length != 10) {
53 throw new InvalidFormatException("Line must have exactly 10 fields");
53 throw new InvalidFormatException("Line [" + line + "] must have exactly 10 fields");
5454 }
5555
5656 id = fields[0];
2323 import java.util.zip.GZIPOutputStream;
2424
2525 import opennlp.tools.ml.model.AbstractModel;
26 import opennlp.tools.ml.model.ModelParameterChunker;
2627
2728 /**
2829 * Model writer that saves models in binary format.
6768 }
6869
6970 public void writeUTF(String s) throws java.io.IOException {
70 output.writeUTF(s);
71 ModelParameterChunker.writeUTF(output, s);
7172 }
7273
7374 public void writeInt(int i) throws java.io.IOException {
2323 import java.util.zip.GZIPOutputStream;
2424
2525 import opennlp.tools.ml.model.AbstractModel;
26 import opennlp.tools.ml.model.ModelParameterChunker;
2627
2728 public class BinaryQNModelWriter extends QNModelWriter {
2829 protected DataOutputStream output;
6364 }
6465
6566 public void writeUTF(String s) throws IOException {
66 output.writeUTF(s);
67 ModelParameterChunker.writeUTF(output, s);
6768 }
6869
6970 public void writeInt(int i) throws IOException {
5555 }
5656
5757 public String readUTF() throws IOException {
58 return input.readUTF();
58 return ModelParameterChunker.readUTF(input);
5959 }
6060
6161 }
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.tools.ml.model;
18
19 import java.io.DataInputStream;
20 import java.io.DataOutputStream;
21 import java.io.IOException;
22 import java.io.UTFDataFormatException;
23 import java.nio.ByteBuffer;
24 import java.nio.CharBuffer;
25 import java.nio.charset.CharsetEncoder;
26 import java.nio.charset.CoderResult;
27 import java.nio.charset.StandardCharsets;
28 import java.util.ArrayList;
29 import java.util.List;
30
/**
 * A helper class that handles Strings whose encoded form exceeds 64k (65535 bytes) in length.
 * This is achieved via the signature {@link #SIGNATURE_CHUNKED_PARAMS} at the beginning of
 * the String instance to be written to a {@link DataOutputStream}.
 * <p>
 * Background: In OpenNLP, for large(r) corpora, we train models whose (UTF String) parameters will exceed
 * the {@link #MAX_CHUNK_SIZE_BYTES} bytes limit set in {@link DataOutputStream}.
 * For writing and reading those models, we have to chunk up those string instances in 64kB blocks and
 * recombine them correctly upon reading a (binary) model file.
 * <p>
 * The problem was raised in <a href="https://issues.apache.org/jira/browse/OPENNLP-1366">ticket OPENNLP-1366</a>.
 * <p>
 * Solution strategy:
 * <ul>
 * <li>If writing parameters to a {@link DataOutputStream} blows up with a {@link UTFDataFormatException} a
 * large String instance is chunked up and written as appropriate blocks.</li>
 * <li>To indicate that chunking was conducted, we start with the {@link #SIGNATURE_CHUNKED_PARAMS} indicator,
 * directly followed by the number of chunks used. This way, when reading in chunked model parameters,
 * recombination is achieved transparently.</li>
 * </ul>
 * <p>
 * Note: Both, existing (binary) model files and newly trained models which don't require the chunking
 * technique, will be supported like in previous OpenNLP versions.
 * <p>
 * Implementation note: chunk sizes are measured in the <em>modified</em> UTF-8 encoding that
 * {@link DataOutputStream#writeUTF(String)} uses, not in standard UTF-8. The two differ for
 * {@code U+0000} (2 bytes instead of 1) and for supplementary characters (two 3-byte surrogates
 * instead of one 4-byte sequence), so measuring with a standard UTF-8 encoder can produce chunks
 * that are still too large to be written.
 *
 * @author <a href="mailto:martin.wiesner@hs-heilbronn.de">Martin Wiesner</a>
 * @author <a href="mailto:struberg@apache.org">Mark Struberg</a>
 */
public final class ModelParameterChunker {

  /**
   * A signature that denotes the start of a String that required chunking.
   * <p>
   * Semantics:
   * If a model parameter (String) carries this signature at the very beginning, this indicates
   * that 'n > 1' chunks must be processed to obtain the whole model parameters. Otherwise, those would not be
   * written to the binary model files (as reported in OPENNLP-1366) if the training occurs on large corpora
   * as used, for instance, in the context of (very large) German NLP models.
   * <p>
   * In the stream, the signature is directly followed by the number of chunks.
   */
  public static final String SIGNATURE_CHUNKED_PARAMS = "CHUNKED-MODEL-PARAMS:";

  /** The maximum 'utflen' (in modified UTF-8 bytes) that {@link DataOutputStream#writeUTF(String)} accepts. */
  private static final int MAX_CHUNK_SIZE_BYTES = 65535;

  private ModelParameterChunker() {
    // utility class, not meant to be instantiated
  }

  /**
   * Reads model parameters from {@code dis}. In case the stream starts with
   * {@link #SIGNATURE_CHUNKED_PARAMS}, the number of chunks is detected and the original large
   * parameter string is reconstructed from several chunks.
   *
   * @param dis The stream which will be used to read the model parameter from.
   * @return The parameter string; either the single value read, or the concatenation of all chunks.
   * @throws IOException Thrown if reading from {@code dis} fails.
   * @throws NumberFormatException Thrown if the text following the signature is not a valid integer.
   */
  public static String readUTF(DataInputStream dis) throws IOException {
    final String data = dis.readUTF();
    if (!data.startsWith(SIGNATURE_CHUNKED_PARAMS)) {
      // default case: no chunked data -> just return the read data / parameter information
      return data;
    }
    // everything after the signature is the number of chunks that follow in the stream
    final int chunkCount = Integer.parseInt(data.substring(SIGNATURE_CHUNKED_PARAMS.length()));
    final StringBuilder sb = new StringBuilder();
    for (int i = 0; i < chunkCount; i++) {
      sb.append(dis.readUTF());
    }
    return sb.toString(); // the reconstructed model parameter string
  }

  /**
   * Writes the model parameter {@code s} to {@code dos}. In case {@code s} does exceed
   * {@link #MAX_CHUNK_SIZE_BYTES} in (modified UTF-8) length, the chunking mechanism is used;
   * otherwise the parameter is written 'as is'.
   *
   * @param dos The {@link DataOutputStream} stream which will be used to persist the model.
   * @param s The input string that is checked for length and chunked if {@link #MAX_CHUNK_SIZE_BYTES} is
   *          exceeded.
   * @throws IOException Thrown if writing to {@code dos} fails.
   */
  public static void writeUTF(DataOutputStream dos, String s) throws IOException {
    try {
      dos.writeUTF(s);
    } catch (UTFDataFormatException dfe) {
      // we definitely have to chunk the given model parameter 's' as it exceeds the bytes allowed for 1 chunk
      final String[] chunks = splitByByteLength(s);
      // write the signature string with the amount of chunks for reading the model file correctly
      dos.writeUTF(SIGNATURE_CHUNKED_PARAMS + chunks.length); // add number of required chunks
      for (String c : chunks) {
        dos.writeUTF(c);
      }
    }
  }

  /**
   * Splits {@code input} into consecutive substrings, each of which encodes to at most
   * {@link #MAX_CHUNK_SIZE_BYTES} bytes of modified UTF-8. Concatenating the chunks in order
   * yields {@code input} again. An empty input results in a single empty chunk.
   */
  private static String[] splitByByteLength(String input) {
    final List<String> chunks = new ArrayList<>();
    int chunkStart = 0;
    int chunkBytes = 0;
    for (int i = 0; i < input.length(); i++) {
      final int charBytes = modifiedUtf8Length(input.charAt(i));
      if (chunkBytes + charBytes > MAX_CHUNK_SIZE_BYTES) {
        // Close the current chunk before this char. Splitting between two surrogates is harmless
        // here: modified UTF-8 encodes every surrogate on its own, so the pair is restored when
        // the chunks are concatenated again on reading.
        chunks.add(input.substring(chunkStart, i));
        chunkStart = i;
        chunkBytes = 0;
      }
      chunkBytes += charBytes;
    }
    chunks.add(input.substring(chunkStart));
    return chunks.toArray(new String[0]);
  }

  /**
   * Returns the number of bytes {@link DataOutputStream#writeUTF(String)} emits for the char {@code c}.
   */
  private static int modifiedUtf8Length(char c) {
    if (c >= 0x0001 && c <= 0x007F) {
      return 1;
    }
    if (c <= 0x07FF) {
      return 2; // also covers U+0000, which modified UTF-8 encodes as two bytes
    }
    return 3; // everything else, including each half of a surrogate pair
  }
}
2323 import java.util.zip.GZIPOutputStream;
2424
2525 import opennlp.tools.ml.model.AbstractModel;
26 import opennlp.tools.ml.model.ModelParameterChunker;
2627
2728 /**
2829 * Model writer that saves models in binary format.
6364 }
6465
6566 public void writeUTF(String s) throws java.io.IOException {
66 output.writeUTF(s);
67 ModelParameterChunker.writeUTF(output, s);
6768 }
6869
6970 public void writeInt(int i) throws java.io.IOException {
2323 import java.util.zip.GZIPOutputStream;
2424
2525 import opennlp.tools.ml.model.AbstractModel;
26 import opennlp.tools.ml.model.ModelParameterChunker;
2627
2728 /**
2829 * Model writer that saves models in binary format.
6465 }
6566
6667 public void writeUTF(String s) throws java.io.IOException {
67 output.writeUTF(s);
68 ModelParameterChunker.writeUTF(output, s);
6869 }
6970
7071 public void writeInt(int i) throws java.io.IOException {
273273 Properties manifest = (Properties) artifactProvider.getArtifact("manifest.properties");
274274
275275 String version = manifest.getProperty("OpenNLP-Version");
276
277 if (Version.parse(version).getMinor() < 8) {
276 Version v = Version.parse(version);
277
278 if (v.getMajor() == 1 && v.getMinor() < 8) {
278279 return new DefaultPOSContextGenerator(cacheSize, getDictionary());
279280 }
280281 }
2424 import java.util.Map;
2525 import java.util.Map.Entry;
2626 import java.util.Properties;
27 import java.util.TreeMap;
2728
2829 import opennlp.tools.ml.EventTrainer;
2930
3738 public static final String CUTOFF_PARAM = "Cutoff";
3839 public static final String THREADS_PARAM = "Threads";
3940
40 private Map<String, Object> parameters = new HashMap<>();
41 private Map<String, Object> parameters = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
4142
4243 public TrainingParameters() {
4344 }
1616
1717 package opennlp.tools;
1818
19 import java.lang.annotation.ElementType;
20 import java.lang.annotation.Retention;
21 import java.lang.annotation.RetentionPolicy;
22 import java.lang.annotation.Target;
23
24 import org.junit.jupiter.api.Tag;
25
1926 /**
2027 * Marker class for tests with heap memory usage above 4 GB.
2128 */
22 public class HighMemoryUsage {
29 @Target( {ElementType.TYPE, ElementType.METHOD})
30 @Retention(RetentionPolicy.RUNTIME)
31 @Tag("opennlp.tools.HighMemoryUsage")
32 public @interface HighMemoryUsage {
2333 }
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.util.MockInputStreamFactory;
2626 import opennlp.tools.util.ObjectStream;
2929 public class ChunkSampleStreamTest {
3030
3131 @Test
32 public void testReadingEvents() throws IOException {
32 void testReadingEvents() throws IOException {
3333
3434 String sample = "word11 tag11 pred11" +
3535 '\n' +
5858
5959 // read first sample
6060 ChunkSample firstSample = chunkStream.read();
61 Assert.assertEquals("word11", firstSample.getSentence()[0]);
62 Assert.assertEquals("tag11", firstSample.getTags()[0]);
63 Assert.assertEquals("pred11", firstSample.getPreds()[0]);
64 Assert.assertEquals("word12", firstSample.getSentence()[1]);
65 Assert.assertEquals("tag12", firstSample.getTags()[1]);
66 Assert.assertEquals("pred12", firstSample.getPreds()[1]);
67 Assert.assertEquals("word13", firstSample.getSentence()[2]);
68 Assert.assertEquals("tag13", firstSample.getTags()[2]);
69 Assert.assertEquals("pred13", firstSample.getPreds()[2]);
61 Assertions.assertEquals("word11", firstSample.getSentence()[0]);
62 Assertions.assertEquals("tag11", firstSample.getTags()[0]);
63 Assertions.assertEquals("pred11", firstSample.getPreds()[0]);
64 Assertions.assertEquals("word12", firstSample.getSentence()[1]);
65 Assertions.assertEquals("tag12", firstSample.getTags()[1]);
66 Assertions.assertEquals("pred12", firstSample.getPreds()[1]);
67 Assertions.assertEquals("word13", firstSample.getSentence()[2]);
68 Assertions.assertEquals("tag13", firstSample.getTags()[2]);
69 Assertions.assertEquals("pred13", firstSample.getPreds()[2]);
7070
7171
7272 // read second sample
7373 ChunkSample secondSample = chunkStream.read();
74 Assert.assertEquals("word21", secondSample.getSentence()[0]);
75 Assert.assertEquals("tag21", secondSample.getTags()[0]);
76 Assert.assertEquals("pred21", secondSample.getPreds()[0]);
77 Assert.assertEquals("word22", secondSample.getSentence()[1]);
78 Assert.assertEquals("tag22", secondSample.getTags()[1]);
79 Assert.assertEquals("pred22", secondSample.getPreds()[1]);
80 Assert.assertEquals("word23", secondSample.getSentence()[2]);
81 Assert.assertEquals("tag23", secondSample.getTags()[2]);
82 Assert.assertEquals("pred23", secondSample.getPreds()[2]);
74 Assertions.assertEquals("word21", secondSample.getSentence()[0]);
75 Assertions.assertEquals("tag21", secondSample.getTags()[0]);
76 Assertions.assertEquals("pred21", secondSample.getPreds()[0]);
77 Assertions.assertEquals("word22", secondSample.getSentence()[1]);
78 Assertions.assertEquals("tag22", secondSample.getTags()[1]);
79 Assertions.assertEquals("pred22", secondSample.getPreds()[1]);
80 Assertions.assertEquals("word23", secondSample.getSentence()[2]);
81 Assertions.assertEquals("tag23", secondSample.getTags()[2]);
82 Assertions.assertEquals("pred23", secondSample.getPreds()[2]);
8383
84 Assert.assertNull(chunkStream.read());
84 Assertions.assertNull(chunkStream.read());
8585
8686 chunkStream.close();
8787 }
2828 import java.nio.charset.StandardCharsets;
2929 import java.util.Arrays;
3030
31 import org.junit.Assert;
32 import org.junit.Test;
31 import org.junit.jupiter.api.Assertions;
32 import org.junit.jupiter.api.Test;
3333
3434 import opennlp.tools.formats.ResourceAsStreamFactory;
3535 import opennlp.tools.util.InputStreamFactory;
113113 }
114114
115115 @Test
116 public void testChunkSampleSerDe() throws IOException {
116 void testChunkSampleSerDe() throws IOException {
117117 ChunkSample chunkSample = createGoldSample();
118118 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
119119 ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream);
131131 // do nothing
132132 }
133133
134 Assert.assertNotNull(deSerializedChunkSample);
135 Assert.assertArrayEquals(chunkSample.getPhrasesAsSpanList(),
134 Assertions.assertNotNull(deSerializedChunkSample);
135 Assertions.assertArrayEquals(chunkSample.getPhrasesAsSpanList(),
136136 deSerializedChunkSample.getPhrasesAsSpanList());
137 Assert.assertArrayEquals(chunkSample.getPreds(), deSerializedChunkSample.getPreds());
138 Assert.assertArrayEquals(chunkSample.getTags(), deSerializedChunkSample.getTags());
139 Assert.assertArrayEquals(chunkSample.getSentence(), deSerializedChunkSample.getSentence());
140 Assert.assertEquals(chunkSample, deSerializedChunkSample);
141 }
142
143 @Test(expected = IllegalArgumentException.class)
144 public void testParameterValidation() {
145 new ChunkSample(new String[] {""}, new String[] {""},
146 new String[] {"test", "one element to much"});
147 }
148
149 @Test
150 public void testRetrievingContent() {
137 Assertions.assertArrayEquals(chunkSample.getPreds(), deSerializedChunkSample.getPreds());
138 Assertions.assertArrayEquals(chunkSample.getTags(), deSerializedChunkSample.getTags());
139 Assertions.assertArrayEquals(chunkSample.getSentence(), deSerializedChunkSample.getSentence());
140 Assertions.assertEquals(chunkSample, deSerializedChunkSample);
141 }
142
143 @Test
144 void testParameterValidation() {
145 Assertions.assertThrows(IllegalArgumentException.class, () -> {
146 new ChunkSample(new String[] {""}, new String[] {""},
147 new String[] {"test", "one element to much"});
148 });
149 }
150
151 @Test
152 void testRetrievingContent() {
151153 ChunkSample sample = new ChunkSample(createSentence(), createTags(), createChunks());
152154
153 Assert.assertArrayEquals(createSentence(), sample.getSentence());
154 Assert.assertArrayEquals(createTags(), sample.getTags());
155 Assert.assertArrayEquals(createChunks(), sample.getPreds());
156 }
157
158 @Test
159 public void testToString() throws IOException {
155 Assertions.assertArrayEquals(createSentence(), sample.getSentence());
156 Assertions.assertArrayEquals(createTags(), sample.getTags());
157 Assertions.assertArrayEquals(createChunks(), sample.getPreds());
158 }
159
160 @Test
161 void testToString() throws IOException {
160162
161163 ChunkSample sample = new ChunkSample(createSentence(), createTags(), createChunks());
162164 String[] sentence = createSentence();
168170 for (int i = 0; i < sentence.length; i++) {
169171 String line = reader.readLine();
170172 String[] parts = line.split("\\s+");
171 Assert.assertEquals(3, parts.length);
172 Assert.assertEquals(sentence[i], parts[0]);
173 Assert.assertEquals(tags[i], parts[1]);
174 Assert.assertEquals(chunks[i], parts[2]);
173 Assertions.assertEquals(3, parts.length);
174 Assertions.assertEquals(sentence[i], parts[0]);
175 Assertions.assertEquals(tags[i], parts[1]);
176 Assertions.assertEquals(chunks[i], parts[2]);
175177 }
176178 }
177179
178180 @Test
179 public void testNicePrint() {
181 void testNicePrint() {
180182
181183 ChunkSample sample = new ChunkSample(createSentence(), createTags(), createChunks());
182184
183 Assert.assertEquals(" [NP Forecasts_NNS ] [PP for_IN ] [NP the_DT trade_NN figures_NNS ] "
185 Assertions.assertEquals(" [NP Forecasts_NNS ] [PP for_IN ] [NP the_DT trade_NN figures_NNS ] "
184186 + "[VP range_VBP ] [ADVP widely_RB ] ,_, [NP Forecasts_NNS ] [PP for_IN ] "
185187 + "[NP the_DT trade_NN figures_NNS ] "
186188 + "[VP range_VBP ] [ADVP widely_RB ] ._.", sample.nicePrint());
187189 }
188190
189191 @Test
190 public void testAsSpan() {
192 void testAsSpan() {
191193 ChunkSample sample = new ChunkSample(createSentence(), createTags(),
192194 createChunks());
193195 Span[] spans = sample.getPhrasesAsSpanList();
194196
195 Assert.assertEquals(10, spans.length);
196 Assert.assertEquals(new Span(0, 1, "NP"), spans[0]);
197 Assert.assertEquals(new Span(1, 2, "PP"), spans[1]);
198 Assert.assertEquals(new Span(2, 5, "NP"), spans[2]);
199 Assert.assertEquals(new Span(5, 6, "VP"), spans[3]);
200 Assert.assertEquals(new Span(6, 7, "ADVP"), spans[4]);
201 Assert.assertEquals(new Span(8, 9, "NP"), spans[5]);
202 Assert.assertEquals(new Span(9, 10, "PP"), spans[6]);
203 Assert.assertEquals(new Span(10, 13, "NP"), spans[7]);
204 Assert.assertEquals(new Span(13, 14, "VP"), spans[8]);
205 Assert.assertEquals(new Span(14, 15, "ADVP"), spans[9]);
197 Assertions.assertEquals(10, spans.length);
198 Assertions.assertEquals(new Span(0, 1, "NP"), spans[0]);
199 Assertions.assertEquals(new Span(1, 2, "PP"), spans[1]);
200 Assertions.assertEquals(new Span(2, 5, "NP"), spans[2]);
201 Assertions.assertEquals(new Span(5, 6, "VP"), spans[3]);
202 Assertions.assertEquals(new Span(6, 7, "ADVP"), spans[4]);
203 Assertions.assertEquals(new Span(8, 9, "NP"), spans[5]);
204 Assertions.assertEquals(new Span(9, 10, "PP"), spans[6]);
205 Assertions.assertEquals(new Span(10, 13, "NP"), spans[7]);
206 Assertions.assertEquals(new Span(13, 14, "VP"), spans[8]);
207 Assertions.assertEquals(new Span(14, 15, "ADVP"), spans[9]);
206208 }
207209
208210
210212 // the same validateArguments method, we do a deeper test only once
211213
212214 @Test
213 public void testPhraseAsSpan() {
215 void testPhraseAsSpan() {
214216 Span[] spans = ChunkSample.phrasesAsSpanList(createSentence(),
215217 createTags(), createChunks());
216218
217 Assert.assertEquals(10, spans.length);
218 Assert.assertEquals(new Span(0, 1, "NP"), spans[0]);
219 Assert.assertEquals(new Span(1, 2, "PP"), spans[1]);
220 Assert.assertEquals(new Span(2, 5, "NP"), spans[2]);
221 Assert.assertEquals(new Span(5, 6, "VP"), spans[3]);
222 Assert.assertEquals(new Span(6, 7, "ADVP"), spans[4]);
223 Assert.assertEquals(new Span(8, 9, "NP"), spans[5]);
224 Assert.assertEquals(new Span(9, 10, "PP"), spans[6]);
225 Assert.assertEquals(new Span(10, 13, "NP"), spans[7]);
226 Assert.assertEquals(new Span(13, 14, "VP"), spans[8]);
227 Assert.assertEquals(new Span(14, 15, "ADVP"), spans[9]);
228 }
229
230 @Test
231 public void testRegions() throws IOException {
219 Assertions.assertEquals(10, spans.length);
220 Assertions.assertEquals(new Span(0, 1, "NP"), spans[0]);
221 Assertions.assertEquals(new Span(1, 2, "PP"), spans[1]);
222 Assertions.assertEquals(new Span(2, 5, "NP"), spans[2]);
223 Assertions.assertEquals(new Span(5, 6, "VP"), spans[3]);
224 Assertions.assertEquals(new Span(6, 7, "ADVP"), spans[4]);
225 Assertions.assertEquals(new Span(8, 9, "NP"), spans[5]);
226 Assertions.assertEquals(new Span(9, 10, "PP"), spans[6]);
227 Assertions.assertEquals(new Span(10, 13, "NP"), spans[7]);
228 Assertions.assertEquals(new Span(13, 14, "VP"), spans[8]);
229 Assertions.assertEquals(new Span(14, 15, "ADVP"), spans[9]);
230 }
231
232 @Test
233 void testRegions() throws IOException {
232234 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
233235 "/opennlp/tools/chunker/output.txt");
234236
237239
238240 ChunkSample cs1 = predictedSample.read();
239241 String[] g1 = Span.spansToStrings(cs1.getPhrasesAsSpanList(), cs1.getSentence());
240 Assert.assertEquals(15, g1.length);
242 Assertions.assertEquals(15, g1.length);
241243
242244 ChunkSample cs2 = predictedSample.read();
243245 String[] g2 = Span.spansToStrings(cs2.getPhrasesAsSpanList(), cs2.getSentence());
244 Assert.assertEquals(10, g2.length);
246 Assertions.assertEquals(10, g2.length);
245247
246248 ChunkSample cs3 = predictedSample.read();
247249 String[] g3 = Span.spansToStrings(cs3.getPhrasesAsSpanList(), cs3.getSentence());
248 Assert.assertEquals(7, g3.length);
249 Assert.assertEquals("United", g3[0]);
250 Assert.assertEquals("'s directors", g3[1]);
251 Assert.assertEquals("voted", g3[2]);
252 Assert.assertEquals("themselves", g3[3]);
253 Assert.assertEquals("their spouses", g3[4]);
254 Assert.assertEquals("lifetime access", g3[5]);
255 Assert.assertEquals("to", g3[6]);
250 Assertions.assertEquals(7, g3.length);
251 Assertions.assertEquals("United", g3[0]);
252 Assertions.assertEquals("'s directors", g3[1]);
253 Assertions.assertEquals("voted", g3[2]);
254 Assertions.assertEquals("themselves", g3[3]);
255 Assertions.assertEquals("their spouses", g3[4]);
256 Assertions.assertEquals("lifetime access", g3[5]);
257 Assertions.assertEquals("to", g3[6]);
256258
257259 predictedSample.close();
258260
259261 }
260262
261 @Test(expected = IllegalArgumentException.class)
262 public void testInvalidPhraseAsSpan1() {
263 ChunkSample.phrasesAsSpanList(new String[2], new String[1], new String[1]);
264 }
265
266 @Test(expected = IllegalArgumentException.class)
267 public void testInvalidPhraseAsSpan2() {
268 ChunkSample.phrasesAsSpanList(new String[1], new String[2], new String[1]);
269 }
270
271 @Test(expected = IllegalArgumentException.class)
272 public void testInvalidPhraseAsSpan3() {
273 ChunkSample.phrasesAsSpanList(new String[1], new String[1], new String[2]);
274 }
275
276 @Test(expected = IllegalArgumentException.class)
277 public void testInvalidChunkSampleArray() {
278 new ChunkSample(new String[1], new String[1], new String[2]);
279 }
280
281 @Test(expected = IllegalArgumentException.class)
282 public void testInvalidChunkSampleList() {
283 new ChunkSample(Arrays.asList(new String[1]), Arrays.asList(new String[1]),
284 Arrays.asList(new String[2]));
285 }
286
287 @Test
288 public void testEquals() {
289 Assert.assertFalse(createGoldSample() == createGoldSample());
290 Assert.assertTrue(createGoldSample().equals(createGoldSample()));
291 Assert.assertFalse(createPredSample().equals(createGoldSample()));
292 Assert.assertFalse(createPredSample().equals(new Object()));
263 @Test
264 void testInvalidPhraseAsSpan1() {
265 Assertions.assertThrows(IllegalArgumentException.class, () -> {
266 ChunkSample.phrasesAsSpanList(new String[2], new String[1], new String[1]);
267 });
268 }
269
270 @Test
271 void testInvalidPhraseAsSpan2() {
272 Assertions.assertThrows(IllegalArgumentException.class, () -> {
273 ChunkSample.phrasesAsSpanList(new String[1], new String[2], new String[1]);
274 });
275 }
276
277 @Test
278 void testInvalidPhraseAsSpan3() {
279 Assertions.assertThrows(IllegalArgumentException.class, () -> {
280 ChunkSample.phrasesAsSpanList(new String[1], new String[1], new String[2]);
281 });
282 }
283
284 @Test
285 void testInvalidChunkSampleArray() {
286 Assertions.assertThrows(IllegalArgumentException.class, () -> {
287 new ChunkSample(new String[1], new String[1], new String[2]);
288 });
289 }
290
291 @Test
292 void testInvalidChunkSampleList() {
293 Assertions.assertThrows(IllegalArgumentException.class, () -> {
294 new ChunkSample(Arrays.asList(new String[1]), Arrays.asList(new String[1]),
295 Arrays.asList(new String[2]));
296 });
297 }
298
299 @Test
300 void testEquals() {
301 Assertions.assertFalse(createGoldSample() == createGoldSample());
302 Assertions.assertTrue(createGoldSample().equals(createGoldSample()));
303 Assertions.assertFalse(createPredSample().equals(createGoldSample()));
304 Assertions.assertFalse(createPredSample().equals(new Object()));
293305 }
294306
295307 }
2222 import java.nio.charset.StandardCharsets;
2323 import java.util.Locale;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.cmdline.chunker.ChunkerDetailedFMeasureListener;
2929 import opennlp.tools.formats.ResourceAsStreamFactory;
3232 public class ChunkerDetailedFMeasureListenerTest {
3333
3434 @Test
35 public void testEvaluator() throws IOException {
35 void testEvaluator() throws IOException {
3636
3737 ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory(
3838 getClass(), "/opennlp/tools/chunker/output.txt");
6565 line = reader.readLine();
6666 }
6767
68 Assert.assertEquals(expected.toString().trim(), listener.createReport(Locale.ENGLISH).trim());
68 Assertions.assertEquals(expected.toString().trim(), listener.createReport(Locale.ENGLISH).trim());
6969 }
7070 }
2121 import java.io.OutputStream;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.cmdline.chunker.ChunkEvaluationErrorListener;
2828 import opennlp.tools.formats.ResourceAsStreamFactory;
4747 * @throws IOException
4848 */
4949 @Test
50 public void testEvaluator() throws IOException {
50 void testEvaluator() throws IOException {
5151 ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory(
5252 getClass(), "/opennlp/tools/chunker/output.txt");
5353 ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(),
6969
7070 FMeasure fm = evaluator.getFMeasure();
7171
72 Assert.assertEquals(0.8d, fm.getPrecisionScore(), DELTA);
73 Assert.assertEquals(0.875d, fm.getRecallScore(), DELTA);
72 Assertions.assertEquals(0.8d, fm.getPrecisionScore(), DELTA);
73 Assertions.assertEquals(0.875d, fm.getRecallScore(), DELTA);
7474
75 Assert.assertNotSame(stream.toString().length(), 0);
75 Assertions.assertNotSame(stream.toString().length(), 0);
7676 }
7777
7878 @Test
79 public void testEvaluatorNoError() throws IOException {
79 void testEvaluatorNoError() throws IOException {
8080 ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory(
8181 getClass(), "/opennlp/tools/chunker/output.txt");
8282 ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(),
9999
100100 FMeasure fm = evaluator.getFMeasure();
101101
102 Assert.assertEquals(1d, fm.getPrecisionScore(), DELTA);
103 Assert.assertEquals(1d, fm.getRecallScore(), DELTA);
102 Assertions.assertEquals(1d, fm.getPrecisionScore(), DELTA);
103 Assertions.assertEquals(1d, fm.getRecallScore(), DELTA);
104104
105 Assert.assertEquals(stream.toString().length(), 0);
105 Assertions.assertEquals(stream.toString().length(), 0);
106106 }
107107
108108 }
2121 import java.io.IOException;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.formats.ResourceAsStreamFactory;
2828 import opennlp.tools.util.ObjectStream;
5151 }
5252
5353 @Test
54 public void testDefaultFactory() throws IOException {
54 void testDefaultFactory() throws IOException {
5555
5656 ChunkerModel model = trainModel(ModelType.MAXENT, new ChunkerFactory());
5757
5858 ChunkerFactory factory = model.getFactory();
59 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
60 Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
59 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
60 Assertions.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
6161
6262 ByteArrayOutputStream out = new ByteArrayOutputStream();
6363 model.serialize(out);
6666 ChunkerModel fromSerialized = new ChunkerModel(in);
6767
6868 factory = fromSerialized.getFactory();
69 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
70 Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
69 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
70 Assertions.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
7171 }
7272
7373
7474 @Test
75 public void testDummyFactory() throws IOException {
75 void testDummyFactory() throws IOException {
7676
7777 ChunkerModel model = trainModel(ModelType.MAXENT, new DummyChunkerFactory());
7878
7979 DummyChunkerFactory factory = (DummyChunkerFactory) model.getFactory();
80 Assert.assertTrue(factory.getContextGenerator() instanceof DummyChunkerFactory.DummyContextGenerator);
81 Assert.assertTrue(factory.getSequenceValidator() instanceof DummyChunkerFactory.DummySequenceValidator);
80 Assertions.assertTrue(factory.getContextGenerator()
81 instanceof DummyChunkerFactory.DummyContextGenerator);
82 Assertions.assertTrue(factory.getSequenceValidator()
83 instanceof DummyChunkerFactory.DummySequenceValidator);
8284
8385
8486 ByteArrayOutputStream out = new ByteArrayOutputStream();
8890 ChunkerModel fromSerialized = new ChunkerModel(in);
8991
9092 factory = (DummyChunkerFactory) fromSerialized.getFactory();
91 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
92 Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
93 Assertions.assertTrue(factory.getContextGenerator()
94 instanceof DefaultChunkerContextGenerator);
95 Assertions.assertTrue(factory.getSequenceValidator()
96 instanceof DefaultChunkerSequenceValidator);
9397
9498
9599 ChunkerME chunker = new ChunkerME(model);
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 public class ChunkerMEIT {
2525
26 private static String[] toks1 = { "Rockwell", "said", "the", "agreement", "calls", "for",
26 private static String[] toks1 = {"Rockwell", "said", "the", "agreement", "calls", "for",
2727 "it", "to", "supply", "200", "additional", "so-called", "shipsets",
28 "for", "the", "planes", "." };
28 "for", "the", "planes", "."};
2929
30 private static String[] tags1 = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
31 "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
30 private static String[] tags1 = {"NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
31 "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "."};
3232
33 private static String[] expect1 = { "B-NP", "B-VP", "B-NP", "I-NP", "B-VP", "B-SBAR",
33 private static String[] expect1 = {"B-NP", "B-VP", "B-NP", "I-NP", "B-VP", "B-SBAR",
3434 "B-NP", "B-VP", "I-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-PP", "B-NP",
35 "I-NP", "O" };
35 "I-NP", "O"};
3636
3737 @Test
38 public void downloadModel() throws IOException {
38 void downloadModel() throws IOException {
3939
4040 ChunkerME chunker = new ChunkerME("en");
4141
4242 String[] preds = chunker.chunk(toks1, tags1);
4343
44 Assert.assertArrayEquals(expect1, preds);
44 Assertions.assertArrayEquals(expect1, preds);
4545 }
4646
4747 }
2020 import java.nio.charset.StandardCharsets;
2121 import java.util.Arrays;
2222
23 import org.junit.Assert;
24 import org.junit.Before;
25 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.BeforeEach;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.formats.ResourceAsStreamFactory;
2828 import opennlp.tools.namefind.NameFinderME;
4848 * training sentences and then the computed model is used to predict sentences
4949 * from the training sentences.
5050 */
51
5152 public class ChunkerMETest {
5253
5354 private Chunker chunker;
5455
55 private static String[] toks1 = { "Rockwell", "said", "the", "agreement", "calls", "for",
56 private static String[] toks1 = {"Rockwell", "said", "the", "agreement", "calls", "for",
5657 "it", "to", "supply", "200", "additional", "so-called", "shipsets",
57 "for", "the", "planes", "." };
58 "for", "the", "planes", "."};
5859
59 private static String[] tags1 = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
60 "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
60 private static String[] tags1 = {"NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
61 "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "."};
6162
62 private static String[] expect1 = { "B-NP", "B-VP", "B-NP", "I-NP", "B-VP", "B-SBAR",
63 private static String[] expect1 = {"B-NP", "B-VP", "B-NP", "I-NP", "B-VP", "B-SBAR",
6364 "B-NP", "B-VP", "I-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-PP", "B-NP",
64 "I-NP", "O" };
65 "I-NP", "O"};
6566
66 @Test(expected = IOException.class)
67 public void downloadNonExistentModel() throws IOException {
67 @Test
68 void downloadNonExistentModel() {
6869
69 ChunkerME chunker = new ChunkerME("en");
70 Assertions.assertThrows(IOException.class, () -> {
7071
71 String[] preds = chunker.chunk(toks1, tags1);
72 ChunkerME chunker = new ChunkerME("en");
7273
73 Assert.assertArrayEquals(expect1, preds);
74 String[] preds = chunker.chunk(toks1, tags1);
75
76 Assertions.assertArrayEquals(expect1, preds);
77 });
78
79
7480 }
7581
76 @Before
77 public void startup() throws IOException {
82 @BeforeEach
83 void startup() throws IOException {
7884 // train the chunker
7985
8086 ResourceAsStreamFactory in = new ResourceAsStreamFactory(getClass(),
9399 }
94100
95101 @Test
96 public void testChunkAsArray() throws Exception {
102 void testChunkAsArray() {
97103
98104 String[] preds = chunker.chunk(toks1, tags1);
99105
100 Assert.assertArrayEquals(expect1, preds);
106 Assertions.assertArrayEquals(expect1, preds);
101107 }
102108
103109 @Test
104 public void testChunkAsSpan() throws Exception {
110 void testChunkAsSpan() {
105111 Span[] preds = chunker.chunkAsSpans(toks1, tags1);
106112 System.out.println(Arrays.toString(preds));
107113
108 Assert.assertEquals(10, preds.length);
109 Assert.assertEquals(new Span(0, 1, "NP"), preds[0]);
110 Assert.assertEquals(new Span(1, 2, "VP"), preds[1]);
111 Assert.assertEquals(new Span(2, 4, "NP"), preds[2]);
112 Assert.assertEquals(new Span(4, 5, "VP"), preds[3]);
113 Assert.assertEquals(new Span(5, 6, "SBAR"), preds[4]);
114 Assert.assertEquals(new Span(6, 7, "NP"), preds[5]);
115 Assert.assertEquals(new Span(7, 9, "VP"), preds[6]);
116 Assert.assertEquals(new Span(9, 13, "NP"), preds[7]);
117 Assert.assertEquals(new Span(13, 14, "PP"), preds[8]);
118 Assert.assertEquals(new Span(14, 16, "NP"), preds[9]);
114 Assertions.assertEquals(10, preds.length);
115 Assertions.assertEquals(new Span(0, 1, "NP"), preds[0]);
116 Assertions.assertEquals(new Span(1, 2, "VP"), preds[1]);
117 Assertions.assertEquals(new Span(2, 4, "NP"), preds[2]);
118 Assertions.assertEquals(new Span(4, 5, "VP"), preds[3]);
119 Assertions.assertEquals(new Span(5, 6, "SBAR"), preds[4]);
120 Assertions.assertEquals(new Span(6, 7, "NP"), preds[5]);
121 Assertions.assertEquals(new Span(7, 9, "VP"), preds[6]);
122 Assertions.assertEquals(new Span(9, 13, "NP"), preds[7]);
123 Assertions.assertEquals(new Span(13, 14, "PP"), preds[8]);
124 Assertions.assertEquals(new Span(14, 16, "NP"), preds[9]);
119125
120126 }
121127
122128 @Test
123 public void testTokenProbArray() throws Exception {
129 void testTokenProbArray() {
124130 Sequence[] preds = chunker.topKSequences(toks1, tags1);
125131
126 Assert.assertTrue(preds.length > 0);
127 Assert.assertEquals(expect1.length, preds[0].getProbs().length);
128 Assert.assertEquals(Arrays.asList(expect1), preds[0].getOutcomes());
129 Assert.assertNotSame(Arrays.asList(expect1), preds[1].getOutcomes());
132 Assertions.assertTrue(preds.length > 0);
133 Assertions.assertEquals(expect1.length, preds[0].getProbs().length);
134 Assertions.assertEquals(Arrays.asList(expect1), preds[0].getOutcomes());
135 Assertions.assertNotSame(Arrays.asList(expect1), preds[1].getOutcomes());
130136 }
131137
132138 @Test
133 public void testTokenProbMinScore() throws Exception {
139 void testTokenProbMinScore() {
134140 Sequence[] preds = chunker.topKSequences(toks1, tags1, -5.55);
135141
136 Assert.assertEquals(4, preds.length);
137 Assert.assertEquals(expect1.length, preds[0].getProbs().length);
138 Assert.assertEquals(Arrays.asList(expect1), preds[0].getOutcomes());
139 Assert.assertNotSame(Arrays.asList(expect1), preds[1].getOutcomes());
142 Assertions.assertEquals(4, preds.length);
143 Assertions.assertEquals(expect1.length, preds[0].getProbs().length);
144 Assertions.assertEquals(Arrays.asList(expect1), preds[0].getOutcomes());
145 Assertions.assertNotSame(Arrays.asList(expect1), preds[1].getOutcomes());
140146 }
141
142 @Test(expected = InsufficientTrainingDataException.class)
143 public void testInsufficientData() throws IOException {
144147
145 ResourceAsStreamFactory in = new ResourceAsStreamFactory(getClass(),
146 "/opennlp/tools/chunker/test-insufficient.txt");
148 @Test
149 void testInsufficientData() {
147150
148 ObjectStream<ChunkSample> sampleStream = new ChunkSampleStream(
149 new PlainTextByLineStream(in, StandardCharsets.UTF_8));
151 Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
150152
151 TrainingParameters params = new TrainingParameters();
152 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
153 params.put(TrainingParameters.CUTOFF_PARAM, 1);
153 ResourceAsStreamFactory in = new ResourceAsStreamFactory(getClass(),
154 "/opennlp/tools/chunker/test-insufficient.txt");
154155
155 ChunkerME.train("eng", sampleStream, params, new ChunkerFactory());
156 ObjectStream<ChunkSample> sampleStream = new ChunkSampleStream(
157 new PlainTextByLineStream(in, StandardCharsets.UTF_8));
158
159 TrainingParameters params = new TrainingParameters();
160 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
161 params.put(TrainingParameters.CUTOFF_PARAM, 1);
162
163 ChunkerME.train("eng", sampleStream, params, new ChunkerFactory());
164
165 });
166
156167
157168 }
158169
1616
1717 package opennlp.tools.chunker;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * This is the test class for {@link ChunkerModel}.
2525 public class ChunkerModelTest {
2626
2727 @Test
28 public void testInvalidFactorySignature() throws Exception {
28 void testInvalidFactorySignature() throws Exception {
2929
3030 ChunkerModel model = null;
3131 try {
3232 model = new ChunkerModel(this.getClass().getResourceAsStream("chunker170custom.bin"));
3333 } catch (IllegalArgumentException e) {
34 Assert.assertTrue("Exception must state ChunkerFactory",
35 e.getMessage().contains("ChunkerFactory"));
36 Assert.assertTrue("Exception must mention DummyChunkerFactory",
37 e.getMessage().contains("opennlp.tools.chunker.DummyChunkerFactory"));
34 Assertions.assertTrue(
35 e.getMessage().contains("ChunkerFactory"), "Exception must state ChunkerFactory");
36 Assertions.assertTrue(
37 e.getMessage().contains("opennlp.tools.chunker.DummyChunkerFactory"),
38 "Exception must mention DummyChunkerFactory");
3839 }
39 Assert.assertNull(model);
40 Assertions.assertNull(model);
4041 }
4142
4243 @Test
43 public void test170DefaultFactory() throws Exception {
44 void test170DefaultFactory() throws Exception {
4445
4546 // This is an OpenNLP 1.x model. It should load with OpenNLP 2.x.
46 Assert.assertNotNull(
47 Assertions.assertNotNull(
4748 new ChunkerModel(this.getClass().getResourceAsStream("chunker170default.bin")));
4849
4950 }
5051
5152 @Test
52 public void test180CustomFactory() throws Exception {
53 void test180CustomFactory() throws Exception {
5354
5455 // This is an OpenNLP 1.x model. It should load with OpenNLP 2.x.
55 Assert.assertNotNull(
56 Assertions.assertNotNull(
5657 new ChunkerModel(this.getClass().getResourceAsStream("chunker180custom.bin")));
5758
5859 }
2020 import java.nio.charset.StandardCharsets;
2121 import java.util.Collection;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
2727 import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
3232 interface ZeroMethods {
3333 }
3434
35 @Test(expected = IllegalArgumentException.class)
36 public void testZeroMethods() {
37 ArgumentParser.createUsage(ZeroMethods.class);
35 @Test
36 void testZeroMethods() {
37 Assertions.assertThrows(IllegalArgumentException.class, () -> {
38 ArgumentParser.createUsage(ZeroMethods.class);
39 });
3840 }
3941
4042 interface InvalidMethodName {
4143 String invalidMethodName();
4244 }
4345
44 @Test(expected = IllegalArgumentException.class)
45 public void testInvalidMethodName() {
46 ArgumentParser.createUsage(InvalidMethodName.class);
46 @Test
47 void testInvalidMethodName() {
48 Assertions.assertThrows(IllegalArgumentException.class, () -> {
49 ArgumentParser.createUsage(InvalidMethodName.class);
50 });
4751 }
4852
4953 interface InvalidReturnType {
5054 Exception getTest();
5155 }
5256
53 @Test(expected = IllegalArgumentException.class)
54 public void testInvalidReturnType() {
55 ArgumentParser.createUsage(InvalidReturnType.class);
57 @Test
58 void testInvalidReturnType() {
59 Assertions.assertThrows(IllegalArgumentException.class, () -> {
60 ArgumentParser.createUsage(InvalidReturnType.class);
61 });
5662 }
5763
5864 interface SimpleArguments extends AllOptionalArguments {
7783
7884
7985 @Test
80 public void testSimpleArguments() {
86 void testSimpleArguments() {
8187 String argsString = "-encoding UTF-8 -alphaNumOpt false";
8288 SimpleArguments args = ArgumentParser.parse(argsString.split(" "), SimpleArguments.class);
8389
84 Assert.assertEquals(StandardCharsets.UTF_8.name(), args.getEncoding());
85 Assert.assertEquals(Integer.valueOf(100), args.getIterations());
86 Assert.assertNull(args.getCutoff());
87 Assert.assertEquals(false, args.getAlphaNumOpt());
88 }
89
90 @Test(expected = IllegalArgumentException.class)
91 public void testSimpleArgumentsMissingEncoding() {
92 String argsString = "-alphaNumOpt false";
93
94 Assert.assertFalse(ArgumentParser.validateArguments(argsString.split(" "), SimpleArguments.class));
95 ArgumentParser.parse(argsString.split(" "), SimpleArguments.class);
90 Assertions.assertEquals(StandardCharsets.UTF_8.name(), args.getEncoding());
91 Assertions.assertEquals(Integer.valueOf(100), args.getIterations());
92 Assertions.assertNull(args.getCutoff());
93 Assertions.assertEquals(false, args.getAlphaNumOpt());
9694 }
9795
9896 @Test
99 public void testAllOptionalArgumentsOneArgument() {
97 void testSimpleArgumentsMissingEncoding() {
98 Assertions.assertThrows(IllegalArgumentException.class, () -> {
99 String argsString = "-alphaNumOpt false";
100
101 Assertions.assertFalse(ArgumentParser.validateArguments(argsString.split(" "), SimpleArguments.class));
102 ArgumentParser.parse(argsString.split(" "), SimpleArguments.class);
103 });
104
105 }
106
107 @Test
108 void testAllOptionalArgumentsOneArgument() {
100109 String argsString = "-alphaNumOpt false";
101110
102 Assert.assertTrue(ArgumentParser.validateArguments(argsString.split(" "), AllOptionalArguments.class));
111 Assertions.assertTrue(ArgumentParser.validateArguments(argsString.split(" "),
112 AllOptionalArguments.class));
103113 ArgumentParser.parse(argsString.split(" "), AllOptionalArguments.class);
104114 }
105115
106116 @Test
107 public void testAllOptionalArgumentsZeroArguments() {
117 void testAllOptionalArgumentsZeroArguments() {
108118 String[] args = {};
109 Assert.assertTrue(ArgumentParser.validateArguments(args, AllOptionalArguments.class));
119 Assertions.assertTrue(ArgumentParser.validateArguments(args, AllOptionalArguments.class));
110120 ArgumentParser.parse(args, AllOptionalArguments.class);
111121 }
112122
113 @Test(expected = IllegalArgumentException.class)
114 public void testAllOptionalArgumentsExtraArgument() {
115 String argsString = "-encoding UTF-8";
116 Assert.assertFalse(ArgumentParser.validateArguments(argsString.split(" "), AllOptionalArguments.class));
117 ArgumentParser.parse(argsString.split(" "), AllOptionalArguments.class);
123 @Test
124 void testAllOptionalArgumentsExtraArgument() {
125 Assertions.assertThrows(IllegalArgumentException.class, () -> {
126 String argsString = "-encoding UTF-8";
127 Assertions.assertFalse(ArgumentParser.validateArguments(argsString.split(" "),
128 AllOptionalArguments.class));
129 ArgumentParser.parse(argsString.split(" "), AllOptionalArguments.class);
130 });
118131 }
119132
120133 @Test
121 public void testSimpleArgumentsUsage() {
134 void testSimpleArgumentsUsage() {
122135
123136 String[] arguments = new String[] {"-encoding charset",
124137 "[-iterations num]",
128141
129142 int expectedLength = 2;
130143 for (String arg : arguments) {
131 Assert.assertTrue(usage.contains(arg));
144 Assertions.assertTrue(usage.contains(arg));
132145 expectedLength += arg.length();
133146 }
134147
135 Assert.assertTrue(usage.contains("a charset encoding"));
136 Assert.assertTrue(expectedLength < usage.length());
148 Assertions.assertTrue(usage.contains("a charset encoding"));
149 Assertions.assertTrue(expectedLength < usage.length());
137150 }
138151
139152 interface ExtendsEncodingParameter extends EncodingParameter {
142155 }
143156
144157 @Test
145 public void testDefaultEncodingParameter() {
158 void testDefaultEncodingParameter() {
146159
147160 String[] args = "-something aValue".split(" ");
148 Assert.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
161 Assertions.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
149162
150163 ExtendsEncodingParameter params = ArgumentParser.parse(args, ExtendsEncodingParameter.class);
151 Assert.assertEquals(Charset.defaultCharset(), params.getEncoding());
164 Assertions.assertEquals(Charset.defaultCharset(), params.getEncoding());
152165 }
153166
154167 @Test
155 public void testSetEncodingParameter() {
168 void testSetEncodingParameter() {
156169 Collection<Charset> availableCharset = Charset.availableCharsets().values();
157170 String notTheDefaultCharset = StandardCharsets.UTF_8.name();
158171 for (Charset charset : availableCharset) {
163176 }
164177
165178 String[] args = ("-something aValue -encoding " + notTheDefaultCharset).split(" ");
166 Assert.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
179 Assertions.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
167180
168181 ExtendsEncodingParameter params = ArgumentParser.parse(args, ExtendsEncodingParameter.class);
169 Assert.assertEquals(Charset.forName(notTheDefaultCharset), params.getEncoding());
182 Assertions.assertEquals(Charset.forName(notTheDefaultCharset), params.getEncoding());
170183 }
171184 }
1818
1919 import java.security.Permission;
2020
21 import org.junit.After;
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
21 import org.junit.jupiter.api.AfterEach;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class CLITest {
2727
6060
6161 private final SecurityManager originalSecurityManager = System.getSecurityManager();
6262
63 @Before
64 public void installNoExitSecurityManager() {
63 @BeforeEach
64 void installNoExitSecurityManager() {
6565 System.setSecurityManager(new NoExitSecurityManager());
6666 }
6767
6969 * Ensure the main method does not fail to print help message.
7070 */
7171 @Test
72 public void testMainHelpMessage() {
72 void testMainHelpMessage() {
7373
7474 try {
75 CLI.main(new String[]{});
75 CLI.main(new String[] {});
7676 } catch (ExitException e) {
77 Assert.assertEquals(0, e.status());
77 Assertions.assertEquals(0, e.status());
7878 }
7979 }
8080
8282 * Ensure the main method prints error and returns 1.
8383 */
8484 @Test
85 public void testUnknownToolMessage() {
85 void testUnknownToolMessage() {
8686 try {
87 CLI.main(new String[]{"unknown name"});
87 CLI.main(new String[] {"unknown name"});
8888 } catch (ExitException e) {
89 Assert.assertEquals(1, e.status());
89 Assertions.assertEquals(1, e.status());
9090 }
9191 }
9292
9494 * Ensure the tool checks the parameter and returns 1.
9595 */
9696 @Test
97 public void testToolParameterMessage() {
97 void testToolParameterMessage() {
9898 try {
99 CLI.main(new String[]{"DoccatTrainer", "-param", "value"});
99 CLI.main(new String[] {"DoccatTrainer", "-param", "value"});
100100 } catch (ExitException e) {
101 Assert.assertEquals(1, e.status());
101 Assertions.assertEquals(1, e.status());
102102 }
103103 }
104104
106106 * Ensure the main method prints error and returns -1
107107 */
108108 @Test
109 public void testUnknownFileMessage() {
109 void testUnknownFileMessage() {
110110 try {
111 CLI.main(new String[]{"Doccat", "unknown.model"});
111 CLI.main(new String[] {"Doccat", "unknown.model"});
112112 } catch (ExitException e) {
113 Assert.assertEquals(-1, e.status());
113 Assertions.assertEquals(-1, e.status());
114114 }
115115 }
116116
119119 * Ensure all tools do not fail printing help message;
120120 */
121121 @Test
122 public void testHelpMessageOfTools() {
122 void testHelpMessageOfTools() {
123123
124124 for (String toolName : CLI.getToolNames()) {
125125 System.err.println("-> ToolName" + toolName);
126126 try {
127 CLI.main(new String[]{toolName, "help"});
127 CLI.main(new String[] {toolName, "help"});
128128 } catch (ExitException e) {
129 Assert.assertEquals(0, e.status());
129 Assertions.assertEquals(0, e.status());
130130 }
131131 }
132132 }
133133
134 @After
135 public void restoreSecurityManager() {
134 @AfterEach
135 void restoreSecurityManager() {
136136 System.setSecurityManager(originalSecurityManager);
137137 }
138138
1616
1717 package opennlp.tools.cmdline;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link TerminateToolException} class.
2525 public class TerminateToolExceptionTest {
2626
2727 @Test
28 public void testCreation() {
28 void testCreation() {
2929 TerminateToolException e = new TerminateToolException(-500);
30 Assert.assertEquals(-500, e.getCode());
30 Assertions.assertEquals(-500, e.getCode());
3131 }
3232 }
2626 import java.io.PrintStream;
2727 import java.nio.charset.StandardCharsets;
2828
29 import org.junit.Assert;
30 import org.junit.Test;
29 import org.junit.jupiter.api.Assertions;
30 import org.junit.jupiter.api.Test;
3131
3232 import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
3333 import opennlp.tools.namefind.NameFinderME;
4343 public class TokenNameFinderToolTest {
4444
4545 @Test
46 public void run() throws IOException {
46 void run() throws IOException {
4747
4848 File model1 = trainModel();
4949
50 String[] args = new String[]{model1.getAbsolutePath()};
51
50 String[] args = new String[] {model1.getAbsolutePath()};
51
5252 final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
5353 InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
54
54
5555 System.setIn(stream);
56
57 ByteArrayOutputStream baos = new ByteArrayOutputStream();
58 PrintStream ps = new PrintStream(baos);
59 System.setOut(ps);
6056
61 TokenNameFinderTool tool = new TokenNameFinderTool();
62 tool.run(args);
63
64 final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
65 Assert.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
66
67 model1.delete();
68 }
69
70 @Test(expected = TerminateToolException.class)
71 public void invalidModel() {
72
73 String[] args = new String[]{"invalidmodel.bin"};
74
75 TokenNameFinderTool tool = new TokenNameFinderTool();
76 tool.run(args);
77
78 }
79
80 @Test()
81 public void usage() {
82
83 String[] args = new String[]{};
84
8557 ByteArrayOutputStream baos = new ByteArrayOutputStream();
8658 PrintStream ps = new PrintStream(baos);
8759 System.setOut(ps);
9062 tool.run(args);
9163
9264 final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
93 Assert.assertEquals(tool.getHelp(), content.trim());
94
65 Assertions.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
66
67 model1.delete();
9568 }
96
69
70 @Test
71 void invalidModel() {
72
73 Assertions.assertThrows(TerminateToolException.class, () -> {
74
75 String[] args = new String[] {"invalidmodel.bin"};
76
77 TokenNameFinderTool tool = new TokenNameFinderTool();
78 tool.run(args);
79
80 });
81
82
83 }
84
85 @Test
86 void usage() {
87
88 String[] args = new String[] {};
89
90 ByteArrayOutputStream baos = new ByteArrayOutputStream();
91 PrintStream ps = new PrintStream(baos);
92 System.setOut(ps);
93
94 TokenNameFinderTool tool = new TokenNameFinderTool();
95 tool.run(args);
96
97 final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
98 Assertions.assertEquals(tool.getHelp(), content.trim());
99
100 }
101
97102 private File trainModel() throws IOException {
98103
99104 ObjectStream<String> lineStream =
104109 TrainingParameters params = new TrainingParameters();
105110 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
106111 params.put(TrainingParameters.CUTOFF_PARAM, 1);
107
112
108113 TokenNameFinderModel model;
109114
110115 TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();
113118 model = NameFinderME.train("eng", null, sampleStream, params,
114119 nameFinderFactory);
115120 }
116
121
117122 File modelFile = File.createTempFile("model", ".bin");
118
123
119124 try (BufferedOutputStream modelOut =
120125 new BufferedOutputStream(new FileOutputStream(modelFile))) {
121126 model.serialize(modelOut);
122127 }
123
128
124129 return modelFile;
125130 }
126
131
127132 }
1919 import java.io.File;
2020 import java.io.IOException;
2121 import java.nio.charset.Charset;
22 import java.nio.file.Path;
2223 import java.util.Arrays;
2324 import java.util.List;
2425
2526 import org.apache.commons.io.FileUtils;
26 import org.junit.Assert;
27 import org.junit.Rule;
28 import org.junit.Test;
29 import org.junit.rules.TemporaryFolder;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
29 import org.junit.jupiter.api.io.TempDir;
3030
3131 import opennlp.tools.formats.DirectorySampleStream;
3232 import opennlp.tools.formats.convert.FileToStringSampleStream;
3333
3434 public class FileToStringSampleStreamTest {
3535
36 @Rule
37 public TemporaryFolder directory = new TemporaryFolder();
36 @TempDir
37 public Path directory;
3838
3939 @Test
4040 public void readFileTest() throws IOException {
4141
4242 final String sentence1 = "This is a sentence.";
4343 final String sentence2 = "This is another sentence.";
44
44
4545 List<String> sentences = Arrays.asList(sentence1, sentence2);
46
46
4747 DirectorySampleStream directorySampleStream =
48 new DirectorySampleStream(directory.getRoot(), null, false);
49
50 File tempFile1 = directory.newFile();
48 new DirectorySampleStream(directory.toFile(), null, false);
49
50 File tempFile1 = directory.resolve("tempFile1").toFile();
5151 FileUtils.writeStringToFile(tempFile1, sentence1);
52
53 File tempFile2 = directory.newFile();
52
53 File tempFile2 = directory.resolve("tempFile2").toFile();
5454 FileUtils.writeStringToFile(tempFile2, sentence2);
55
55
5656 try (FileToStringSampleStream stream =
57 new FileToStringSampleStream(directorySampleStream, Charset.defaultCharset())) {
57 new FileToStringSampleStream(directorySampleStream, Charset.defaultCharset())) {
5858
5959 String read = stream.read();
60 Assert.assertTrue(sentences.contains(read));
60 Assertions.assertTrue(sentences.contains(read));
6161
6262 read = stream.read();
63 Assert.assertTrue(sentences.contains(read));
63 Assertions.assertTrue(sentences.contains(read));
6464 }
6565 }
6666
2121 import java.util.List;
2222 import java.util.Set;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.util.StringList;
2828
4040 * Tests a basic lookup.
4141 */
4242 @Test
43 public void testLookup() {
43 void testLookup() {
4444
4545 String a = "a";
4646 String b = "b";
5151
5252 Set<String> set = dict.asStringSet();
5353
54 Assert.assertTrue(set.contains(a));
55 Assert.assertFalse(set.contains(b));
56
57 Assert.assertTrue(set.contains(a.toUpperCase()));
54 Assertions.assertTrue(set.contains(a));
55 Assertions.assertFalse(set.contains(b));
56
57 Assertions.assertTrue(set.contains(a.toUpperCase()));
5858 }
5959
6060 /**
6161 * Tests set.
6262 */
6363 @Test
64 public void testSet() {
64 void testSet() {
6565
6666 String a = "a";
6767 String a1 = "a";
7373
7474 Set<String> set = dict.asStringSet();
7575
76 Assert.assertTrue(set.contains(a));
77 Assert.assertEquals(1, set.size());
76 Assertions.assertTrue(set.contains(a));
77 Assertions.assertEquals(1, set.size());
7878 }
7979
8080 /**
8181 * Tests set.
8282 */
8383 @Test
84 public void testSetDiffCase() {
84 void testSetDiffCase() {
8585
8686 String a = "a";
8787 String a1 = "A";
9393
9494 Set<String> set = dict.asStringSet();
9595
96 Assert.assertTrue(set.contains(a));
97 Assert.assertEquals(1, set.size());
96 Assertions.assertTrue(set.contains(a));
97 Assertions.assertEquals(1, set.size());
9898 }
9999
100100 /**
101101 * Tests for the {@link Dictionary#equals(Object)} method.
102102 */
103103 @Test
104 public void testEquals() {
104 void testEquals() {
105105 String entry1 = "1a";
106106 String entry2 = "1b";
107107
117117
118118 Set<String> setB = dictB.asStringSet();
119119
120 Assert.assertTrue(setA.equals(setB));
120 Assertions.assertTrue(setA.equals(setB));
121121 }
122122
123123 /**
124124 * Tests for the {@link Dictionary#equals(Object)} method.
125125 */
126126 @Test
127 public void testEqualsDifferentCase() {
127 void testEqualsDifferentCase() {
128128
129129 Dictionary dictA = getDict();
130130 dictA.put(asSL("1a"));
138138
139139 Set<String> setB = dictB.asStringSet();
140140
141 Assert.assertTrue(setA.equals(setB));
141 Assertions.assertTrue(setA.equals(setB));
142142 }
143143
144144 /**
145145 * Tests the {@link Dictionary#hashCode()} method.
146146 */
147147 @Test
148 public void testHashCode() {
148 void testHashCode() {
149149 String entry1 = "a1";
150150
151151 Dictionary dictA = getDict();
158158
159159 Set<String> setB = dictB.asStringSet();
160160
161 Assert.assertEquals(setA.hashCode(), setB.hashCode());
161 Assertions.assertEquals(setA.hashCode(), setB.hashCode());
162162 }
163163
164164 /**
165165 * Tests the {@link Dictionary#hashCode()} method.
166166 */
167167 @Test
168 public void testHashCodeDifferentCase() {
168 void testHashCodeDifferentCase() {
169169 String entry1 = "a1";
170170
171171 Dictionary dictA = getDict();
179179 Set<String> setB = dictB.asStringSet();
180180
181181 // TODO: should it be equal??
182 Assert.assertNotSame(setA.hashCode(), setB.hashCode());
182 Assertions.assertNotSame(setA.hashCode(), setB.hashCode());
183183 }
184184
185185 /**
186186 * Tests the lookup of tokens of different case.
187187 */
188188 @Test
189 public void testDifferentCaseLookup() {
189 void testDifferentCaseLookup() {
190190
191191 String entry1 = "1a";
192192 String entry2 = "1A";
198198
199199 Set<String> set = dict.asStringSet();
200200
201 Assert.assertTrue(set.contains(entry2));
201 Assertions.assertTrue(set.contains(entry2));
202202 }
203203
204204 /**
205205 * Tests the iterator implementation
206206 */
207207 @Test
208 public void testIterator() {
208 void testIterator() {
209209
210210 String entry1 = "1a";
211211 String entry2 = "1b";
222222 elements.add(it.next());
223223 }
224224
225 Assert.assertEquals(2, elements.size());
226 Assert.assertTrue(elements.contains(entry1));
227 Assert.assertTrue(elements.contains(entry2));
225 Assertions.assertEquals(2, elements.size());
226 Assertions.assertTrue(elements.contains(entry1));
227 Assertions.assertTrue(elements.contains(entry2));
228228
229229 }
230230 }
2121 import java.util.List;
2222 import java.util.Set;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.util.StringList;
2828
4040 * Tests a basic lookup.
4141 */
4242 @Test
43 public void testLookup() {
43 void testLookup() {
4444
4545 String a = "a";
4646 String b = "b";
5151
5252 Set<String> set = dict.asStringSet();
5353
54 Assert.assertTrue(set.contains(a));
55 Assert.assertFalse(set.contains(b));
56
57 Assert.assertFalse(set.contains(a.toUpperCase()));
54 Assertions.assertTrue(set.contains(a));
55 Assertions.assertFalse(set.contains(b));
56
57 Assertions.assertFalse(set.contains(a.toUpperCase()));
5858 }
5959
6060 /**
6161 * Tests set.
6262 */
6363 @Test
64 public void testSet() {
64 void testSet() {
6565
6666 String a = "a";
6767 String a1 = "a";
7373
7474 Set<String> set = dict.asStringSet();
7575
76 Assert.assertTrue(set.contains(a));
77 Assert.assertEquals(1, set.size());
76 Assertions.assertTrue(set.contains(a));
77 Assertions.assertEquals(1, set.size());
7878 }
7979
8080 /**
8181 * Tests set.
8282 */
8383 @Test
84 public void testSetDiffCase() {
84 void testSetDiffCase() {
8585
8686 String a = "a";
8787 String a1 = "A";
9393
9494 Set<String> set = dict.asStringSet();
9595
96 Assert.assertTrue(set.contains(a));
97 Assert.assertEquals(2, set.size());
96 Assertions.assertTrue(set.contains(a));
97 Assertions.assertEquals(2, set.size());
9898 }
9999
100100 /**
101101 * Tests for the {@link Dictionary#equals(Object)} method.
102102 */
103103 @Test
104 public void testEquals() {
104 void testEquals() {
105105 String entry1 = "1a";
106106 String entry2 = "1b";
107107
117117
118118 Set<String> setB = dictB.asStringSet();
119119
120 Assert.assertTrue(setA.equals(setB));
120 Assertions.assertTrue(setA.equals(setB));
121121 }
122122
123123 /**
124124 * Tests for the {@link Dictionary#equals(Object)} method.
125125 */
126126 @Test
127 public void testEqualsDifferentCase() {
127 void testEqualsDifferentCase() {
128128
129129 Dictionary dictA = getDict();
130130 dictA.put(asSL("1a"));
139139 Set<String> setB = dictB.asStringSet();
140140
141141 // should fail in case sensitive dict
142 Assert.assertFalse(setA.equals(setB));
142 Assertions.assertFalse(setA.equals(setB));
143143 }
144144
145145 /**
146146 * Tests the {@link Dictionary#hashCode()} method.
147147 */
148148 @Test
149 public void testHashCode() {
149 void testHashCode() {
150150 String entry1 = "a1";
151151
152152 Dictionary dictA = getDict();
159159
160160 Set<String> setB = dictB.asStringSet();
161161
162 Assert.assertEquals(setA.hashCode(), setB.hashCode());
162 Assertions.assertEquals(setA.hashCode(), setB.hashCode());
163163 }
164164
165165 /**
166166 * Tests the {@link Dictionary#hashCode()} method.
167167 */
168168 @Test
169 public void testHashCodeDifferentCase() {
169 void testHashCodeDifferentCase() {
170170 String entry1 = "a1";
171171
172172 Dictionary dictA = getDict();
180180 Set<String> setB = dictB.asStringSet();
181181
182182 // TODO: should it be equal??
183 Assert.assertNotSame(setA.hashCode(), setB.hashCode());
183 Assertions.assertNotSame(setA.hashCode(), setB.hashCode());
184184 }
185185
186186 /**
187187 * Tests the lookup of tokens of different case.
188188 */
189189 @Test
190 public void testDifferentCaseLookup() {
190 void testDifferentCaseLookup() {
191191
192192 String entry1 = "1a";
193193 String entry2 = "1A";
200200 Set<String> set = dict.asStringSet();
201201
202202 // should return false because 1a != 1A in a case sensitive lookup
203 Assert.assertFalse(set.contains(entry2));
203 Assertions.assertFalse(set.contains(entry2));
204204 }
205205
206206 /**
207207 * Tests the iterator implementation
208208 */
209209 @Test
210 public void testIterator() {
210 void testIterator() {
211211
212212 String entry1 = "1a";
213213 String entry2 = "1b";
224224 elements.add(it.next());
225225 }
226226
227 Assert.assertEquals(4, elements.size());
228 Assert.assertTrue(elements.contains(entry1));
229 Assert.assertTrue(elements.contains(entry2));
230 Assert.assertTrue(elements.contains(entry1.toUpperCase()));
231 Assert.assertTrue(elements.contains(entry2.toUpperCase()));
227 Assertions.assertEquals(4, elements.size());
228 Assertions.assertTrue(elements.contains(entry1));
229 Assertions.assertTrue(elements.contains(entry2));
230 Assertions.assertTrue(elements.contains(entry1.toUpperCase()));
231 Assertions.assertTrue(elements.contains(entry2.toUpperCase()));
232232
233233 }
234234 }
2121 import java.io.IOException;
2222 import java.io.StringReader;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.util.InvalidFormatException;
2828 import opennlp.tools.util.StringList;
5050 * Tests a basic lookup.
5151 */
5252 @Test
53 public void testLookup() {
53 void testLookup() {
5454
5555 StringList entry1 = new StringList("1a", "1b");
5656 StringList entry1u = new StringList("1A", "1B");
6060
6161 dict.put(entry1);
6262
63 Assert.assertTrue(dict.contains(entry1));
64 Assert.assertTrue(dict.contains(entry1u));
65 Assert.assertTrue(!dict.contains(entry2));
63 Assertions.assertTrue(dict.contains(entry1));
64 Assertions.assertTrue(dict.contains(entry1u));
65 Assertions.assertTrue(!dict.contains(entry2));
6666 }
6767
6868 /**
6969 * Test lookup with case sensitive dictionary
7070 */
7171 @Test
72 public void testLookupCaseSensitive() {
72 void testLookupCaseSensitive() {
7373 StringList entry1 = new StringList("1a", "1b");
7474 StringList entry1u = new StringList("1A", "1B");
7575 StringList entry2 = new StringList("1A", "1C");
7878
7979 dict.put(entry1);
8080
81 Assert.assertTrue(dict.contains(entry1));
82 Assert.assertTrue(!dict.contains(entry1u));
83 Assert.assertTrue(!dict.contains(entry2));
81 Assertions.assertTrue(dict.contains(entry1));
82 Assertions.assertTrue(!dict.contains(entry1u));
83 Assertions.assertTrue(!dict.contains(entry2));
8484 }
8585
8686 /**
9090 * @throws InvalidFormatException
9191 */
9292 @Test
93 public void testSerialization() throws IOException {
93 void testSerialization() throws IOException {
9494 Dictionary reference = getCaseInsensitive();
9595
9696 String a1 = "a1";
107107 Dictionary recreated = new Dictionary(
108108 new ByteArrayInputStream(out.toByteArray()));
109109
110 Assert.assertTrue(reference.equals(recreated));
110 Assertions.assertTrue(reference.equals(recreated));
111111 }
112112
113113 /**
117117 * @throws IOException
118118 */
119119 @Test
120 public void testParseOneEntryPerLine() throws IOException {
120 void testParseOneEntryPerLine() throws IOException {
121121
122122 String testDictionary = "1a 1b 1c 1d \n 2a 2b 2c \n 3a \n 4a 4b ";
123123
124124 Dictionary dictionay =
125125 Dictionary.parseOneEntryPerLine(new StringReader(testDictionary));
126126
127 Assert.assertTrue(dictionay.size() == 4);
128 Assert.assertTrue(dictionay.contains(new StringList("1a", "1b", "1c", "1d")));
129 Assert.assertTrue(dictionay.contains(new StringList("2a", "2b", "2c")));
130 Assert.assertTrue(dictionay.contains(new StringList(new String[]{"3a"})));
131 Assert.assertTrue(dictionay.contains(new StringList("4a", "4b")));
127 Assertions.assertTrue(dictionay.size() == 4);
128 Assertions.assertTrue(dictionay.contains(new StringList("1a", "1b", "1c", "1d")));
129 Assertions.assertTrue(dictionay.contains(new StringList("2a", "2b", "2c")));
130 Assertions.assertTrue(dictionay.contains(new StringList(new String[] {"3a"})));
131 Assertions.assertTrue(dictionay.contains(new StringList("4a", "4b")));
132132 }
133133
134134 /**
135135 * Tests for the {@link Dictionary#equals(Object)} method.
136136 */
137137 @Test
138 public void testEquals() {
138 void testEquals() {
139139 StringList entry1 = new StringList("1a", "1b");
140140 StringList entry2 = new StringList("2a", "2b");
141141
151151 dictC.put(entry1);
152152 dictC.put(entry2);
153153
154 Assert.assertTrue(dictA.equals(dictB));
155 Assert.assertTrue(dictC.equals(dictA));
156 Assert.assertTrue(dictB.equals(dictC));
154 Assertions.assertTrue(dictA.equals(dictB));
155 Assertions.assertTrue(dictC.equals(dictA));
156 Assertions.assertTrue(dictB.equals(dictC));
157157 }
158158
159159 /**
160160 * Tests the {@link Dictionary#hashCode()} method.
161161 */
162162 @Test
163 public void testHashCode() {
163 void testHashCode() {
164164 StringList entry1 = new StringList("1a", "1b");
165165 StringList entry2 = new StringList("1A", "1B");
166166
176176 Dictionary dictD = getCaseSensitive();
177177 dictD.put(entry2);
178178
179 Assert.assertEquals(dictA.hashCode(), dictB.hashCode());
180 Assert.assertEquals(dictB.hashCode(), dictC.hashCode());
181 Assert.assertEquals(dictC.hashCode(), dictD.hashCode());
179 Assertions.assertEquals(dictA.hashCode(), dictB.hashCode());
180 Assertions.assertEquals(dictB.hashCode(), dictC.hashCode());
181 Assertions.assertEquals(dictC.hashCode(), dictD.hashCode());
182182 }
183183
184184 /**
185185 * Tests for the {@link Dictionary#toString()} method.
186186 */
187187 @Test
188 public void testToString() {
188 void testToString() {
189189 StringList entry1 = new StringList("1a", "1b");
190190
191191 Dictionary dictA = getCaseInsensitive();
201201 * Tests the lookup of tokens of different case.
202202 */
203203 @Test
204 public void testDifferentCaseLookup() {
204 void testDifferentCaseLookup() {
205205
206206 StringList entry1 = new StringList("1a", "1b");
207207 StringList entry2 = new StringList("1A", "1B");
210210
211211 dict.put(entry1);
212212
213 Assert.assertTrue(dict.contains(entry2));
213 Assertions.assertTrue(dict.contains(entry2));
214214 }
215215
216216 /**
217217 * Tests the lookup of tokens of different case.
218218 */
219219 @Test
220 public void testDifferentCaseLookupCaseSensitive() {
220 void testDifferentCaseLookupCaseSensitive() {
221221
222222 StringList entry1 = new StringList("1a", "1b");
223223 StringList entry2 = new StringList("1A", "1B");
226226
227227 dict.put(entry1);
228228
229 Assert.assertTrue(!dict.contains(entry2));
229 Assertions.assertTrue(!dict.contains(entry2));
230230 }
231231
232232 }
1818
1919 import java.util.Collections;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 public class BagOfWordsFeatureGeneratorTest {
2525
2626 @Test
27 public void testNull() {
27 void testNull() {
2828 BagOfWordsFeatureGenerator generator = new BagOfWordsFeatureGenerator();
2929 try {
3030 generator.extractFeatures(null, Collections.emptyMap());
31 Assert.fail("NullPointerException must be thrown");
32 }
33 catch (NullPointerException expected) {
31 Assertions.fail("NullPointerException must be thrown");
32 } catch (NullPointerException expected) {
3433 }
3534 }
3635
3736 @Test
38 public void testEmpty() {
37 void testEmpty() {
3938 BagOfWordsFeatureGenerator generator = new BagOfWordsFeatureGenerator();
4039
41 Assert.assertEquals(0, generator.extractFeatures(new String[]{}, Collections.emptyMap()).size());
40 Assertions.assertEquals(0, generator.extractFeatures(new String[] {}, Collections.emptyMap()).size());
4241 }
4342
4443 @Test
45 public void testUseAllTokens() {
44 void testUseAllTokens() {
4645 BagOfWordsFeatureGenerator generator = new BagOfWordsFeatureGenerator();
4746
48 Assert.assertArrayEquals(new String[]{"bow=it", "bow=is", "bow=12.345", "bow=feet", "bow=long"},
49 generator.extractFeatures(new String[]{"it", "is", "12.345", "feet", "long"},
47 Assertions.assertArrayEquals(new String[] {"bow=it", "bow=is", "bow=12.345", "bow=feet", "bow=long"},
48 generator.extractFeatures(new String[] {"it", "is", "12.345", "feet", "long"},
5049 Collections.emptyMap()).toArray());
5150 }
5251
5352 @Test
54 public void testOnlyLetterTokens() {
53 void testOnlyLetterTokens() {
5554 BagOfWordsFeatureGenerator generator = new BagOfWordsFeatureGenerator(true);
5655
57 Assert.assertArrayEquals(new String[]{"bow=it", "bow=is", "bow=feet", "bow=long"},
58 generator.extractFeatures(new String[]{"it", "is", "12.345", "feet", "long"},
59 Collections.emptyMap()).toArray());
56 Assertions.assertArrayEquals(new String[] {"bow=it", "bow=is", "bow=feet", "bow=long"},
57 generator.extractFeatures(new String[] {"it", "is", "12.345", "feet", "long"},
58 Collections.emptyMap()).toArray());
6059 }
6160 }
2121 import java.io.IOException;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.formats.ResourceAsStreamFactory;
2828 import opennlp.tools.util.InputStreamFactory;
5555 }
5656
5757 @Test
58 public void testDefault() throws IOException {
58 void testDefault() throws IOException {
5959 DoccatModel model = train();
6060
61 Assert.assertNotNull(model);
61 Assertions.assertNotNull(model);
6262
6363 ByteArrayOutputStream out = new ByteArrayOutputStream();
6464 model.serialize(out);
6868
6969 DoccatFactory factory = fromSerialized.getFactory();
7070
71 Assert.assertNotNull(factory);
71 Assertions.assertNotNull(factory);
7272
73 Assert.assertEquals(1, factory.getFeatureGenerators().length);
74 Assert.assertEquals(BagOfWordsFeatureGenerator.class,
73 Assertions.assertEquals(1, factory.getFeatureGenerators().length);
74 Assertions.assertEquals(BagOfWordsFeatureGenerator.class,
7575 factory.getFeatureGenerators()[0].getClass());
7676
7777 }
7878
7979 @Test
80 public void testCustom() throws IOException {
81 FeatureGenerator[] featureGenerators = { new BagOfWordsFeatureGenerator(),
82 new NGramFeatureGenerator(), new NGramFeatureGenerator(2,3) };
80 void testCustom() throws IOException {
81 FeatureGenerator[] featureGenerators = {new BagOfWordsFeatureGenerator(),
82 new NGramFeatureGenerator(), new NGramFeatureGenerator(2, 3)};
8383
8484 DoccatFactory factory = new DoccatFactory(featureGenerators);
8585
8686 DoccatModel model = train(factory);
8787
88 Assert.assertNotNull(model);
88 Assertions.assertNotNull(model);
8989
9090 ByteArrayOutputStream out = new ByteArrayOutputStream();
9191 model.serialize(out);
9595
9696 factory = fromSerialized.getFactory();
9797
98 Assert.assertNotNull(factory);
98 Assertions.assertNotNull(factory);
9999
100 Assert.assertEquals(3, factory.getFeatureGenerators().length);
101 Assert.assertEquals(BagOfWordsFeatureGenerator.class,
100 Assertions.assertEquals(3, factory.getFeatureGenerators().length);
101 Assertions.assertEquals(BagOfWordsFeatureGenerator.class,
102102 factory.getFeatureGenerators()[0].getClass());
103 Assert.assertEquals(NGramFeatureGenerator.class,
103 Assertions.assertEquals(NGramFeatureGenerator.class,
104104 factory.getFeatureGenerators()[1].getClass());
105 Assert.assertEquals(NGramFeatureGenerator.class,factory.getFeatureGenerators()[2].getClass());
105 Assertions.assertEquals(NGramFeatureGenerator.class, factory.getFeatureGenerators()[2].getClass());
106106 }
107107
108108 }
2020 import java.util.Set;
2121 import java.util.SortedMap;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.util.InsufficientTrainingDataException;
2727 import opennlp.tools.util.ObjectStream;
3131 public class DocumentCategorizerMETest {
3232
3333 @Test
34 public void testSimpleTraining() throws IOException {
34 void testSimpleTraining() throws IOException {
3535
3636 ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream(
37 new DocumentSample("1", new String[]{"a", "b", "c"}),
38 new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}),
39 new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}),
40 new DocumentSample("0", new String[]{"x", "y", "z"}),
41 new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}),
42 new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));
37 new DocumentSample("1", new String[] {"a", "b", "c"}),
38 new DocumentSample("1", new String[] {"a", "b", "c", "1", "2"}),
39 new DocumentSample("1", new String[] {"a", "b", "c", "3", "4"}),
40 new DocumentSample("0", new String[] {"x", "y", "z"}),
41 new DocumentSample("0", new String[] {"x", "y", "z", "5", "6"}),
42 new DocumentSample("0", new String[] {"x", "y", "z", "7", "8"}));
4343
4444 TrainingParameters params = new TrainingParameters();
4545 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
4646 params.put(TrainingParameters.CUTOFF_PARAM, 0);
4747
4848 DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
49 params, new DoccatFactory());
49 params, new DoccatFactory());
5050
5151 DocumentCategorizer doccat = new DocumentCategorizerME(model);
5252
53 double[] aProbs = doccat.categorize(new String[]{"a"});
54 Assert.assertEquals("1", doccat.getBestCategory(aProbs));
53 double[] aProbs = doccat.categorize(new String[] {"a"});
54 Assertions.assertEquals("1", doccat.getBestCategory(aProbs));
5555
56 double[] bProbs = doccat.categorize(new String[]{"x"});
57 Assert.assertEquals("0", doccat.getBestCategory(bProbs));
56 double[] bProbs = doccat.categorize(new String[] {"x"});
57 Assertions.assertEquals("0", doccat.getBestCategory(bProbs));
5858
5959 //test to make sure sorted map's last key is cat 1 because it has the highest score.
60 SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
60 SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[] {"a"});
6161 Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
62 Assert.assertEquals(1, cat.size());
62 Assertions.assertEquals(1, cat.size());
6363 }
64
65 @Test(expected = InsufficientTrainingDataException.class)
66 public void insufficientTestData() throws IOException {
6764
68 ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream(
69 new DocumentSample("1", new String[]{"a", "b", "c"}));
65 @Test
66 void insufficientTestData() {
7067
71 TrainingParameters params = new TrainingParameters();
72 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
73 params.put(TrainingParameters.CUTOFF_PARAM, 0);
68 Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
7469
75 DocumentCategorizerME.train("x-unspecified", samples,
76 params, new DoccatFactory());
70 ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream(
71 new DocumentSample("1", new String[] {"a", "b", "c"}));
72
73 TrainingParameters params = new TrainingParameters();
74 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
75 params.put(TrainingParameters.CUTOFF_PARAM, 0);
76
77 DocumentCategorizerME.train("x-unspecified", samples,
78 params, new DoccatFactory());
79
80 });
81
7782
7883 }
79
84
8085 }
2020 import java.util.Set;
2121 import java.util.SortedMap;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.ml.AbstractTrainer;
2727 import opennlp.tools.ml.naivebayes.NaiveBayesTrainer;
3232 public class DocumentCategorizerNBTest {
3333
3434 @Test
35 public void testSimpleTraining() throws IOException {
35 void testSimpleTraining() throws IOException {
3636
3737 ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream(
38 new DocumentSample("1", new String[]{"a", "b", "c"}),
39 new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}),
40 new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}),
41 new DocumentSample("0", new String[]{"x", "y", "z"}),
42 new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}),
43 new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));
38 new DocumentSample("1", new String[] {"a", "b", "c"}),
39 new DocumentSample("1", new String[] {"a", "b", "c", "1", "2"}),
40 new DocumentSample("1", new String[] {"a", "b", "c", "3", "4"}),
41 new DocumentSample("0", new String[] {"x", "y", "z"}),
42 new DocumentSample("0", new String[] {"x", "y", "z", "5", "6"}),
43 new DocumentSample("0", new String[] {"x", "y", "z", "7", "8"}));
4444
4545 TrainingParameters params = new TrainingParameters();
4646 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
5252
5353 DocumentCategorizer doccat = new DocumentCategorizerME(model);
5454
55 double[] aProbs = doccat.categorize(new String[]{"a"});
56 Assert.assertEquals("1", doccat.getBestCategory(aProbs));
55 double[] aProbs = doccat.categorize(new String[] {"a"});
56 Assertions.assertEquals("1", doccat.getBestCategory(aProbs));
5757
58 double[] bProbs = doccat.categorize(new String[]{"x"});
59 Assert.assertEquals("0", doccat.getBestCategory(bProbs));
58 double[] bProbs = doccat.categorize(new String[] {"x"});
59 Assertions.assertEquals("0", doccat.getBestCategory(bProbs));
6060
6161 //test to make sure sorted map's last key is cat 1 because it has the highest score.
62 SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
62 SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[] {"a"});
6363 Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
64 Assert.assertEquals(1, cat.size());
64 Assertions.assertEquals(1, cat.size());
6565
6666 }
6767 }
2424 import java.io.ObjectOutput;
2525 import java.io.ObjectOutputStream;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 public class DocumentSampleTest {
3131
3232 @Test
33 public void testEquals() {
34 Assert.assertFalse(createGoldSample() == createGoldSample());
35 Assert.assertTrue(createGoldSample().equals(createGoldSample()));
36 Assert.assertFalse(createPredSample().equals(createGoldSample()));
37 Assert.assertFalse(createPredSample().equals(new Object()));
33 void testEquals() {
34 Assertions.assertFalse(createGoldSample() == createGoldSample());
35 Assertions.assertTrue(createGoldSample().equals(createGoldSample()));
36 Assertions.assertFalse(createPredSample().equals(createGoldSample()));
37 Assertions.assertFalse(createPredSample().equals(new Object()));
3838 }
3939
4040 @Test
41 public void testDocumentSampleSerDe() throws IOException {
41 void testDocumentSampleSerDe() throws IOException {
4242 DocumentSample documentSample = createGoldSample();
4343 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
4444 ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream);
5656 // do nothing
5757 }
5858
59 Assert.assertNotNull(deSerializedDocumentSample);
60 Assert.assertEquals(documentSample.getCategory(), deSerializedDocumentSample.getCategory());
61 Assert.assertArrayEquals(documentSample.getText(), deSerializedDocumentSample.getText());
59 Assertions.assertNotNull(deSerializedDocumentSample);
60 Assertions.assertEquals(documentSample.getCategory(), deSerializedDocumentSample.getCategory());
61 Assertions.assertArrayEquals(documentSample.getText(), deSerializedDocumentSample.getText());
6262 }
6363
6464 public static DocumentSample createGoldSample() {
65 return new DocumentSample("aCategory", new String[]{"a", "small", "text"});
65 return new DocumentSample("aCategory", new String[] {"a", "small", "text"});
6666 }
6767
6868 public static DocumentSample createPredSample() {
69 return new DocumentSample("anotherCategory", new String[]{"a", "small", "text"});
69 return new DocumentSample("anotherCategory", new String[] {"a", "small", "text"});
7070 }
7171
7272 }
1818
1919 import java.util.Collections;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.util.InvalidFormatException;
2525
2626 public class NGramFeatureGeneratorTest {
2727
28 static final String[] TOKENS = new String[]{"a", "b", "c", "d", "e", "f", "g"};
28 static final String[] TOKENS = new String[] {"a", "b", "c", "d", "e", "f", "g"};
2929
3030 @Test
31 public void testNull() throws Exception {
31 void testNull() throws Exception {
3232 NGramFeatureGenerator generator = new NGramFeatureGenerator();
3333 try {
3434 generator.extractFeatures(null, Collections.emptyMap());
35 Assert.fail("NullPointerException must be thrown");
36 }
37 catch (NullPointerException expected) {
35 Assertions.fail("NullPointerException must be thrown");
36 } catch (NullPointerException expected) {
3837 }
3938 }
4039
4140 @Test
42 public void testEmpty() throws Exception {
41 void testEmpty() throws Exception {
4342 NGramFeatureGenerator generator = new NGramFeatureGenerator();
4443
45 Assert.assertEquals(0, generator.extractFeatures(new String[]{}, Collections.emptyMap()).size());
44 Assertions.assertEquals(0, generator.extractFeatures(new String[] {}, Collections.emptyMap()).size());
4645 }
4746
4847 @Test
49 public void testInvalidGramSize1() {
48 void testInvalidGramSize1() {
5049 try {
5150 new NGramFeatureGenerator(0, 1);
52 Assert.fail("InvalidFormatException must be thrown");
53 }
54 catch (InvalidFormatException expected) {
51 Assertions.fail("InvalidFormatException must be thrown");
52 } catch (InvalidFormatException expected) {
5553 }
5654 }
5755
5856 @Test
59 public void testInvalidGramSize2() {
57 void testInvalidGramSize2() {
6058 try {
6159 new NGramFeatureGenerator(2, 1);
62 Assert.fail("InvalidFormatException must be thrown");
63 }
64 catch (InvalidFormatException expected) {
60 Assertions.fail("InvalidFormatException must be thrown");
61 } catch (InvalidFormatException expected) {
6562 }
6663 }
6764
6865 @Test
69 public void testUnigram() throws Exception {
66 void testUnigram() throws Exception {
7067 NGramFeatureGenerator generator = new NGramFeatureGenerator(1, 1);
7168
72 Assert.assertArrayEquals(
73 new String[]{"ng=:a", "ng=:b", "ng=:c", "ng=:d", "ng=:e", "ng=:f", "ng=:g"},
69 Assertions.assertArrayEquals(
70 new String[] {"ng=:a", "ng=:b", "ng=:c", "ng=:d", "ng=:e", "ng=:f", "ng=:g"},
7471 generator.extractFeatures(TOKENS, Collections.emptyMap()).toArray());
7572 }
7673
7774 @Test
78 public void testBigram() throws Exception {
75 void testBigram() throws Exception {
7976 NGramFeatureGenerator generator = new NGramFeatureGenerator(2, 2);
8077
81 Assert.assertArrayEquals(
82 new String[]{"ng=:a:b", "ng=:b:c", "ng=:c:d", "ng=:d:e", "ng=:e:f", "ng=:f:g"},
78 Assertions.assertArrayEquals(
79 new String[] {"ng=:a:b", "ng=:b:c", "ng=:c:d", "ng=:d:e", "ng=:e:f", "ng=:f:g"},
8380 generator.extractFeatures(TOKENS, Collections.emptyMap()).toArray());
8481 }
8582
8683 @Test
87 public void testTrigram() throws Exception {
84 void testTrigram() throws Exception {
8885 NGramFeatureGenerator generator = new NGramFeatureGenerator(3, 3);
8986
90 Assert.assertArrayEquals(
91 new String[]{"ng=:a:b:c", "ng=:b:c:d", "ng=:c:d:e", "ng=:d:e:f", "ng=:e:f:g"},
87 Assertions.assertArrayEquals(
88 new String[] {"ng=:a:b:c", "ng=:b:c:d", "ng=:c:d:e", "ng=:d:e:f", "ng=:e:f:g"},
9289 generator.extractFeatures(TOKENS, Collections.emptyMap()).toArray());
9390 }
9491
9592 @Test
96 public void test12gram() throws Exception {
93 void test12gram() throws Exception {
9794 NGramFeatureGenerator generator = new NGramFeatureGenerator(1, 2);
9895
99 Assert.assertArrayEquals(
100 new String[]{
101 "ng=:a", "ng=:a:b",
102 "ng=:b", "ng=:b:c",
103 "ng=:c", "ng=:c:d",
104 "ng=:d", "ng=:d:e",
105 "ng=:e", "ng=:e:f",
106 "ng=:f", "ng=:f:g",
107 "ng=:g"
108 },
96 Assertions.assertArrayEquals(
97 new String[] {
98 "ng=:a", "ng=:a:b",
99 "ng=:b", "ng=:b:c",
100 "ng=:c", "ng=:c:d",
101 "ng=:d", "ng=:d:e",
102 "ng=:e", "ng=:e:f",
103 "ng=:f", "ng=:f:g",
104 "ng=:g"
105 },
109106 generator.extractFeatures(TOKENS, Collections.emptyMap()).toArray());
110107 }
111108
112109 @Test
113 public void test13gram() throws Exception {
110 void test13gram() throws Exception {
114111 NGramFeatureGenerator generator = new NGramFeatureGenerator(1, 3);
115112
116 Assert.assertArrayEquals(
117 new String[]{
118 "ng=:a", "ng=:a:b", "ng=:a:b:c",
119 "ng=:b", "ng=:b:c", "ng=:b:c:d",
120 "ng=:c", "ng=:c:d", "ng=:c:d:e",
121 "ng=:d", "ng=:d:e", "ng=:d:e:f",
122 "ng=:e", "ng=:e:f", "ng=:e:f:g",
123 "ng=:f", "ng=:f:g",
124 "ng=:g"
125 },
113 Assertions.assertArrayEquals(
114 new String[] {
115 "ng=:a", "ng=:a:b", "ng=:a:b:c",
116 "ng=:b", "ng=:b:c", "ng=:b:c:d",
117 "ng=:c", "ng=:c:d", "ng=:c:d:e",
118 "ng=:d", "ng=:d:e", "ng=:d:e:f",
119 "ng=:e", "ng=:e:f", "ng=:e:f:g",
120 "ng=:f", "ng=:f:g",
121 "ng=:g"
122 },
126123 generator.extractFeatures(TOKENS, Collections.emptyMap()).toArray());
127124 }
128125 }
2828 import java.util.List;
2929 import java.util.stream.Collectors;
3030
31 import org.junit.Assert;
31 import org.junit.jupiter.api.Assertions;
3232
3333 import opennlp.tools.ml.maxent.quasinewton.QNTrainer;
3434 import opennlp.tools.ml.naivebayes.NaiveBayesTrainer;
5454
5555 samples.close();
5656
57 Assert.assertEquals(checksum, new BigInteger(1, digest.digest()));
57 Assertions.assertEquals(checksum, new BigInteger(1, digest.digest()));
5858
5959 }
60
60
6161 public static void verifyFileChecksum(Path file, BigInteger checksum) throws Exception {
6262
6363 MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
7070 }
7171 }
7272
73 Assert.assertEquals(checksum, new BigInteger(1, digest.digest()));
73 Assertions.assertEquals(checksum, new BigInteger(1, digest.digest()));
7474 }
75
75
7676 public static void verifyDirectoryChecksum(Path path, String extension, BigInteger checksum)
7777 throws Exception {
7878
7979 MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
80
80
8181 final List<Path> paths = Files.walk(path)
8282 .filter(Files::isRegularFile)
8383 .filter(p -> p.toString().endsWith(extension))
8686 // Ensure the paths are in a consistent order when
8787 // verifying the file checksums.
8888 Collections.sort(paths);
89
89
9090 for (Path p : paths) {
9191 try (InputStream in = Files.newInputStream(p)) {
9292 byte[] buf = new byte[65536];
9797 }
9898 }
9999
100 Assert.assertEquals(checksum, new BigInteger(1, digest.digest()));
101 }
100 Assertions.assertEquals(checksum, new BigInteger(1, digest.digest()));
101 }
102102
103103 public static File getOpennlpDataDir() throws FileNotFoundException {
104104 final String dataDirectory = System.getProperty("OPENNLP_DATA_DIR");
2121 import java.math.BigInteger;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Assert;
25 import org.junit.BeforeClass;
26 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeAll;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.chunker.ChunkerCrossValidator;
2929 import opennlp.tools.chunker.ChunkerFactory;
7474 return new PlainTextByLineStream(new MarkableFileInputStreamFactory(
7575 new File(getOpennlpDataDir(), corpus)), StandardCharsets.ISO_8859_1);
7676 }
77
78 @BeforeClass
79 public static void verifyTrainingData() throws Exception {
77
78 @BeforeAll
79 static void verifyTrainingData() throws Exception {
8080
8181 verifyTrainingData(new ADSentenceSampleStream(getLineSample(BOSQUE), false),
8282 new BigInteger("140568367548727787313497336739085858596"));
8383
8484 verifyTrainingData(new ADSentenceSampleStream(getLineSample(FLORESTA_VIRGEM), false),
8585 new BigInteger("2614161133949079191933514776652602918"));
86
86
8787 }
8888
8989 private void sentenceCrossEval(TrainingParameters params,
90 double expectedScore) throws IOException {
90 double expectedScore) throws IOException {
9191
9292 ADSentenceSampleStream samples = new ADSentenceSampleStream(
9393 getLineSample(FLORESTA_VIRGEM), false);
9999 cv.evaluate(samples, 10);
100100
101101 System.out.println(cv.getFMeasure());
102 Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
102 Assertions.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
103103 }
104104
105105 private void tokenizerCrossEval(TrainingParameters params,
106 double expectedScore) throws IOException {
106 double expectedScore) throws IOException {
107107
108108 ObjectStream<NameSample> nameSamples = new ADNameSampleStream(
109109 getLineSample(FLORESTA_VIRGEM), true);
124124 validator.evaluate(samples, 10);
125125
126126 System.out.println(validator.getFMeasure());
127 Assert.assertEquals(expectedScore, validator.getFMeasure().getFMeasure(),
128 0.0001d);
127 Assertions.assertEquals(expectedScore, validator.getFMeasure().getFMeasure(), 0.0001d);
129128 }
130129
131130 private void chunkerCrossEval(TrainingParameters params,
132 double expectedScore) throws IOException {
131 double expectedScore) throws IOException {
133132
134133 ADChunkSampleStream samples = new ADChunkSampleStream(getLineSample(BOSQUE));
135134
137136 new ChunkerFactory());
138137
139138 cv.evaluate(samples, 10);
140 Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
141 }
142
143 @Test
144 public void evalPortugueseSentenceDetectorPerceptron() throws IOException {
139 Assertions.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
140 }
141
142 @Test
143 void evalPortugueseSentenceDetectorPerceptron() throws IOException {
145144 sentenceCrossEval(createPerceptronParams(), 0.9892778840089301d);
146145 }
147146
148147 @Test
149 public void evalPortugueseSentenceDetectorGis() throws IOException {
148 void evalPortugueseSentenceDetectorGis() throws IOException {
150149 sentenceCrossEval(ModelUtil.createDefaultTrainingParameters(), 0.987270070655111d);
151150 }
152151
153152 @Test
154 public void evalPortugueseSentenceDetectorMaxentQn() throws IOException {
153 void evalPortugueseSentenceDetectorMaxentQn() throws IOException {
155154 sentenceCrossEval(createMaxentQnParams(), 0.9924715809679968d);
156155 }
157156
158157 @Test
159 public void evalPortugueseSentenceDetectorNaiveBayes() throws IOException {
158 void evalPortugueseSentenceDetectorNaiveBayes() throws IOException {
160159 sentenceCrossEval(createNaiveBayesParams(), 0.9672196206048099d);
161160 }
162161
163162 @Test
164 public void evalPortugueseTokenizerPerceptron() throws IOException {
163 void evalPortugueseTokenizerPerceptron() throws IOException {
165164 tokenizerCrossEval(createPerceptronParams(), 0.9994887308380267d);
166165 }
167166
168167 @Test
169 public void evalPortugueseTokenizerGis() throws IOException {
168 void evalPortugueseTokenizerGis() throws IOException {
170169 tokenizerCrossEval(ModelUtil.createDefaultTrainingParameters(), 0.9992539405481062d);
171170 }
172171
173172 @Test
174 public void evalPortugueseTokenizerMaxentQn() throws IOException {
173 void evalPortugueseTokenizerMaxentQn() throws IOException {
175174 tokenizerCrossEval(createMaxentQnParams(), 0.9996017148748251d);
176175 }
177176
178177 @Test
179 public void evalPortugueseTokenizerNaiveBayes() throws IOException {
178 void evalPortugueseTokenizerNaiveBayes() throws IOException {
180179 tokenizerCrossEval(createNaiveBayesParams(), 0.9962358244502717d);
181180 }
182181
183182 @Test
184 public void evalPortugueseTokenizerMaxentQnMultipleThreads() throws IOException {
183 void evalPortugueseTokenizerMaxentQnMultipleThreads() throws IOException {
185184 TrainingParameters params = createMaxentQnParams();
186185 params.put("Threads", 4);
187186 tokenizerCrossEval(params, 0.9996017148748251d);
188187 }
189188
190189 @Test
191 public void evalPortugueseChunkerPerceptron() throws IOException {
190 void evalPortugueseChunkerPerceptron() throws IOException {
192191 chunkerCrossEval(createPerceptronParams(),
193192 0.9638122825015589d);
194193 }
195194
196195 @Test
197 public void evalPortugueseChunkerGis() throws IOException {
196 void evalPortugueseChunkerGis() throws IOException {
198197 chunkerCrossEval(ModelUtil.createDefaultTrainingParameters(),
199198 0.9573860781121228d);
200199 }
201200
202201 @Test
203 public void evalPortugueseChunkerGisMultipleThreads() throws IOException {
202 void evalPortugueseChunkerGisMultipleThreads() throws IOException {
204203 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
205204 params.put("Threads", 4);
206205 chunkerCrossEval(params, 0.9573860781121228d);
207206 }
208207
209208 @Test
210 public void evalPortugueseChunkerQn() throws IOException {
209 void evalPortugueseChunkerQn() throws IOException {
211210 chunkerCrossEval(createMaxentQnParams(),
212211 0.9648211936491359d);
213212 }
214213
215214 @Test
216 public void evalPortugueseChunkerQnMultipleThreads() throws IOException {
215 void evalPortugueseChunkerQnMultipleThreads() throws IOException {
217216 TrainingParameters params = createMaxentQnParams();
218217 params.put("Threads", 4);
219218
222221 }
223222
224223 @Test
225 public void evalPortugueseChunkerNaiveBayes() throws IOException {
224 void evalPortugueseChunkerNaiveBayes() throws IOException {
226225 chunkerCrossEval(createNaiveBayesParams(), 0.9041507736043933d);
227226 }
228227 }
2121 import java.math.BigInteger;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Assert;
25 import org.junit.BeforeClass;
26 import org.junit.Test;
27 import org.junit.experimental.categories.Category;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeAll;
26 import org.junit.jupiter.api.Test;
2827
2928 import opennlp.tools.HighMemoryUsage;
3029 import opennlp.tools.chunker.ChunkSample;
4847 */
4948 public class Conll00ChunkerEval extends AbstractEvalTest {
5049
51 private static File TEST_DATA_FILE;
50 private static File TEST_DATA_FILE;
5251 private static File TRAIN_DATA_FILE;
53
52
5453 private static ChunkerModel train(File trainFile, TrainingParameters params)
5554 throws IOException {
5655
6968
7069 ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model));
7170 evaluator.evaluate(samples);
72 Assert.assertEquals(expectedFMeasure,
73 evaluator.getFMeasure().getFMeasure(), 0.0001);
71 Assertions.assertEquals(expectedFMeasure, evaluator.getFMeasure().getFMeasure(), 0.0001);
7472 }
75
76 @BeforeClass
77 public static void verifyTrainingData() throws Exception {
78
73
74 @BeforeAll
75 static void verifyTrainingData() throws Exception {
76
7977 TEST_DATA_FILE = new File(getOpennlpDataDir(), "conll00/test.txt");
8078 TRAIN_DATA_FILE = new File(getOpennlpDataDir(), "conll00/train.txt");
8179
8280 verifyTrainingData(new ChunkSampleStream(
8381 new PlainTextByLineStream(new MarkableFileInputStreamFactory(TEST_DATA_FILE),
84 StandardCharsets.UTF_8)),
82 StandardCharsets.UTF_8)),
8583 new BigInteger("84610235226433393380477662908529306002"));
8684
8785 verifyTrainingData(new ChunkSampleStream(
8886 new PlainTextByLineStream(new MarkableFileInputStreamFactory(TEST_DATA_FILE),
89 StandardCharsets.UTF_8)),
90 new BigInteger("84610235226433393380477662908529306002"));
87 StandardCharsets.UTF_8)),
88 new BigInteger("84610235226433393380477662908529306002"));
9189
9290 }
9391
9492 @Test
95 public void evalEnglishPerceptron() throws IOException {
93 void evalEnglishPerceptron() throws IOException {
9694 ChunkerModel maxentModel = train(TRAIN_DATA_FILE, createPerceptronParams());
9795
9896 eval(maxentModel, TEST_DATA_FILE, 0.9295018353434714d);
9997 }
10098
10199 @Test
102 public void evalEnglishMaxentGis() throws IOException {
100 void evalEnglishMaxentGis() throws IOException {
103101 ChunkerModel maxentModel = train(TRAIN_DATA_FILE, ModelUtil.createDefaultTrainingParameters());
104102
105103 eval(maxentModel, TEST_DATA_FILE, 0.9239687473746113d);
107105
108106 // Note: Don't try to run this on your MacBook
109107 @Test
110 @Category(HighMemoryUsage.class)
111 public void evalEnglishMaxentQn() throws IOException {
108 @HighMemoryUsage
109 void evalEnglishMaxentQn() throws IOException {
112110 TrainingParameters params = createMaxentQnParams();
113111 params.put("Threads", 4);
114112 ChunkerModel maxentModel = train(TRAIN_DATA_FILE, params);
2121
2222 import java.math.BigInteger;
2323
24 import org.junit.Assert;
25 import org.junit.BeforeClass;
26 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeAll;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.formats.Conll02NameSampleStream;
2929 import opennlp.tools.formats.Conll02NameSampleStream.LANGUAGE;
5858 private static File spanishTrainingFile;
5959 private static File spanishTestAFile;
6060 private static File spanishTestBFile;
61
61
6262
6363 private TokenNameFinderModel train(File trainFile, LANGUAGE lang,
64 TrainingParameters params, int types) throws IOException {
64 TrainingParameters params, int types) throws IOException {
6565
6666 ObjectStream<NameSample> samples = new Conll02NameSampleStream(
67 lang,new MarkableFileInputStreamFactory(trainFile), types);
68
69 return NameFinderME.train(lang.toString().toLowerCase(), null, samples,
67 lang, new MarkableFileInputStreamFactory(trainFile), types);
68
69 return NameFinderME.train(lang.toString().toLowerCase(), null, samples,
7070 params, new TokenNameFinderFactory());
7171 }
7272
7373 private void eval(TokenNameFinderModel model, File testData, LANGUAGE lang,
74 int types, double expectedFMeasure) throws IOException {
74 int types, double expectedFMeasure) throws IOException {
7575
7676 ObjectStream<NameSample> samples = new Conll02NameSampleStream(
7777 lang, new MarkableFileInputStreamFactory(testData), types);
7979 TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(new NameFinderME(model));
8080 evaluator.evaluate(samples);
8181
82 Assert.assertEquals(expectedFMeasure, evaluator.getFMeasure().getFMeasure(), 0.0001);
83 }
84
85 @BeforeClass
86 public static void verifyTrainingData() throws Exception {
82 Assertions.assertEquals(expectedFMeasure, evaluator.getFMeasure().getFMeasure(), 0.0001);
83 }
84
85 @BeforeAll
86 static void verifyTrainingData() throws Exception {
8787
8888 dutchTrainingFile = new File(getOpennlpDataDir(), "conll02/ner/data/ned.train");
8989 dutchTestAFile = new File(getOpennlpDataDir(), "conll02/ner/data/ned.testa");
9191 spanishTrainingFile = new File(getOpennlpDataDir(), "conll02/ner/data/esp.train");
9292 spanishTestAFile = new File(getOpennlpDataDir(), "conll02/ner/data/esp.testa");
9393 spanishTestBFile = new File(getOpennlpDataDir(), "conll02/ner/data/esp.testb");
94
94
9595 verifyTrainingData(new Conll02NameSampleStream(
96 LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTrainingFile),
97 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
98 new BigInteger("109687424525847313767541246922170457976"));
96 LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTrainingFile),
97 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
98 new BigInteger("109687424525847313767541246922170457976"));
9999 verifyTrainingData(new Conll02NameSampleStream(
100 LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestAFile),
101 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
102 new BigInteger("12942966701628852910737840182656846323"));
100 LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestAFile),
101 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
102 new BigInteger("12942966701628852910737840182656846323"));
103103 verifyTrainingData(new Conll02NameSampleStream(
104 LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestBFile),
105 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
106 new BigInteger("223206987942490952427646331013509976957"));
107
104 LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestBFile),
105 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
106 new BigInteger("223206987942490952427646331013509976957"));
107
108108 verifyTrainingData(new Conll02NameSampleStream(
109 LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTrainingFile),
110 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
111 new BigInteger("226089384066775461905386060946810714487"));
109 LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTrainingFile),
110 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
111 new BigInteger("226089384066775461905386060946810714487"));
112112 verifyTrainingData(new Conll02NameSampleStream(
113 LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestAFile),
114 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
115 new BigInteger("313879596837181728494732341737647284762"));
113 LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestAFile),
114 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
115 new BigInteger("313879596837181728494732341737647284762"));
116116 verifyTrainingData(new Conll02NameSampleStream(
117 LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestBFile),
118 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
119 new BigInteger("24037715705115461166858183817622459974"));
120
121 }
122
123 @Test
124 public void evalDutchPersonPerceptron() throws IOException {
117 LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestBFile),
118 Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
119 new BigInteger("24037715705115461166858183817622459974"));
120
121 }
122
123 @Test
124 void evalDutchPersonPerceptron() throws IOException {
125125 TrainingParameters params = createPerceptronParams();
126126
127127 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
135135 }
136136
137137 @Test
138 public void evalDutchPersonMaxentGis() throws IOException {
138 void evalDutchPersonMaxentGis() throws IOException {
139139 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
140140
141141 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
149149 }
150150
151151 @Test
152 public void evalDutchPersonMaxentQn() throws IOException {
152 void evalDutchPersonMaxentQn() throws IOException {
153153 TrainingParameters params = createMaxentQnParams();
154154
155155 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
163163 }
164164
165165 @Test
166 public void evalDutchOrganizationPerceptron() throws IOException {
166 void evalDutchOrganizationPerceptron() throws IOException {
167167 TrainingParameters params = createPerceptronParams();
168168
169169 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
177177 }
178178
179179 @Test
180 public void evalDutchOrganizationMaxentGis() throws IOException {
180 void evalDutchOrganizationMaxentGis() throws IOException {
181181 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
182182
183183 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
191191 }
192192
193193 @Test
194 public void evalDutchOrganizationMaxentQn() throws IOException {
194 void evalDutchOrganizationMaxentQn() throws IOException {
195195 TrainingParameters params = createMaxentQnParams();
196196
197197 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
205205 }
206206
207207 @Test
208 public void evalDutchLocationPerceptron() throws IOException {
208 void evalDutchLocationPerceptron() throws IOException {
209209 TrainingParameters params = createPerceptronParams();
210210
211211 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
219219 }
220220
221221 @Test
222 public void evalDutchLocationMaxentGis() throws IOException {
222 void evalDutchLocationMaxentGis() throws IOException {
223223 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
224224
225225 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
233233 }
234234
235235 @Test
236 public void evalDutchLocationMaxentQn() throws IOException {
236 void evalDutchLocationMaxentQn() throws IOException {
237237 TrainingParameters params = createMaxentQnParams();
238238
239239 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
247247 }
248248
249249 @Test
250 public void evalDutchMiscPerceptron() throws IOException {
250 void evalDutchMiscPerceptron() throws IOException {
251251 TrainingParameters params = createPerceptronParams();
252252
253253 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
261261 }
262262
263263 @Test
264 public void evalDutchMiscMaxentGis() throws IOException {
264 void evalDutchMiscMaxentGis() throws IOException {
265265 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
266266
267267 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
275275 }
276276
277277 @Test
278 public void evalDutchMiscMaxentQn() throws IOException {
278 void evalDutchMiscMaxentQn() throws IOException {
279279 TrainingParameters params = createMaxentQnParams();
280280
281281 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
289289 }
290290
291291 @Test
292 public void evalDutchCombinedPerceptron() throws IOException {
292 void evalDutchCombinedPerceptron() throws IOException {
293293 TrainingParameters params = createPerceptronParams();
294294
295295 int combinedType = Conll02NameSampleStream.GENERATE_PERSON_ENTITIES
300300 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
301301 combinedType);
302302
303 eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.727808326787117d);
303 eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.727808326787117d);
304304
305305 eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.7388253638253639d);
306306 }
307307
308308 @Test
309 public void evalDutchCombinedMaxentGis() throws IOException {
309 void evalDutchCombinedMaxentGis() throws IOException {
310310 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
311311
312312 int combinedType = Conll02NameSampleStream.GENERATE_PERSON_ENTITIES
317317 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
318318 combinedType);
319319
320 eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6673209028459275d);
320 eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6673209028459275d);
321321
322322 eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.6984085910208306d);
323323 }
324324
325325 @Test
326 public void evalDutchCombinedMaxentQn() throws IOException {
326 void evalDutchCombinedMaxentQn() throws IOException {
327327 TrainingParameters params = createMaxentQnParams();
328328
329329 int combinedType = Conll02NameSampleStream.GENERATE_PERSON_ENTITIES
334334 TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
335335 combinedType);
336336
337 eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6999800915787379d);
337 eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6999800915787379d);
338338
339339 eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.7101430258496261d);
340340 }
341341
342342 @Test
343 public void evalSpanishPersonPerceptron() throws IOException {
343 void evalSpanishPersonPerceptron() throws IOException {
344344 TrainingParameters params = createPerceptronParams();
345345
346346 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
354354 }
355355
356356 @Test
357 public void evalSpanishPersonMaxentGis() throws IOException {
357 void evalSpanishPersonMaxentGis() throws IOException {
358358 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
359359
360360 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
369369
370370
371371 @Test
372 public void evalSpanishPersonMaxentQn() throws IOException {
372 void evalSpanishPersonMaxentQn() throws IOException {
373373 TrainingParameters params = createMaxentQnParams();
374374
375375 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
383383 }
384384
385385 @Test
386 public void evalSpanishOrganizationPerceptron() throws IOException {
386 void evalSpanishOrganizationPerceptron() throws IOException {
387387 TrainingParameters params = createPerceptronParams();
388388
389389 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
397397 }
398398
399399 @Test
400 public void evalSpanishOrganizationMaxentGis() throws IOException {
400 void evalSpanishOrganizationMaxentGis() throws IOException {
401401 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
402402
403403 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
411411 }
412412
413413 @Test
414 public void evalSpanishOrganizationMaxentQn() throws IOException {
414 void evalSpanishOrganizationMaxentQn() throws IOException {
415415 TrainingParameters params = createMaxentQnParams();
416416
417417 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
425425 }
426426
427427 @Test
428 public void evalSpanishLocationPerceptron() throws IOException {
428 void evalSpanishLocationPerceptron() throws IOException {
429429 TrainingParameters params = createPerceptronParams();
430430
431431 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
439439 }
440440
441441 @Test
442 public void evalSpanishLocationMaxentGis() throws IOException {
442 void evalSpanishLocationMaxentGis() throws IOException {
443443 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
444444
445445 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
453453 }
454454
455455 @Test
456 public void evalSpanishLocationMaxentQn() throws IOException {
456 void evalSpanishLocationMaxentQn() throws IOException {
457457 TrainingParameters params = createMaxentQnParams();
458458
459459 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
467467 }
468468
469469 @Test
470 public void evalSpanishMiscPerceptron() throws IOException {
470 void evalSpanishMiscPerceptron() throws IOException {
471471 TrainingParameters params = createPerceptronParams();
472472
473473 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
481481 }
482482
483483 @Test
484 public void evalSpanishMiscMaxentGis() throws IOException {
484 void evalSpanishMiscMaxentGis() throws IOException {
485485 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
486486
487487 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
495495 }
496496
497497 @Test
498 public void evalSpanishMiscMaxentQn() throws IOException {
498 void evalSpanishMiscMaxentQn() throws IOException {
499499 TrainingParameters params = createMaxentQnParams();
500500
501501 TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
509509 }
510510
511511 @Test
512 public void evalSpanishCombinedPerceptron() throws IOException {
512 void evalSpanishCombinedPerceptron() throws IOException {
513513 TrainingParameters params = createPerceptronParams();
514514
515515 int combinedType = Conll02NameSampleStream.GENERATE_PERSON_ENTITIES
526526 }
527527
528528 @Test
529 public void evalSpanishCombinedMaxentGis() throws IOException {
529 void evalSpanishCombinedMaxentGis() throws IOException {
530530 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
531531
532532 int combinedType = Conll02NameSampleStream.GENERATE_PERSON_ENTITIES
543543 }
544544
545545 @Test
546 public void evalSpanishCombinedMaxentQn() throws IOException {
546 void evalSpanishCombinedMaxentQn() throws IOException {
547547 TrainingParameters params = createMaxentQnParams();
548548
549549 int combinedType = Conll02NameSampleStream.GENERATE_PERSON_ENTITIES
2121 import java.math.BigInteger;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Assert;
25 import org.junit.BeforeClass;
26 import org.junit.Test;
27 import org.junit.experimental.categories.Category;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeAll;
26 import org.junit.jupiter.api.Test;
2827
2928 import opennlp.tools.HighMemoryUsage;
3029 import opennlp.tools.formats.ConllXPOSSampleStream;
5958 public class ConllXPosTaggerEval extends AbstractEvalTest {
6059
6160 private POSModel train(File trainFile, String lang,
62 TrainingParameters params) throws IOException {
61 TrainingParameters params) throws IOException {
6362
6463 ObjectStream<POSSample> samples =
6564 new ConllXPOSSampleStream(new MarkableFileInputStreamFactory(trainFile), StandardCharsets.UTF_8);
6867 }
6968
7069 private void eval(POSModel model, File testData,
71 double expectedAccuracy) throws IOException {
70 double expectedAccuracy) throws IOException {
7271
7372 ObjectStream<POSSample> samples = new ConllXPOSSampleStream(
7473 new MarkableFileInputStreamFactory(testData), StandardCharsets.UTF_8);
7675 POSEvaluator evaluator = new POSEvaluator(new POSTaggerME(model));
7776 evaluator.evaluate(samples);
7877
79 Assert.assertEquals(expectedAccuracy, evaluator.getWordAccuracy(), 0.0001);
80 }
81
82 @BeforeClass
83 public static void verifyTrainingData() throws Exception {
84
85 verifyTrainingData(new ConllXPOSSampleStream(
86 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
87 "conllx/data/danish/ddt/train/danish_ddt_train.conll")), StandardCharsets.UTF_8),
78 Assertions.assertEquals(expectedAccuracy, evaluator.getWordAccuracy(), 0.0001);
79 }
80
81 @BeforeAll
82 static void verifyTrainingData() throws Exception {
83
84 verifyTrainingData(new ConllXPOSSampleStream(
85 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
86 "conllx/data/danish/ddt/train/danish_ddt_train.conll")), StandardCharsets.UTF_8),
8887 new BigInteger("30795670444498617202001550516753630016"));
89
90 verifyTrainingData(new ConllXPOSSampleStream(
91 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
92 "conllx/data/danish/ddt/test/danish_ddt_test.conll")), StandardCharsets.UTF_8),
93 new BigInteger("314104267846430512372780024568104131337"));
94
95 verifyTrainingData(new ConllXPOSSampleStream(
96 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
97 "conllx/data/dutch/alpino/train/dutch_alpino_train.conll")), StandardCharsets.UTF_8),
98 new BigInteger("109328245573060521952850454797286933887"));
99
100 verifyTrainingData(new ConllXPOSSampleStream(
101 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
102 "conllx/data/dutch/alpino/test/dutch_alpino_test.conll")), StandardCharsets.UTF_8),
103 new BigInteger("132343141132816640849897155456916243039"));
104
105 verifyTrainingData(new ConllXPOSSampleStream(
106 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
107 "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll")), StandardCharsets.UTF_8),
108 new BigInteger("9504382474772307801979515927230835901"));
109
110 verifyTrainingData(new ConllXPOSSampleStream(
111 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
112 "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll")), StandardCharsets.UTF_8),
113 new BigInteger("175256039869578311901318972681191182910"));
114
115 verifyTrainingData(new ConllXPOSSampleStream(
116 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
117 "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll")), StandardCharsets.UTF_8),
118 new BigInteger("128378790384268106811747599235147991544"));
119
120 }
121
122 @Test
123 public void evalDanishMaxentGis() throws IOException {
88
89 verifyTrainingData(new ConllXPOSSampleStream(
90 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
91 "conllx/data/danish/ddt/test/danish_ddt_test.conll")), StandardCharsets.UTF_8),
92 new BigInteger("314104267846430512372780024568104131337"));
93
94 verifyTrainingData(new ConllXPOSSampleStream(
95 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
96 "conllx/data/dutch/alpino/train/dutch_alpino_train.conll")), StandardCharsets.UTF_8),
97 new BigInteger("109328245573060521952850454797286933887"));
98
99 verifyTrainingData(new ConllXPOSSampleStream(
100 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
101 "conllx/data/dutch/alpino/test/dutch_alpino_test.conll")),
102 StandardCharsets.UTF_8),
103 new BigInteger("132343141132816640849897155456916243039"));
104
105 verifyTrainingData(new ConllXPOSSampleStream(
106 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
107 "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll")),
108 StandardCharsets.UTF_8),
109 new BigInteger("9504382474772307801979515927230835901"));
110
111 verifyTrainingData(new ConllXPOSSampleStream(
112 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
113 "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll")),
114 StandardCharsets.UTF_8),
115 new BigInteger("175256039869578311901318972681191182910"));
116
117 verifyTrainingData(new ConllXPOSSampleStream(
118 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
119 "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll")),
120 StandardCharsets.UTF_8),
121 new BigInteger("128378790384268106811747599235147991544"));
122
123 }
124
125 @Test
126 void evalDanishMaxentGis() throws IOException {
124127 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
125128
126129 POSModel maxentModel = train(new File(getOpennlpDataDir(),
131134 }
132135
133136 @Test
134 public void evalDanishMaxentQn() throws IOException {
137 void evalDanishMaxentQn() throws IOException {
135138 TrainingParameters params = createMaxentQnParams();
136139
137140 POSModel maxentModel = train(new File(getOpennlpDataDir(),
142145 }
143146
144147 @Test
145 public void evalDutchMaxentGis() throws IOException {
148 void evalDutchMaxentGis() throws IOException {
146149 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
147150
148151 POSModel maxentModel = train(new File(getOpennlpDataDir(),
153156 }
154157
155158 @Test
156 @Category(HighMemoryUsage.class)
157 public void evalDutchMaxentQn() throws IOException {
159 @HighMemoryUsage
160 void evalDutchMaxentQn() throws IOException {
158161 TrainingParameters params = createMaxentQnParams();
159162
160163 POSModel maxentModel = train(new File(getOpennlpDataDir(),
165168 }
166169
167170 @Test
168 public void evalPortugueseMaxentGis() throws IOException {
171 void evalPortugueseMaxentGis() throws IOException {
169172 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
170173
171174 POSModel maxentModel = train(new File(getOpennlpDataDir(),
176179 }
177180
178181 @Test
179 public void evalPortugueseMaxentQn() throws IOException {
182 void evalPortugueseMaxentQn() throws IOException {
180183 TrainingParameters params = createMaxentQnParams();
181184
182185 POSModel maxentModel = train(new File(getOpennlpDataDir(),
187190 }
188191
189192 @Test
190 public void evalSwedishMaxentGis() throws IOException {
193 void evalSwedishMaxentGis() throws IOException {
191194 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
192195
193196 POSModel maxentModel = train(new File(getOpennlpDataDir(),
198201 }
199202
200203 @Test
201 public void evalSwedishMaxentQn() throws IOException {
204 void evalSwedishMaxentQn() throws IOException {
202205 TrainingParameters params = createMaxentQnParams();
203206
204207 POSModel maxentModel = train(new File(getOpennlpDataDir(),
2929 import java.nio.file.StandardCopyOption;
3030 import java.util.Map;
3131
32 import org.junit.Assert;
33 import org.junit.BeforeClass;
34 import org.junit.Test;
32 import org.junit.jupiter.api.Assertions;
33 import org.junit.jupiter.api.BeforeAll;
34 import org.junit.jupiter.api.Test;
3535
3636 import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
3737 import opennlp.tools.formats.DirectorySampleStream;
7979
8080 cv.evaluate(filteredSamples, 5);
8181
82 Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.001d);
82 Assertions.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.001d);
8383 }
8484 }
8585
86 @BeforeClass
87 public static void verifyTrainingData() throws Exception {
86 @BeforeAll
87 static void verifyTrainingData() throws Exception {
8888 verifyDirectoryChecksum(new File(getOpennlpDataDir(), "ontonotes4/data/files/data/english").toPath(),
8989 ".name", new BigInteger("74675117716526375898817028829433420680"));
9090 }
9191
9292 @Test
93 public void evalEnglishPersonNameFinder() throws IOException {
93 void evalEnglishPersonNameFinder() throws IOException {
9494 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
9595 params.put("Threads", "4");
9696 crossEval(params, "person", 0.822014580552418d);
9797 }
9898
9999 @Test
100 public void evalEnglishDateNameFinder() throws IOException {
100 void evalEnglishDateNameFinder() throws IOException {
101101 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
102102 params.put("Threads", "4");
103103 crossEval(params, "date", 0.8043873255040994d);
104104 }
105105
106106 @Test
107 public void evalAllTypesNameFinder() throws IOException {
107 void evalAllTypesNameFinder() throws IOException {
108108 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
109109 params.put("Threads", "4");
110110 crossEval(params, null, 0.8014054850253551d);
111111 }
112112
113113 @Test
114 public void evalAllTypesWithPOSNameFinder() throws IOException, URISyntaxException {
114 void evalAllTypesWithPOSNameFinder() throws IOException, URISyntaxException {
115115 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
116116 params.put("Threads", "4");
117117
135135 StandardCopyOption.REPLACE_EXISTING);
136136
137137 Map<String, Object> resources = TokenNameFinderTrainerTool.loadResources(resourcesPath.toFile(),
138 Paths.get(this.getClass().getResource("ner-en_pos-features.xml").toURI()).toFile());
138 Paths.get(this.getClass().getResource("ner-en_pos-features.xml").toURI()).toFile());
139139
140140 try (ObjectStream<NameSample> samples = createNameSampleStream()) {
141141
148148
149149 cv.evaluate(filteredSamples, 5);
150150
151 Assert.assertEquals(0.8070226153653437d, cv.getFMeasure().getFMeasure(), 0.001d);
151 Assertions.assertEquals(0.8070226153653437d, cv.getFMeasure().getFMeasure(), 0.001d);
152152 }
153153 }
154154 }
2323 import java.math.BigInteger;
2424 import java.nio.charset.StandardCharsets;
2525
26 import org.junit.Assert;
27 import org.junit.BeforeClass;
28 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.BeforeAll;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.formats.DirectorySampleStream;
3131 import opennlp.tools.formats.convert.FileToStringSampleStream;
6464 ParserCrossValidator cv = new ParserCrossValidator("eng", params, rules, ParserType.CHUNKING);
6565 cv.evaluate(samples, 5);
6666
67 Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
67 Assertions.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
6868 }
6969 }
7070
71 @BeforeClass
72 public static void verifyTrainingData() throws Exception {
71 @BeforeAll
72 static void verifyTrainingData() throws Exception {
7373 verifyTrainingData(createParseSampleStream(), new BigInteger("83833369887442127665956850482411800415"));
7474 }
7575
7676 @Test
77 public void evalEnglishMaxent() throws IOException {
77 void evalEnglishMaxent() throws IOException {
7878
7979 HeadRules headRules;
8080 try (InputStream headRulesIn =
2222
2323 import java.nio.charset.StandardCharsets;
2424
25 import org.junit.Assert;
26 import org.junit.BeforeClass;
27 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.BeforeAll;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.formats.DirectorySampleStream;
3030 import opennlp.tools.formats.convert.FileToStringSampleStream;
6262 POSTaggerCrossValidator cv = new POSTaggerCrossValidator("eng", params, new POSTaggerFactory());
6363 cv.evaluate(samples, 5);
6464
65 Assert.assertEquals(expectedScore, cv.getWordAccuracy(), 0.0001d);
65 Assertions.assertEquals(expectedScore, cv.getWordAccuracy(), 0.0001d);
6666 }
6767 }
6868
69 @BeforeClass
70 public static void verifyTrainingData() throws Exception {
69 @BeforeAll
70 static void verifyTrainingData() throws Exception {
7171 verifyTrainingData(createPOSSampleStream(), new BigInteger("300430765214895870888056958221353356972"));
7272 }
73
73
7474 @Test
75 public void evalEnglishMaxentTagger() throws IOException {
75 void evalEnglishMaxentTagger() throws IOException {
7676 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
7777 params.put("Threads", "4");
7878
2727 import java.util.List;
2828 import java.util.Objects;
2929
30 import org.junit.Assert;
31 import org.junit.BeforeClass;
32 import org.junit.Test;
30 import org.junit.jupiter.api.Assertions;
31 import org.junit.jupiter.api.BeforeAll;
32 import org.junit.jupiter.api.Test;
3333
3434 import opennlp.tools.chunker.Chunker;
3535 import opennlp.tools.chunker.ChunkerME;
124124 private final Tokenizer tokenizer;
125125
126126 private LeipzigTestSampleStream(int sentencePerDocument, Tokenizer tokenizer, InputStreamFactory in)
127 throws IOException {
127 throws IOException {
128128 super(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
129129 this.sentencePerDocument = sentencePerDocument;
130130 this.tokenizer = tokenizer;
158158 }
159159 }
160160
161 @BeforeClass
162 public static void verifyTrainingData() throws Exception {
161 @BeforeAll
162 static void verifyTrainingData() throws Exception {
163163 verifyTrainingData(new LeipzigTestSampleStream(25, SimpleTokenizer.INSTANCE,
164164 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
165 "leipzig/eng_news_2010_300K-sentences.txt"))),
165 "leipzig/eng_news_2010_300K-sentences.txt"))),
166166 new BigInteger("172812413483919324675263268750583851712"));
167167 }
168168
169169 @Test
170 public void evalSentenceModel() throws Exception {
170 void evalSentenceModel() throws Exception {
171171
172172 SentenceModel model = new SentenceModel(
173 new File(getOpennlpDataDir(), "models-sf/en-sent.bin"));
173 new File(getOpennlpDataDir(), "models-sf/en-sent.bin"));
174174
175175 MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
176176
179179 StringBuilder text = new StringBuilder();
180180
181181 try (ObjectStream<LeipzigTestSample> lineBatches = new LeipzigTestSampleStream(25,
182 SimpleTokenizer.INSTANCE,
183 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
184 "leipzig/eng_news_2010_300K-sentences.txt")))) {
182 SimpleTokenizer.INSTANCE,
183 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
184 "leipzig/eng_news_2010_300K-sentences.txt")))) {
185185
186186 LeipzigTestSample lineBatch;
187187 while ((lineBatch = lineBatches.read()) != null) {
195195 digest.update(sentence.getBytes(StandardCharsets.UTF_8));
196196 }
197197
198 Assert.assertEquals(new BigInteger("228544068397077998410949364710969159291"),
199 new BigInteger(1, digest.digest()));
200 }
201
202 @Test
203 public void evalTokenModel() throws Exception {
198 Assertions.assertEquals(new BigInteger("228544068397077998410949364710969159291"),
199 new BigInteger(1, digest.digest()));
200 }
201
202 @Test
203 void evalTokenModel() throws Exception {
204204
205205 // the input stream is currently tokenized, we should detokenize it again,
206206 // (or extend to pass in tokenizer, then whitespace tokenizer can be passed)
207207 // and then tokenize it here
208208
209209 TokenizerModel model = new TokenizerModel(
210 new File(getOpennlpDataDir(), "models-sf/en-token.bin"));
210 new File(getOpennlpDataDir(), "models-sf/en-token.bin"));
211211
212212 MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
213213
214214 Tokenizer tokenizer = new TokenizerME(model);
215215
216216 try (ObjectStream<LeipzigTestSample> lines = new LeipzigTestSampleStream(1,
217 WhitespaceTokenizer.INSTANCE,
218 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
219 "leipzig/eng_news_2010_300K-sentences.txt")))) {
217 WhitespaceTokenizer.INSTANCE,
218 new MarkableFileInputStreamFactory(new File(getOpennlpDataDir(),
219 "leipzig/eng_news_2010_300K-sentences.txt")))) {
220220
221221 LeipzigTestSample line;
222222 while ((line = lines.read()) != null) {
227227 }
228228 }
229229
230 Assert.assertEquals(new BigInteger("180602607571756839321060482558626151930"),
231 new BigInteger(1, digest.digest()));
230 Assertions.assertEquals(new BigInteger("180602607571756839321060482558626151930"),
231 new BigInteger(1, digest.digest()));
232232 }
233233
234234 private ObjectStream<LeipzigTestSample> createLineWiseStream() throws IOException {
258258 }
259259 }
260260
261 Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
262 }
263
264 @Test
265 public void evalNerDateModel() throws Exception {
261 Assertions.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
262 }
263
264 @Test
265 void evalNerDateModel() throws Exception {
266266 TokenNameFinderModel personModel = new TokenNameFinderModel(
267267 new File(getOpennlpDataDir(), "models-sf/en-ner-date.bin"));
268268
270270 }
271271
272272 @Test
273 public void evalNerLocationModel() throws Exception {
273 void evalNerLocationModel() throws Exception {
274274 TokenNameFinderModel personModel = new TokenNameFinderModel(
275275 new File(getOpennlpDataDir(), "models-sf/en-ner-location.bin"));
276276
278278 }
279279
280280 @Test
281 public void evalNerMoneyModel() throws Exception {
281 void evalNerMoneyModel() throws Exception {
282282 TokenNameFinderModel personModel = new TokenNameFinderModel(
283283 new File(getOpennlpDataDir(), "models-sf/en-ner-money.bin"));
284284
286286 }
287287
288288 @Test
289 public void evalNerOrganizationModel() throws Exception {
289 void evalNerOrganizationModel() throws Exception {
290290 TokenNameFinderModel personModel = new TokenNameFinderModel(
291291 new File(getOpennlpDataDir(), "models-sf/en-ner-organization.bin"));
292292
294294 }
295295
296296 @Test
297 public void evalNerPercentageModel() throws Exception {
297 void evalNerPercentageModel() throws Exception {
298298 TokenNameFinderModel personModel = new TokenNameFinderModel(
299299 new File(getOpennlpDataDir(), "models-sf/en-ner-percentage.bin"));
300300
302302 }
303303
304304 @Test
305 public void evalNerPersonModel() throws Exception {
305 void evalNerPersonModel() throws Exception {
306306 TokenNameFinderModel personModel = new TokenNameFinderModel(
307307 new File(getOpennlpDataDir(), "models-sf/en-ner-person.bin"));
308308
310310 }
311311
312312 @Test
313 public void evalNerTimeModel() throws Exception {
313 void evalNerTimeModel() throws Exception {
314314 TokenNameFinderModel personModel = new TokenNameFinderModel(
315315 new File(getOpennlpDataDir(), "models-sf/en-ner-time.bin"));
316316
318318 }
319319
320320 @Test
321 public void evalChunkerModel() throws Exception {
321 void evalChunkerModel() throws Exception {
322322
323323 MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
324324
341341 }
342342 }
343343
344 Assert.assertEquals(new BigInteger("226003515785585284478071030961407561943"),
344 Assertions.assertEquals(new BigInteger("226003515785585284478071030961407561943"),
345345 new BigInteger(1, digest.digest()));
346346 }
347347
365365 }
366366 }
367367
368 Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
369 }
370
371 @Test
372 public void evalMaxentModel() throws Exception {
368 Assertions.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
369 }
370
371 @Test
372 void evalMaxentModel() throws Exception {
373373 POSModel maxentModel = new POSModel(
374374 new File(getOpennlpDataDir(), "models-sf/en-pos-maxent.bin"));
375375
377377 }
378378
379379 @Test
380 public void evalPerceptronModel() throws Exception {
380 void evalPerceptronModel() throws Exception {
381381 POSModel perceptronModel = new POSModel(
382382 new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin"));
383383
385385 }
386386
387387 @Test
388 public void evalParserModel() throws Exception {
388 void evalParserModel() throws Exception {
389389
390390 ParserModel model = new ParserModel(
391391 new File(getOpennlpDataDir(), "models-sf/en-parser-chunking.bin"));
409409 }
410410 }
411411
412 Assert.assertEquals(new BigInteger("68039262350771988792233880373220954061"),
412 Assertions.assertEquals(new BigInteger("68039262350771988792233880373220954061"),
413413 new BigInteger(1, digest.digest()));
414414 }
415415 }
2020 import java.io.IOException;
2121 import java.math.BigInteger;
2222
23 import org.junit.Assert;
24 import org.junit.BeforeClass;
25 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.BeforeAll;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.formats.conllu.ConlluLemmaSampleStream;
2828 import opennlp.tools.formats.conllu.ConlluStream;
3939
4040 public class UniversalDependency20Eval extends AbstractEvalTest {
4141
42 private static File SPA_ANCORA_TRAIN;
42 private static File SPA_ANCORA_TRAIN;
4343 private static File SPA_ANCORA_DEV;
4444
45 @BeforeClass
46 public static void verifyTrainingData() throws Exception {
45 @BeforeAll
46 static void verifyTrainingData() throws Exception {
4747
48 SPA_ANCORA_TRAIN = new File(getOpennlpDataDir(),"ud20/UD_Spanish-AnCora/es_ancora-ud-train.conllu");
49 SPA_ANCORA_DEV = new File(getOpennlpDataDir(),"ud20/UD_Spanish-AnCora/es_ancora-ud-dev.conllu");
48 SPA_ANCORA_TRAIN = new File(getOpennlpDataDir(), "ud20/UD_Spanish-AnCora/es_ancora-ud-train.conllu");
49 SPA_ANCORA_DEV = new File(getOpennlpDataDir(), "ud20/UD_Spanish-AnCora/es_ancora-ud-dev.conllu");
5050
5151 verifyFileChecksum(SPA_ANCORA_TRAIN.toPath(),
5252 new BigInteger("224942804200733453179524127037951530195"));
5555 }
5656
5757 private double trainAndEval(String lang, File trainFile, TrainingParameters params,
58 File evalFile) throws IOException {
58 File evalFile) throws IOException {
5959 ConlluTagset tagset = ConlluTagset.X;
6060
6161 ObjectStream<LemmaSample> trainSamples = new ConlluLemmaSampleStream(new ConlluStream(
7171 }
7272
7373 @Test
74 public void trainAndEvalSpanishAncora() throws IOException {
74 void trainAndEvalSpanishAncora() throws IOException {
7575 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
7676 params.put("Threads", "4");
7777
7878 double wordAccuracy = trainAndEval("spa", SPA_ANCORA_TRAIN,
7979 params, SPA_ANCORA_DEV);
8080
81 Assert.assertEquals(0.9057341692068787d, wordAccuracy, ACCURACY_DELTA);
81 Assertions.assertEquals(0.9057341692068787d, wordAccuracy, ACCURACY_DELTA);
8282 }
8383 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.formats.Conll02NameSampleStream.LANGUAGE;
2525 import opennlp.tools.namefind.NameSample;
4141 }
4242
4343 @Test
44 public void testParsingSpanishSample() throws IOException {
44 void testParsingSpanishSample() throws IOException {
4545
4646 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.SPA, "conll2002-es.sample");
4747
4848 NameSample personName = sampleStream.read();
4949
50 Assert.assertNotNull(personName);
50 Assertions.assertNotNull(personName);
5151
52 Assert.assertEquals(5, personName.getSentence().length);
53 Assert.assertEquals(1, personName.getNames().length);
54 Assert.assertEquals(true, personName.isClearAdaptiveDataSet());
52 Assertions.assertEquals(5, personName.getSentence().length);
53 Assertions.assertEquals(1, personName.getNames().length);
54 Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
5555
5656 Span nameSpan = personName.getNames()[0];
57 Assert.assertEquals(0, nameSpan.getStart());
58 Assert.assertEquals(4, nameSpan.getEnd());
59 Assert.assertEquals(true, personName.isClearAdaptiveDataSet());
57 Assertions.assertEquals(0, nameSpan.getStart());
58 Assertions.assertEquals(4, nameSpan.getEnd());
59 Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
6060
61 Assert.assertEquals(0, sampleStream.read().getNames().length);
61 Assertions.assertEquals(0, sampleStream.read().getNames().length);
6262
63 Assert.assertNull(sampleStream.read());
63 Assertions.assertNull(sampleStream.read());
6464 }
6565
6666 @Test
67 public void testParsingDutchSample() throws IOException {
67 void testParsingDutchSample() throws IOException {
6868 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");
6969
7070 NameSample personName = sampleStream.read();
7171
72 Assert.assertEquals(0, personName.getNames().length);
73 Assert.assertTrue(personName.isClearAdaptiveDataSet());
72 Assertions.assertEquals(0, personName.getNames().length);
73 Assertions.assertTrue(personName.isClearAdaptiveDataSet());
7474
7575 personName = sampleStream.read();
7676
77 Assert.assertFalse(personName.isClearAdaptiveDataSet());
77 Assertions.assertFalse(personName.isClearAdaptiveDataSet());
7878
79 Assert.assertNull(sampleStream.read());
79 Assertions.assertNull(sampleStream.read());
8080 }
8181
8282 @Test
83 public void testReset() throws IOException {
83 void testReset() throws IOException {
8484 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");
8585
8686 NameSample sample = sampleStream.read();
8787
8888 sampleStream.reset();
8989
90 Assert.assertEquals(sample, sampleStream.read());
90 Assertions.assertEquals(sample, sampleStream.read());
9191 }
9292 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.formats.Conll03NameSampleStream.LANGUAGE;
2525 import opennlp.tools.namefind.NameSample;
4444 }
4545
4646 @Test
47 public void testParsingEnglishSample() throws IOException {
47 void testParsingEnglishSample() throws IOException {
4848
4949 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.EN, ENGLISH_SAMPLE);
5050
5151 NameSample personName = sampleStream.read();
52 Assert.assertNotNull(personName);
52 Assertions.assertNotNull(personName);
5353
54 Assert.assertEquals(9, personName.getSentence().length);
55 Assert.assertEquals(0, personName.getNames().length);
56 Assert.assertEquals(true, personName.isClearAdaptiveDataSet());
54 Assertions.assertEquals(9, personName.getSentence().length);
55 Assertions.assertEquals(0, personName.getNames().length);
56 Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
5757
5858 personName = sampleStream.read();
5959
60 Assert.assertNotNull(personName);
60 Assertions.assertNotNull(personName);
6161
62 Assert.assertEquals(2, personName.getSentence().length);
63 Assert.assertEquals(1, personName.getNames().length);
64 Assert.assertEquals(false, personName.isClearAdaptiveDataSet());
62 Assertions.assertEquals(2, personName.getSentence().length);
63 Assertions.assertEquals(1, personName.getNames().length);
64 Assertions.assertEquals(false, personName.isClearAdaptiveDataSet());
6565
6666 Span nameSpan = personName.getNames()[0];
67 Assert.assertEquals(0, nameSpan.getStart());
68 Assert.assertEquals(2, nameSpan.getEnd());
67 Assertions.assertEquals(0, nameSpan.getStart());
68 Assertions.assertEquals(2, nameSpan.getEnd());
6969
70 Assert.assertNull(sampleStream.read());
71 }
72
73 @Test(expected = IOException.class)
74 public void testParsingEnglishSampleWithGermanAsLanguage() throws IOException {
75 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, ENGLISH_SAMPLE);
76 sampleStream.read();
77 }
78
79 @Test(expected = IOException.class)
80 public void testParsingGermanSampleWithEnglishAsLanguage() throws IOException {
81 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.EN, GERMAN_SAMPLE);
82 sampleStream.read();
70 Assertions.assertNull(sampleStream.read());
8371 }
8472
8573 @Test
86 public void testParsingGermanSample() throws IOException {
74 void testParsingEnglishSampleWithGermanAsLanguage() {
75 Assertions.assertThrows(IOException.class, () -> {
76 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, ENGLISH_SAMPLE);
77 sampleStream.read();
78 });
79 }
80
81 @Test
82 void testParsingGermanSampleWithEnglishAsLanguage() {
83 Assertions.assertThrows(IOException.class, () -> {
84 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.EN, GERMAN_SAMPLE);
85 sampleStream.read();
86 });
87 }
88
89 @Test
90 void testParsingGermanSample() throws IOException {
8791
8892 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE);
8993
9094 NameSample personName = sampleStream.read();
91 Assert.assertNotNull(personName);
95 Assertions.assertNotNull(personName);
9296
93 Assert.assertEquals(5, personName.getSentence().length);
94 Assert.assertEquals(0, personName.getNames().length);
95 Assert.assertEquals(true, personName.isClearAdaptiveDataSet());
97 Assertions.assertEquals(5, personName.getSentence().length);
98 Assertions.assertEquals(0, personName.getNames().length);
99 Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
96100 }
97101
98102 @Test
99 public void testReset() throws IOException {
103 void testReset() throws IOException {
100104 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE);
101105
102106 NameSample sample = sampleStream.read();
103107
104108 sampleStream.reset();
105109
106 Assert.assertEquals(sample, sampleStream.read());
110 Assertions.assertEquals(sample, sampleStream.read());
107111 }
108112 }
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2324
2425 import opennlp.tools.postag.POSSample;
2526 import opennlp.tools.util.InputStreamFactory;
2627 import opennlp.tools.util.ObjectStream;
2728
28 import static org.junit.Assert.assertEquals;
29 import static org.junit.Assert.assertNull;
30
3129 public class ConllXPOSSampleStreamTest {
3230
3331 @Test
34 public void testParsingSample() throws IOException {
32 void testParsingSample() throws IOException {
3533
3634 InputStreamFactory in = new ResourceAsStreamFactory(ConllXPOSSampleStreamTest.class,
3735 "/opennlp/tools/formats/conllx.sample");
4240 String[] aSentence = a.getSentence();
4341 String[] aTags = a.getTags();
4442
45 assertEquals(22, aSentence.length);
46 assertEquals(22, aTags.length);
43 Assertions.assertEquals(22, aSentence.length);
44 Assertions.assertEquals(22, aTags.length);
4745
48 assertEquals("To", aSentence[0]);
49 assertEquals("AC", aTags[0]);
46 Assertions.assertEquals("To", aSentence[0]);
47 Assertions.assertEquals("AC", aTags[0]);
5048
51 assertEquals("kendte", aSentence[1]);
52 assertEquals("AN", aTags[1]);
49 Assertions.assertEquals("kendte", aSentence[1]);
50 Assertions.assertEquals("AN", aTags[1]);
5351
54 assertEquals("russiske", aSentence[2]);
55 assertEquals("AN", aTags[2]);
52 Assertions.assertEquals("russiske", aSentence[2]);
53 Assertions.assertEquals("AN", aTags[2]);
5654
57 assertEquals("historikere", aSentence[3]);
58 assertEquals("NC", aTags[3]);
55 Assertions.assertEquals("historikere", aSentence[3]);
56 Assertions.assertEquals("NC", aTags[3]);
5957
60 assertEquals("Andronik", aSentence[4]);
61 assertEquals("NP", aTags[4]);
58 Assertions.assertEquals("Andronik", aSentence[4]);
59 Assertions.assertEquals("NP", aTags[4]);
6260
63 assertEquals("Andronik", aSentence[5]);
64 assertEquals("NP", aTags[5]);
61 Assertions.assertEquals("Andronik", aSentence[5]);
62 Assertions.assertEquals("NP", aTags[5]);
6563
66 assertEquals("og", aSentence[6]);
67 assertEquals("CC", aTags[6]);
64 Assertions.assertEquals("og", aSentence[6]);
65 Assertions.assertEquals("CC", aTags[6]);
6866
69 assertEquals("Igor", aSentence[7]);
70 assertEquals("NP", aTags[7]);
67 Assertions.assertEquals("Igor", aSentence[7]);
68 Assertions.assertEquals("NP", aTags[7]);
7169
72 assertEquals("Klamkin", aSentence[8]);
73 assertEquals("NP", aTags[8]);
70 Assertions.assertEquals("Klamkin", aSentence[8]);
71 Assertions.assertEquals("NP", aTags[8]);
7472
75 assertEquals("tror", aSentence[9]);
76 assertEquals("VA", aTags[9]);
73 Assertions.assertEquals("tror", aSentence[9]);
74 Assertions.assertEquals("VA", aTags[9]);
7775
78 assertEquals("ikke", aSentence[10]);
79 assertEquals("RG", aTags[10]);
76 Assertions.assertEquals("ikke", aSentence[10]);
77 Assertions.assertEquals("RG", aTags[10]);
8078
81 assertEquals(",", aSentence[11]);
82 assertEquals("XP", aTags[11]);
79 Assertions.assertEquals(",", aSentence[11]);
80 Assertions.assertEquals("XP", aTags[11]);
8381
84 assertEquals("at", aSentence[12]);
85 assertEquals("CS", aTags[12]);
82 Assertions.assertEquals("at", aSentence[12]);
83 Assertions.assertEquals("CS", aTags[12]);
8684
87 assertEquals("Rusland", aSentence[13]);
88 assertEquals("NP", aTags[13]);
85 Assertions.assertEquals("Rusland", aSentence[13]);
86 Assertions.assertEquals("NP", aTags[13]);
8987
90 assertEquals("kan", aSentence[14]);
91 assertEquals("VA", aTags[14]);
88 Assertions.assertEquals("kan", aSentence[14]);
89 Assertions.assertEquals("VA", aTags[14]);
9290
93 assertEquals("udvikles", aSentence[15]);
94 assertEquals("VA", aTags[15]);
91 Assertions.assertEquals("udvikles", aSentence[15]);
92 Assertions.assertEquals("VA", aTags[15]);
9593
96 assertEquals("uden", aSentence[16]);
97 assertEquals("SP", aTags[16]);
94 Assertions.assertEquals("uden", aSentence[16]);
95 Assertions.assertEquals("SP", aTags[16]);
9896
99 assertEquals("en", aSentence[17]);
100 assertEquals("PI", aTags[17]);
97 Assertions.assertEquals("en", aSentence[17]);
98 Assertions.assertEquals("PI", aTags[17]);
10199
102 assertEquals("\"", aSentence[18]);
103 assertEquals("XP", aTags[18]);
100 Assertions.assertEquals("\"", aSentence[18]);
101 Assertions.assertEquals("XP", aTags[18]);
104102
105 assertEquals("jernnæve", aSentence[19]);
106 assertEquals("NC", aTags[19]);
103 Assertions.assertEquals("jernnæve", aSentence[19]);
104 Assertions.assertEquals("NC", aTags[19]);
107105
108 assertEquals("\"", aSentence[20]);
109 assertEquals("XP", aTags[20]);
106 Assertions.assertEquals("\"", aSentence[20]);
107 Assertions.assertEquals("XP", aTags[20]);
110108
111 assertEquals(".", aSentence[21]);
112 assertEquals("XP", aTags[21]);
109 Assertions.assertEquals(".", aSentence[21]);
110 Assertions.assertEquals("XP", aTags[21]);
113111
114112 POSSample b = sampleStream.read();
115113
116114 String[] bSentence = b.getSentence();
117115 String[] bTags = b.getTags();
118116
119 assertEquals(12, bSentence.length);
120 assertEquals(12, bTags.length);
117 Assertions.assertEquals(12, bSentence.length);
118 Assertions.assertEquals(12, bTags.length);
121119
122 assertEquals("De", bSentence[0]);
123 assertEquals("PP", bTags[0]);
120 Assertions.assertEquals("De", bSentence[0]);
121 Assertions.assertEquals("PP", bTags[0]);
124122
125 assertEquals("hævder", bSentence[1]);
126 assertEquals("VA", bTags[1]);
123 Assertions.assertEquals("hævder", bSentence[1]);
124 Assertions.assertEquals("VA", bTags[1]);
127125
128 assertEquals(",", bSentence[2]);
129 assertEquals("XP", bTags[2]);
126 Assertions.assertEquals(",", bSentence[2]);
127 Assertions.assertEquals("XP", bTags[2]);
130128
131 assertEquals("at", bSentence[3]);
132 assertEquals("CS", bTags[3]);
129 Assertions.assertEquals("at", bSentence[3]);
130 Assertions.assertEquals("CS", bTags[3]);
133131
134 assertEquals("Ruslands", bSentence[4]);
135 assertEquals("NP", bTags[4]);
132 Assertions.assertEquals("Ruslands", bSentence[4]);
133 Assertions.assertEquals("NP", bTags[4]);
136134
137 assertEquals("vej", bSentence[5]);
138 assertEquals("NC", bTags[5]);
135 Assertions.assertEquals("vej", bSentence[5]);
136 Assertions.assertEquals("NC", bTags[5]);
139137
140 assertEquals("til", bSentence[6]);
141 assertEquals("SP", bTags[6]);
138 Assertions.assertEquals("til", bSentence[6]);
139 Assertions.assertEquals("SP", bTags[6]);
142140
143 assertEquals("demokrati", bSentence[7]);
144 assertEquals("NC", bTags[7]);
141 Assertions.assertEquals("demokrati", bSentence[7]);
142 Assertions.assertEquals("NC", bTags[7]);
145143
146 assertEquals("går", bSentence[8]);
147 assertEquals("VA", bTags[8]);
144 Assertions.assertEquals("går", bSentence[8]);
145 Assertions.assertEquals("VA", bTags[8]);
148146
149 assertEquals("gennem", bSentence[9]);
150 assertEquals("SP", bTags[9]);
147 Assertions.assertEquals("gennem", bSentence[9]);
148 Assertions.assertEquals("SP", bTags[9]);
151149
152 assertEquals("diktatur", bSentence[10]);
153 assertEquals("NC", bTags[10]);
150 Assertions.assertEquals("diktatur", bSentence[10]);
151 Assertions.assertEquals("NC", bTags[10]);
154152
155 assertEquals(".", bSentence[11]);
156 assertEquals("XP", bTags[11]);
153 Assertions.assertEquals(".", bSentence[11]);
154 Assertions.assertEquals("XP", bTags[11]);
157155
158 assertNull(sampleStream.read());
156 Assertions.assertNull(sampleStream.read());
159157 }
160158 }
161159 }
1919 import java.io.File;
2020 import java.io.FileFilter;
2121 import java.io.IOException;
22 import java.nio.file.Files;
23 import java.nio.file.Path;
2224 import java.util.ArrayList;
2325 import java.util.List;
24
25 import org.junit.Assert;
26 import org.junit.Rule;
27 import org.junit.Test;
28 import org.junit.rules.TemporaryFolder;
26 import java.util.UUID;
27
28 import org.junit.jupiter.api.Assertions;
29 import org.junit.jupiter.api.Test;
30 import org.junit.jupiter.api.io.TempDir;
2931
3032 public class DirectorySampleStreamTest {
31
32 @Rule
33 public TemporaryFolder tempDirectory = new TemporaryFolder();
33
34 @TempDir
35 Path tempDirectory;
3436
3537 @Test
3638 public void directoryTest() throws IOException {
3941
4042 List<File> files = new ArrayList<>();
4143
42 File temp1 = tempDirectory.newFile();
43 files.add(temp1);
44
45 File temp2 = tempDirectory.newFile();
46 files.add(temp2);
47
48 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.getRoot(), filter, false);
49
50 File file = stream.read();
51 Assert.assertTrue(files.contains(file));
52
53 file = stream.read();
54 Assert.assertTrue(files.contains(file));
55
56 file = stream.read();
57 Assert.assertNull(file);
44 File temp1 = createTempFile();
45 files.add(temp1);
46
47 File temp2 = createTempFile();
48 files.add(temp2);
49
50 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.toFile(), filter, false);
51
52 File file = stream.read();
53 Assertions.assertTrue(files.contains(file));
54
55 file = stream.read();
56 Assertions.assertTrue(files.contains(file));
57
58 file = stream.read();
59 Assertions.assertNull(file);
5860
5961 stream.close();
6062
6567
6668 List<File> files = new ArrayList<>();
6769
68 File temp1 = tempDirectory.newFile();
69 files.add(temp1);
70
71 File temp2 = tempDirectory.newFile();
72 files.add(temp2);
73
74 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.getRoot(), null, false);
75
76 File file = stream.read();
77 Assert.assertTrue(files.contains(file));
78
79 file = stream.read();
80 Assert.assertTrue(files.contains(file));
81
82 file = stream.read();
83 Assert.assertNull(file);
70 File temp1 = createTempFile();
71 files.add(temp1);
72
73 File temp2 = createTempFile();
74 files.add(temp2);
75
76 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.toFile(), null, false);
77
78 File file = stream.read();
79 Assertions.assertTrue(files.contains(file));
80
81 file = stream.read();
82 Assertions.assertTrue(files.contains(file));
83
84 file = stream.read();
85 Assertions.assertNull(file);
8486
8587 stream.close();
8688
9395
9496 List<File> files = new ArrayList<>();
9597
96 File temp1 = tempDirectory.newFile();
97 files.add(temp1);
98
99 File tempSubDirectory = tempDirectory.newFolder("sub1");
98 File temp1 = createTempFile();
99 files.add(temp1);
100
101 File tempSubDirectory = createTempFolder("sub1");
100102 File temp2 = File.createTempFile("sub1", ".tmp", tempSubDirectory);
101103 files.add(temp2);
102104
103 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.getRoot(), filter, true);
104
105 File file = stream.read();
106 Assert.assertTrue(files.contains(file));
107
108 file = stream.read();
109 Assert.assertTrue(files.contains(file));
110
111 file = stream.read();
112 Assert.assertNull(file);
105 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.toFile(), filter, true);
106
107 File file = stream.read();
108 Assertions.assertTrue(files.contains(file));
109
110 file = stream.read();
111 Assertions.assertTrue(files.contains(file));
112
113 file = stream.read();
114 Assertions.assertNull(file);
113115
114116 stream.close();
115117
122124
123125 List<File> files = new ArrayList<>();
124126
125 File temp1 = tempDirectory.newFile();
126 files.add(temp1);
127
128 File temp2 = tempDirectory.newFile();
129 files.add(temp2);
130
131 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.getRoot(), filter, false);
132
133 File file = stream.read();
134 Assert.assertTrue(files.contains(file));
127 File temp1 = createTempFile();
128 files.add(temp1);
129
130 File temp2 = createTempFile();
131 files.add(temp2);
132
133 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.toFile(), filter, false);
134
135 File file = stream.read();
136 Assertions.assertTrue(files.contains(file));
135137
136138 stream.reset();
137139
138140 file = stream.read();
139 Assert.assertTrue(files.contains(file));
140
141 file = stream.read();
142 Assert.assertTrue(files.contains(file));
143
144 file = stream.read();
145 Assert.assertNull(file);
141 Assertions.assertTrue(files.contains(file));
142
143 file = stream.read();
144 Assertions.assertTrue(files.contains(file));
145
146 file = stream.read();
147 Assertions.assertNull(file);
146148
147149 stream.close();
148150
153155
154156 FileFilter filter = new TempFileNameFilter();
155157
156 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.getRoot(), filter, false);
157
158 Assert.assertNull(stream.read());
159
160 stream.close();
161
162 }
163
164 @Test(expected = IllegalArgumentException.class)
165 public void invalidDirectoryTest() throws IOException {
166
167 FileFilter filter = new TempFileNameFilter();
168
169 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.newFile(), filter, false);
170
171 Assert.assertNull(stream.read());
172
173 stream.close();
174
158 DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.toFile(), filter, false);
159
160 Assertions.assertNull(stream.read());
161
162 stream.close();
163
164 }
165
166 @Test
167 public void invalidDirectoryTest() {
168 Assertions.assertThrows(IllegalArgumentException.class, () -> {
169 FileFilter filter = new TempFileNameFilter();
170
171 DirectorySampleStream stream = new DirectorySampleStream(createTempFile(), filter, false);
172
173 Assertions.assertNull(stream.read());
174
175 stream.close();
176 });
177 }
178
179 private File createTempFolder(String name) {
180
181 Path subDir = tempDirectory.resolve(name);
182
183 try {
184 Files.createDirectory(subDir);
185 } catch (IOException e) {
186 throw new IllegalStateException(
187 "Could not create sub directory " + subDir.toFile().getAbsolutePath(), e);
188 }
189 return subDir.toFile();
190
191 }
192
193 private File createTempFile() {
194
195 Path tempFile = tempDirectory.resolve(UUID.randomUUID() + ".tmp");
196
197 try {
198 Files.createFile(tempFile);
199 } catch (IOException e) {
200 throw new IllegalStateException(
201 "Could not create file " + tempFile.toFile().getAbsolutePath(), e);
202 }
203 return tempFile.toFile();
204
175205 }
176206
177207 class TempFileNameFilter implements FileFilter {
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.formats.EvalitaNameSampleStream.LANGUAGE;
2525 import opennlp.tools.namefind.NameSample;
4141 }
4242
4343 @Test
44 public void testParsingItalianSample() throws IOException {
44 void testParsingItalianSample() throws IOException {
4545
4646 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.IT, "evalita-ner-it.sample");
4747
4848 NameSample personName = sampleStream.read();
4949
50 Assert.assertNotNull(personName);
50 Assertions.assertNotNull(personName);
5151
52 Assert.assertEquals(11, personName.getSentence().length);
53 Assert.assertEquals(1, personName.getNames().length);
54 Assert.assertEquals(true, personName.isClearAdaptiveDataSet());
52 Assertions.assertEquals(11, personName.getSentence().length);
53 Assertions.assertEquals(1, personName.getNames().length);
54 Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
5555
5656 Span nameSpan = personName.getNames()[0];
57 Assert.assertEquals(8, nameSpan.getStart());
58 Assert.assertEquals(10, nameSpan.getEnd());
59 Assert.assertEquals(true, personName.isClearAdaptiveDataSet());
57 Assertions.assertEquals(8, nameSpan.getStart());
58 Assertions.assertEquals(10, nameSpan.getEnd());
59 Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
6060
61 Assert.assertEquals(0, sampleStream.read().getNames().length);
61 Assertions.assertEquals(0, sampleStream.read().getNames().length);
6262
63 Assert.assertNull(sampleStream.read());
63 Assertions.assertNull(sampleStream.read());
6464 }
6565
6666 @Test
67 public void testReset() throws IOException {
67 void testReset() throws IOException {
6868 ObjectStream<NameSample> sampleStream = openData(LANGUAGE.IT, "evalita-ner-it.sample");
6969 NameSample sample = sampleStream.read();
7070 sampleStream.reset();
71 Assert.assertEquals(sample, sampleStream.read());
71 Assertions.assertEquals(sample, sampleStream.read());
7272 }
7373 }
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2324
2425 import opennlp.tools.util.InputStreamFactory;
2526 import opennlp.tools.util.ObjectStream;
2627 import opennlp.tools.util.StringList;
27
28 import static org.junit.Assert.assertEquals;
29 import static org.junit.Assert.assertNotNull;
30 import static org.junit.Assert.assertNull;
3128
3229 public class NameFinderCensus90NameStreamTest {
3330
4138 }
4239
4340 @Test
44 public void testParsingEnglishSample() throws IOException {
41 void testParsingEnglishSample() throws IOException {
4542
4643 ObjectStream<StringList> sampleStream = openData("census90.sample");
4744
4845 StringList personName = sampleStream.read();
4946
5047 // verify the first 5 taken from the Surname data
51 assertNotNull(personName);
52 assertEquals("Smith", personName.getToken(0));
48 Assertions.assertNotNull(personName);
49 Assertions.assertEquals("Smith", personName.getToken(0));
5350 personName = sampleStream.read();
54 assertNotNull(personName);
55 assertEquals("Johnson", personName.getToken(0));
51 Assertions.assertNotNull(personName);
52 Assertions.assertEquals("Johnson", personName.getToken(0));
5653 personName = sampleStream.read();
57 assertNotNull(personName);
58 assertEquals("Williams", personName.getToken(0));
54 Assertions.assertNotNull(personName);
55 Assertions.assertEquals("Williams", personName.getToken(0));
5956 personName = sampleStream.read();
60 assertNotNull(personName);
61 assertEquals("Jones", personName.getToken(0));
57 Assertions.assertNotNull(personName);
58 Assertions.assertEquals("Jones", personName.getToken(0));
6259 personName = sampleStream.read();
63 assertNotNull(personName);
64 assertEquals("Brown", personName.getToken(0));
60 Assertions.assertNotNull(personName);
61 Assertions.assertEquals("Brown", personName.getToken(0));
6562
6663 // verify the next 5 taken from the female names
6764 personName = sampleStream.read();
68 assertNotNull(personName);
69 assertEquals("Mary", personName.getToken(0));
65 Assertions.assertNotNull(personName);
66 Assertions.assertEquals("Mary", personName.getToken(0));
7067 personName = sampleStream.read();
71 assertNotNull(personName);
72 assertEquals("Patricia", personName.getToken(0));
68 Assertions.assertNotNull(personName);
69 Assertions.assertEquals("Patricia", personName.getToken(0));
7370 personName = sampleStream.read();
74 assertNotNull(personName);
75 assertEquals("Linda", personName.getToken(0));
71 Assertions.assertNotNull(personName);
72 Assertions.assertEquals("Linda", personName.getToken(0));
7673 personName = sampleStream.read();
77 assertNotNull(personName);
78 assertEquals("Barbara", personName.getToken(0));
74 Assertions.assertNotNull(personName);
75 Assertions.assertEquals("Barbara", personName.getToken(0));
7976 personName = sampleStream.read();
80 assertNotNull(personName);
81 assertEquals("Elizabeth", personName.getToken(0));
77 Assertions.assertNotNull(personName);
78 Assertions.assertEquals("Elizabeth", personName.getToken(0));
8279
8380 // verify the last 5 taken from the male names
8481 personName = sampleStream.read();
85 assertNotNull(personName);
86 assertEquals("James", personName.getToken(0));
82 Assertions.assertNotNull(personName);
83 Assertions.assertEquals("James", personName.getToken(0));
8784 personName = sampleStream.read();
88 assertNotNull(personName);
89 assertEquals("John", personName.getToken(0));
85 Assertions.assertNotNull(personName);
86 Assertions.assertEquals("John", personName.getToken(0));
9087 personName = sampleStream.read();
91 assertNotNull(personName);
92 assertEquals("Robert", personName.getToken(0));
88 Assertions.assertNotNull(personName);
89 Assertions.assertEquals("Robert", personName.getToken(0));
9390 personName = sampleStream.read();
94 assertNotNull(personName);
95 assertEquals("Michael", personName.getToken(0));
91 Assertions.assertNotNull(personName);
92 Assertions.assertEquals("Michael", personName.getToken(0));
9693 personName = sampleStream.read();
97 assertNotNull(personName);
98 assertEquals("William", personName.getToken(0));
94 Assertions.assertNotNull(personName);
95 Assertions.assertEquals("William", personName.getToken(0));
9996
10097 // verify the end of the file.
10198 personName = sampleStream.read();
102 assertNull(personName);
99 Assertions.assertNull(personName);
103100 }
104101
105102 }
1616
1717 package opennlp.tools.formats;
1818
19 import java.io.IOException;
2019 import java.io.InputStream;
2120 import java.util.Objects;
2221
3332 }
3433
3534 @Override
36 public InputStream createInputStream() throws IOException {
35 public InputStream createInputStream() {
3736 return clazz.getResourceAsStream(name);
3837 }
3938 }
2121 import java.util.ArrayList;
2222 import java.util.List;
2323
24 import org.junit.Assert;
25 import org.junit.Before;
26 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeEach;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.chunker.ChunkSample;
2929 import opennlp.tools.formats.ResourceAsStreamFactory;
3535 private List<ChunkSample> samples = new ArrayList<>();
3636
3737 @Test
38 public void testSimpleCount() {
39 Assert.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
38 void testSimpleCount() {
39 Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
4040 }
4141
4242 @Test
43 public void testChunks() {
43 void testChunks() {
4444
45 Assert.assertEquals("Inicia", samples.get(0).getSentence()[0]);
46 Assert.assertEquals("v-fin", samples.get(0).getTags()[0]);
47 Assert.assertEquals("B-VP", samples.get(0).getPreds()[0]);
45 Assertions.assertEquals("Inicia", samples.get(0).getSentence()[0]);
46 Assertions.assertEquals("v-fin", samples.get(0).getTags()[0]);
47 Assertions.assertEquals("B-VP", samples.get(0).getPreds()[0]);
4848
49 Assert.assertEquals("em", samples.get(0).getSentence()[1]);
50 Assert.assertEquals("prp", samples.get(0).getTags()[1]);
51 Assert.assertEquals("B-PP", samples.get(0).getPreds()[1]);
49 Assertions.assertEquals("em", samples.get(0).getSentence()[1]);
50 Assertions.assertEquals("prp", samples.get(0).getTags()[1]);
51 Assertions.assertEquals("B-PP", samples.get(0).getPreds()[1]);
5252
53 Assert.assertEquals("o", samples.get(0).getSentence()[2]);
54 Assert.assertEquals("art", samples.get(0).getTags()[2]);
55 Assert.assertEquals("B-NP", samples.get(0).getPreds()[2]);
53 Assertions.assertEquals("o", samples.get(0).getSentence()[2]);
54 Assertions.assertEquals("art", samples.get(0).getTags()[2]);
55 Assertions.assertEquals("B-NP", samples.get(0).getPreds()[2]);
5656
57 Assert.assertEquals("próximo", samples.get(0).getSentence()[3]);
58 Assert.assertEquals("adj", samples.get(0).getTags()[3]);
59 Assert.assertEquals("I-NP", samples.get(0).getPreds()[3]);
57 Assertions.assertEquals("próximo", samples.get(0).getSentence()[3]);
58 Assertions.assertEquals("adj", samples.get(0).getTags()[3]);
59 Assertions.assertEquals("I-NP", samples.get(0).getPreds()[3]);
6060
61 Assert.assertEquals("Casas", samples.get(3).getSentence()[0]);
62 Assert.assertEquals("n", samples.get(3).getTags()[0]);
63 Assert.assertEquals("B-NP", samples.get(3).getPreds()[0]);
61 Assertions.assertEquals("Casas", samples.get(3).getSentence()[0]);
62 Assertions.assertEquals("n", samples.get(3).getTags()[0]);
63 Assertions.assertEquals("B-NP", samples.get(3).getPreds()[0]);
6464 }
6565
66 @Before
67 public void setup() throws IOException {
66 @BeforeEach
67 void setup() throws IOException {
6868 InputStreamFactory in = new ResourceAsStreamFactory(
6969 ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");
7070
7171 try (ADChunkSampleStream stream = new ADChunkSampleStream(new PlainTextByLineStream(in,
72 StandardCharsets.UTF_8))) {
72 StandardCharsets.UTF_8))) {
7373 ChunkSample sample;
7474 while ((sample = stream.read()) != null) {
7575 samples.add(sample);
2121 import java.util.ArrayList;
2222 import java.util.List;
2323
24 import org.junit.Assert;
25 import org.junit.Before;
26 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeEach;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.formats.ResourceAsStreamFactory;
2929 import opennlp.tools.namefind.NameSample;
3636 private List<NameSample> samples = new ArrayList<>();
3737
3838 @Test
39 public void testSimpleCount() throws IOException {
40 Assert.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
39 void testSimpleCount() {
40 Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
4141 }
4242
4343 @Test
44 public void testCheckMergedContractions() throws IOException {
44 void testCheckMergedContractions() {
4545
46 Assert.assertEquals("no", samples.get(0).getSentence()[1]);
47 Assert.assertEquals("no", samples.get(0).getSentence()[11]);
48 Assert.assertEquals("Com", samples.get(1).getSentence()[0]);
49 Assert.assertEquals("relação", samples.get(1).getSentence()[1]);
50 Assert.assertEquals("à", samples.get(1).getSentence()[2]);
51 Assert.assertEquals("mais", samples.get(2).getSentence()[4]);
52 Assert.assertEquals("de", samples.get(2).getSentence()[5]);
53 Assert.assertEquals("da", samples.get(2).getSentence()[8]);
54 Assert.assertEquals("num", samples.get(3).getSentence()[26]);
46 Assertions.assertEquals("no", samples.get(0).getSentence()[1]);
47 Assertions.assertEquals("no", samples.get(0).getSentence()[11]);
48 Assertions.assertEquals("Com", samples.get(1).getSentence()[0]);
49 Assertions.assertEquals("relação", samples.get(1).getSentence()[1]);
50 Assertions.assertEquals("à", samples.get(1).getSentence()[2]);
51 Assertions.assertEquals("mais", samples.get(2).getSentence()[4]);
52 Assertions.assertEquals("de", samples.get(2).getSentence()[5]);
53 Assertions.assertEquals("da", samples.get(2).getSentence()[8]);
54 Assertions.assertEquals("num", samples.get(3).getSentence()[26]);
5555
5656 }
5757
5858 @Test
59 public void testSize() throws IOException {
60 Assert.assertEquals(25, samples.get(0).getSentence().length);
61 Assert.assertEquals(12, samples.get(1).getSentence().length);
62 Assert.assertEquals(59, samples.get(2).getSentence().length);
63 Assert.assertEquals(33, samples.get(3).getSentence().length);
59 void testSize() {
60 Assertions.assertEquals(25, samples.get(0).getSentence().length);
61 Assertions.assertEquals(12, samples.get(1).getSentence().length);
62 Assertions.assertEquals(59, samples.get(2).getSentence().length);
63 Assertions.assertEquals(33, samples.get(3).getSentence().length);
6464 }
6565
6666 @Test
67 public void testNames() throws IOException {
67 void testNames() {
6868
69 Assert.assertEquals(new Span(4, 7, "time"), samples.get(0).getNames()[0]);
70 Assert.assertEquals(new Span(8, 10, "place"), samples.get(0).getNames()[1]);
71 Assert.assertEquals(new Span(12, 14, "place"), samples.get(0).getNames()[2]);
72 Assert.assertEquals(new Span(15, 17, "person"), samples.get(0).getNames()[3]);
73 Assert.assertEquals(new Span(18, 19, "numeric"), samples.get(0).getNames()[4]);
74 Assert.assertEquals(new Span(20, 22, "place"), samples.get(0).getNames()[5]);
75 Assert.assertEquals(new Span(23, 24, "place"), samples.get(0).getNames()[6]);
69 Assertions.assertEquals(new Span(4, 7, "time"), samples.get(0).getNames()[0]);
70 Assertions.assertEquals(new Span(8, 10, "place"), samples.get(0).getNames()[1]);
71 Assertions.assertEquals(new Span(12, 14, "place"), samples.get(0).getNames()[2]);
72 Assertions.assertEquals(new Span(15, 17, "person"), samples.get(0).getNames()[3]);
73 Assertions.assertEquals(new Span(18, 19, "numeric"), samples.get(0).getNames()[4]);
74 Assertions.assertEquals(new Span(20, 22, "place"), samples.get(0).getNames()[5]);
75 Assertions.assertEquals(new Span(23, 24, "place"), samples.get(0).getNames()[6]);
7676
77 Assert.assertEquals(new Span(22, 24, "person"), samples.get(2).getNames()[0]);// 22..24
78 Assert.assertEquals(new Span(25, 27, "person"), samples.get(2).getNames()[1]);// 25..27
79 Assert.assertEquals(new Span(28, 30, "person"), samples.get(2).getNames()[2]);// 28..30
80 Assert.assertEquals(new Span(31, 34, "person"), samples.get(2).getNames()[3]);// 31..34
81 Assert.assertEquals(new Span(35, 37, "person"), samples.get(2).getNames()[4]);// 35..37
82 Assert.assertEquals(new Span(38, 40, "person"), samples.get(2).getNames()[5]);// 38..40
83 Assert.assertEquals(new Span(41, 43, "person"), samples.get(2).getNames()[6]);// 41..43
84 Assert.assertEquals(new Span(44, 46, "person"), samples.get(2).getNames()[7]);// 44..46
85 Assert.assertEquals(new Span(47, 49, "person"), samples.get(2).getNames()[8]);// 47..49
86 Assert.assertEquals(new Span(50, 52, "person"), samples.get(2).getNames()[9]);// 50..52
87 Assert.assertEquals(new Span(53, 55, "person"), samples.get(2).getNames()[10]);// 53..55
77 Assertions.assertEquals(new Span(22, 24, "person"), samples.get(2).getNames()[0]);// 22..24
78 Assertions.assertEquals(new Span(25, 27, "person"), samples.get(2).getNames()[1]);// 25..27
79 Assertions.assertEquals(new Span(28, 30, "person"), samples.get(2).getNames()[2]);// 28..30
80 Assertions.assertEquals(new Span(31, 34, "person"), samples.get(2).getNames()[3]);// 31..34
81 Assertions.assertEquals(new Span(35, 37, "person"), samples.get(2).getNames()[4]);// 35..37
82 Assertions.assertEquals(new Span(38, 40, "person"), samples.get(2).getNames()[5]);// 38..40
83 Assertions.assertEquals(new Span(41, 43, "person"), samples.get(2).getNames()[6]);// 41..43
84 Assertions.assertEquals(new Span(44, 46, "person"), samples.get(2).getNames()[7]);// 44..46
85 Assertions.assertEquals(new Span(47, 49, "person"), samples.get(2).getNames()[8]);// 47..49
86 Assertions.assertEquals(new Span(50, 52, "person"), samples.get(2).getNames()[9]);// 50..52
87 Assertions.assertEquals(new Span(53, 55, "person"), samples.get(2).getNames()[10]);// 53..55
8888
89 Assert.assertEquals(new Span(0, 1, "place"), samples.get(3).getNames()[0]);// 0..1
90 Assert.assertEquals(new Span(6, 7, "event"), samples.get(3).getNames()[1]);// 6..7
91 Assert.assertEquals(new Span(15, 16, "organization"), samples.get(3).getNames()[2]);// 15..16
92 Assert.assertEquals(new Span(18, 19, "event"), samples.get(3).getNames()[3]);// 18..19
93 Assert.assertEquals(new Span(27, 28, "event"), samples.get(3).getNames()[4]);// 27..28
94 Assert.assertEquals(new Span(29, 30, "event"), samples.get(3).getNames()[5]);// 29..30
89 Assertions.assertEquals(new Span(0, 1, "place"), samples.get(3).getNames()[0]);// 0..1
90 Assertions.assertEquals(new Span(6, 7, "event"), samples.get(3).getNames()[1]);// 6..7
91 Assertions.assertEquals(new Span(15, 16, "organization"), samples.get(3).getNames()[2]);// 15..16
92 Assertions.assertEquals(new Span(18, 19, "event"), samples.get(3).getNames()[3]);// 18..19
93 Assertions.assertEquals(new Span(27, 28, "event"), samples.get(3).getNames()[4]);// 27..28
94 Assertions.assertEquals(new Span(29, 30, "event"), samples.get(3).getNames()[5]);// 29..30
9595
96 Assert.assertEquals(new Span(1, 6, "time"), samples.get(4).getNames()[0]);// 0..1
97 Assert.assertEquals(new Span(0, 3, "person"), samples.get(5).getNames()[0]);// 0..1
96 Assertions.assertEquals(new Span(1, 6, "time"), samples.get(4).getNames()[0]);// 0..1
97 Assertions.assertEquals(new Span(0, 3, "person"), samples.get(5).getNames()[0]);// 0..1
9898 }
9999
100100 @Test
101 public void testSmallSentence() throws IOException {
102 Assert.assertEquals(2, samples.get(6).getSentence().length);
101 void testSmallSentence() {
102 Assertions.assertEquals(2, samples.get(6).getSentence().length);
103103 }
104104
105105 @Test
106 public void testMissingRightContraction() throws IOException {
107 Assert.assertEquals(new Span(0, 1, "person"), samples.get(7).getNames()[0]);
108 Assert.assertEquals(new Span(3, 4, "person"), samples.get(7).getNames()[1]);
109 Assert.assertEquals(new Span(5, 6, "person"), samples.get(7).getNames()[2]);
106 void testMissingRightContraction() {
107 Assertions.assertEquals(new Span(0, 1, "person"), samples.get(7).getNames()[0]);
108 Assertions.assertEquals(new Span(3, 4, "person"), samples.get(7).getNames()[1]);
109 Assertions.assertEquals(new Span(5, 6, "person"), samples.get(7).getNames()[2]);
110110 }
111111
112 @Before
113 public void setup() throws IOException {
112 @BeforeEach
113 void setup() throws IOException {
114114 InputStreamFactory in = new ResourceAsStreamFactory(ADParagraphStreamTest.class,
115115 "/opennlp/tools/formats/ad.sample");
116116
117117 try (ADNameSampleStream stream =
118 new ADNameSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
118 new ADNameSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
119119 NameSample sample;
120120 while ((sample = stream.read()) != null) {
121121 samples.add(sample);
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.formats.ResourceAsStreamFactory;
2626 import opennlp.tools.postag.POSSample;
2929 public class ADPOSSampleStreamTest {
3030
3131 @Test
32 public void testSimple() throws IOException {
32 void testSimple() throws IOException {
3333 // add one sentence with expandME = includeFeats = false
3434 try (ADPOSSampleStream stream = new ADPOSSampleStream(
3535 new PlainTextByLineStream(new ResourceAsStreamFactory(
3636 ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
37 StandardCharsets.UTF_8), false, false)) {
37 StandardCharsets.UTF_8), false, false)) {
3838 POSSample sample = stream.read();
3939
40 Assert.assertEquals(23, sample.getSentence().length);
40 Assertions.assertEquals(23, sample.getSentence().length);
4141
42 Assert.assertEquals("Inicia", sample.getSentence()[0]);
43 Assert.assertEquals("v-fin", sample.getTags()[0]);
42 Assertions.assertEquals("Inicia", sample.getSentence()[0]);
43 Assertions.assertEquals("v-fin", sample.getTags()[0]);
4444
45 Assert.assertEquals("em", sample.getSentence()[1]);
46 Assert.assertEquals("prp", sample.getTags()[1]);
45 Assertions.assertEquals("em", sample.getSentence()[1]);
46 Assertions.assertEquals("prp", sample.getTags()[1]);
4747
48 Assert.assertEquals("o", sample.getSentence()[2]);
49 Assert.assertEquals("art", sample.getTags()[2]);
48 Assertions.assertEquals("o", sample.getSentence()[2]);
49 Assertions.assertEquals("art", sample.getTags()[2]);
5050
51 Assert.assertEquals("Porto_Poesia", sample.getSentence()[9]);
52 Assert.assertEquals("prop", sample.getTags()[9]);
51 Assertions.assertEquals("Porto_Poesia", sample.getSentence()[9]);
52 Assertions.assertEquals("prop", sample.getTags()[9]);
5353 }
5454 }
5555
5656 @Test
57 public void testExpandME() throws IOException {
57 void testExpandME() throws IOException {
5858 // add one sentence with expandME = true
5959 try (ADPOSSampleStream stream = new ADPOSSampleStream(
6060 new PlainTextByLineStream(new ResourceAsStreamFactory(
6161 ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
62 StandardCharsets.UTF_8), true, false)) {
62 StandardCharsets.UTF_8), true, false)) {
6363
6464 POSSample sample = stream.read();
6565
66 Assert.assertEquals(27, sample.getSentence().length);
66 Assertions.assertEquals(27, sample.getSentence().length);
6767
68 Assert.assertEquals("Inicia", sample.getSentence()[0]);
69 Assert.assertEquals("v-fin", sample.getTags()[0]);
68 Assertions.assertEquals("Inicia", sample.getSentence()[0]);
69 Assertions.assertEquals("v-fin", sample.getTags()[0]);
7070
71 Assert.assertEquals("em", sample.getSentence()[1]);
72 Assert.assertEquals("prp", sample.getTags()[1]);
71 Assertions.assertEquals("em", sample.getSentence()[1]);
72 Assertions.assertEquals("prp", sample.getTags()[1]);
7373
74 Assert.assertEquals("o", sample.getSentence()[2]);
75 Assert.assertEquals("art", sample.getTags()[2]);
74 Assertions.assertEquals("o", sample.getSentence()[2]);
75 Assertions.assertEquals("art", sample.getTags()[2]);
7676
77 Assert.assertEquals("Porto", sample.getSentence()[9]);
78 Assert.assertEquals("B-prop", sample.getTags()[9]);
77 Assertions.assertEquals("Porto", sample.getSentence()[9]);
78 Assertions.assertEquals("B-prop", sample.getTags()[9]);
7979
80 Assert.assertEquals("Poesia", sample.getSentence()[10]);
81 Assert.assertEquals("I-prop", sample.getTags()[10]);
80 Assertions.assertEquals("Poesia", sample.getSentence()[10]);
81 Assertions.assertEquals("I-prop", sample.getTags()[10]);
8282 }
8383 }
8484
8585 @Test
86 public void testIncludeFeats() throws IOException {
86 void testIncludeFeats() throws IOException {
8787 // add one sentence with includeFeats = true
8888 try (ADPOSSampleStream stream = new ADPOSSampleStream(
8989 new PlainTextByLineStream(new ResourceAsStreamFactory(
9090 ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
91 StandardCharsets.UTF_8), false, true)) {
91 StandardCharsets.UTF_8), false, true)) {
9292
9393 POSSample sample = stream.read();
9494
95 Assert.assertEquals(23, sample.getSentence().length);
95 Assertions.assertEquals(23, sample.getSentence().length);
9696
97 Assert.assertEquals("Inicia", sample.getSentence()[0]);
98 Assert.assertEquals("v-fin=PR=3S=IND=VFIN", sample.getTags()[0]);
97 Assertions.assertEquals("Inicia", sample.getSentence()[0]);
98 Assertions.assertEquals("v-fin=PR=3S=IND=VFIN", sample.getTags()[0]);
9999
100 Assert.assertEquals("em", sample.getSentence()[1]);
101 Assert.assertEquals("prp", sample.getTags()[1]);
100 Assertions.assertEquals("em", sample.getSentence()[1]);
101 Assertions.assertEquals("prp", sample.getTags()[1]);
102102
103 Assert.assertEquals("o", sample.getSentence()[2]);
104 Assert.assertEquals("art=DET=M=S", sample.getTags()[2]);
103 Assertions.assertEquals("o", sample.getSentence()[2]);
104 Assertions.assertEquals("art=DET=M=S", sample.getTags()[2]);
105105
106 Assert.assertEquals("Porto_Poesia", sample.getSentence()[9]);
107 Assert.assertEquals("prop=M=S", sample.getTags()[9]);
106 Assertions.assertEquals("Porto_Poesia", sample.getSentence()[9]);
107 Assertions.assertEquals("prop=M=S", sample.getTags()[9]);
108108 }
109109 }
110110
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.formats.ResourceAsStreamFactory;
2626 import opennlp.tools.util.InputStreamFactory;
3131 public static final int NUM_SENTENCES = 8;
3232
3333 @Test
34 public void testSimpleReading() throws IOException {
34 void testSimpleReading() throws IOException {
3535 int count = 0;
3636
3737 ADSentenceStream stream = openData();
4444 // paragraph.getRoot();
4545 }
4646
47 Assert.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, count);
47 Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, count);
4848 }
4949
5050 @Test
51 public void testLeadingWithContraction() throws IOException {
51 void testLeadingWithContraction() throws IOException {
5252 int count = 0;
5353
5454 ADSentenceStream stream = openData();
6060 paragraph = stream.read();
6161 }
6262
63 Assert.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, count);
63 Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, count);
6464 }
6565
6666 private static ADSentenceStream openData() throws IOException {
2121 import java.util.ArrayList;
2222 import java.util.List;
2323
24 import org.junit.Assert;
25 import org.junit.Before;
26 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeEach;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.formats.ResourceAsStreamFactory;
2929 import opennlp.tools.sentdetect.SentenceSample;
3636 private List<SentenceSample> samples = new ArrayList<>();
3737
3838 @Test
39 public void testSimpleCount() throws IOException {
40 Assert.assertEquals(5, samples.size());
39 void testSimpleCount() {
40 Assertions.assertEquals(5, samples.size());
4141 }
4242
4343 @Test
44 public void testSentences() throws IOException {
44 void testSentences() {
4545
46 Assert.assertNotNull(samples.get(0).getDocument());
47 Assert.assertEquals(3, samples.get(0).getSentences().length);
48 Assert.assertEquals(new Span(0, 119), samples.get(0).getSentences()[0]);
49 Assert.assertEquals(new Span(120, 180), samples.get(0).getSentences()[1]);
46 Assertions.assertNotNull(samples.get(0).getDocument());
47 Assertions.assertEquals(3, samples.get(0).getSentences().length);
48 Assertions.assertEquals(new Span(0, 119), samples.get(0).getSentences()[0]);
49 Assertions.assertEquals(new Span(120, 180), samples.get(0).getSentences()[1]);
5050 }
5151
52 @Before
53 public void setup() throws IOException {
52 @BeforeEach
53 void setup() throws IOException {
5454 InputStreamFactory in = new ResourceAsStreamFactory(ADSentenceSampleStreamTest.class,
5555 "/opennlp/tools/formats/ad.sample");
5656
5757 try (ADSentenceSampleStream stream = new ADSentenceSampleStream(
58 new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
58 new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
5959
6060 SentenceSample sample;
6161
2323 import java.util.List;
2424 import java.util.Objects;
2525
26 import org.junit.Assert;
27 import org.junit.Before;
28 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.BeforeEach;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.tokenize.TokenSample;
3131 import opennlp.tools.util.ObjectStream;
3535 private List<TokenSample> samples = new ArrayList<>();
3636
3737 @Test
38 public void testSimpleCount() throws IOException {
39 Assert.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
38 void testSimpleCount() {
39 Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
4040 }
4141
4242 @Test
43 public void testSentences() throws IOException {
44 Assert.assertTrue(samples.get(5).getText().contains("ofereceu-me"));
43 void testSentences() {
44 Assertions.assertTrue(samples.get(5).getText().contains("ofereceu-me"));
4545 }
4646
47 @Before
48 public void setup() throws IOException, URISyntaxException {
47 @BeforeEach
48 void setup() throws IOException, URISyntaxException {
4949 ADTokenSampleStreamFactory factory = new ADTokenSampleStreamFactory(
5050 ADTokenSampleStreamFactory.Parameters.class);
5151
5252 File dict = new File(Objects.requireNonNull(getClass().getClassLoader()
53 .getResource("opennlp/tools/tokenize/latin-detokenizer.xml")).toURI());
53 .getResource("opennlp/tools/tokenize/latin-detokenizer.xml")).toURI());
5454 File data = new File(Objects.requireNonNull(getClass().getClassLoader()
55 .getResource("opennlp/tools/formats/ad.sample")).toURI());
56 String[] args = { "-data", data.getCanonicalPath(), "-encoding", "UTF-8",
57 "-lang", "por", "-detokenizer", dict.getCanonicalPath() };
55 .getResource("opennlp/tools/formats/ad.sample")).toURI());
56 String[] args = {"-data", data.getCanonicalPath(), "-encoding", "UTF-8",
57 "-lang", "por", "-detokenizer", dict.getCanonicalPath()};
5858 ObjectStream<TokenSample> tokenSampleStream = factory.create(args);
5959
6060 TokenSample sample = tokenSampleStream.read();
2020 import java.util.HashMap;
2121 import java.util.Map;
2222
23 import org.junit.Test;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.util.ObjectStream;
2626
4141 }
4242
4343 @Test
44 public void testParsingEntities() throws Exception {
44 void testParsingEntities() throws Exception {
4545 Map<String, String> typeToClassMap = new HashMap<>();
4646 addEntityTypes(typeToClassMap);
4747
5959 }
6060
6161 @Test
62 public void testParsingRelations() throws Exception {
62 void testParsingRelations() throws Exception {
6363 Map<String, String> typeToClassMap = new HashMap<>();
6464 addEntityTypes(typeToClassMap);
6565 typeToClassMap.put("Related", AnnotationConfiguration.RELATION_TYPE);
2222 import java.util.List;
2323 import java.util.Map;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.namefind.NameSample;
2929 import opennlp.tools.sentdetect.NewlineSentenceDetector;
3232 public class BratDocumentParserTest {
3333
3434 @Test
35 public void testParse() throws IOException {
35 void testParse() throws IOException {
3636
3737 Map<String, String> typeToClassMap = new HashMap<>();
3838 BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
5151
5252 List<NameSample> names = parser.parse(doc);
5353
54 Assert.assertEquals(3, names.size());
54 Assertions.assertEquals(3, names.size());
5555
5656 NameSample sample1 = names.get(0);
5757
58 Assert.assertEquals(1, sample1.getNames().length);
59 Assert.assertEquals(0, sample1.getNames()[0].getStart());
60 Assert.assertEquals(2, sample1.getNames()[0].getEnd());
58 Assertions.assertEquals(1, sample1.getNames().length);
59 Assertions.assertEquals(0, sample1.getNames()[0].getStart());
60 Assertions.assertEquals(2, sample1.getNames()[0].getEnd());
6161
6262
6363 NameSample sample2 = names.get(1);
64 Assert.assertEquals(1, sample2.getNames().length);
65 Assert.assertEquals(0, sample2.getNames()[0].getStart());
66 Assert.assertEquals(1, sample2.getNames()[0].getEnd());
64 Assertions.assertEquals(1, sample2.getNames().length);
65 Assertions.assertEquals(0, sample2.getNames()[0].getStart());
66 Assertions.assertEquals(1, sample2.getNames()[0].getEnd());
6767
6868 NameSample sample3 = names.get(2);
69 Assert.assertEquals(3, sample3.getNames().length);
70 Assert.assertEquals(0, sample3.getNames()[0].getStart());
71 Assert.assertEquals(1, sample3.getNames()[0].getEnd());
72 Assert.assertEquals(1, sample3.getNames()[1].getStart());
73 Assert.assertEquals(2, sample3.getNames()[1].getEnd());
74 Assert.assertEquals(2, sample3.getNames()[2].getStart());
75 Assert.assertEquals(3, sample3.getNames()[2].getEnd());
69 Assertions.assertEquals(3, sample3.getNames().length);
70 Assertions.assertEquals(0, sample3.getNames()[0].getStart());
71 Assertions.assertEquals(1, sample3.getNames()[0].getEnd());
72 Assertions.assertEquals(1, sample3.getNames()[1].getStart());
73 Assertions.assertEquals(2, sample3.getNames()[1].getEnd());
74 Assertions.assertEquals(2, sample3.getNames()[2].getStart());
75 Assertions.assertEquals(3, sample3.getNames()[2].getEnd());
7676 }
7777 }
2121 import java.util.HashMap;
2222 import java.util.Map;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 public class BratDocumentTest {
2828
2929 @Test
30 public void testDocumentWithEntitiesParsing() throws IOException {
30 void testDocumentWithEntitiesParsing() throws IOException {
3131
3232 Map<String, String> typeToClassMap = new HashMap<>();
3333 BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
4141
4242 BratDocument doc = BratDocument.parseDocument(config, "voa-with-entities", txtIn, annIn);
4343
44 Assert.assertEquals("voa-with-entities", doc.getId());
45 Assert.assertTrue(doc.getText().startsWith(" U . S . President "));
46 Assert.assertTrue(doc.getText().endsWith("multinational process . \n"));
44 Assertions.assertEquals("voa-with-entities", doc.getId());
45 Assertions.assertTrue(doc.getText().startsWith(" U . S . President "));
46 Assertions.assertTrue(doc.getText().endsWith("multinational process . \n"));
4747
48 Assert.assertEquals(18, doc.getAnnotations().size());
49
48 Assertions.assertEquals(18, doc.getAnnotations().size());
49
5050 BratAnnotation annotation = doc.getAnnotation("T2");
5151 checkNote(annotation, "Barack Obama", "President Obama was the 44th U.S. president");
5252 annotation = doc.getAnnotation("T3");
53 checkNote(annotation,"South Korea","The capital of South Korea is Seoul");
53 checkNote(annotation, "South Korea", "The capital of South Korea is Seoul");
5454 }
55
55
5656 private void checkNote(BratAnnotation annotation, String expectedCoveredText, String expectedNote) {
57 Assert.assertTrue(annotation instanceof SpanAnnotation);
57 Assertions.assertTrue(annotation instanceof SpanAnnotation);
5858 SpanAnnotation spanAnn = (SpanAnnotation) annotation;
59 Assert.assertEquals(expectedCoveredText, spanAnn.getCoveredText());
60 Assert.assertEquals(expectedNote, spanAnn.getNote());
59 Assertions.assertEquals(expectedCoveredText, spanAnn.getCoveredText());
60 Assertions.assertEquals(expectedNote, spanAnn.getNote());
6161 }
6262
6363 /**
6464 * Parse spans that have multiple fragments and ensure they are matched to the correct tokens.
65 *
65 * <p>
6666 * Test to ensure OPENNLP-1193 works.
6767 */
6868 @Test
69 public void testSpanWithMultiFragments() throws IOException {
69 void testSpanWithMultiFragments() throws IOException {
7070 Map<String, String> typeToClassMap = new HashMap<>();
7171 BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
7272 AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
8080 BratDocument doc = BratDocument.parseDocument(config, "opennlp-1193", txtIn, annIn);
8181
8282 SpanAnnotation t1 = (SpanAnnotation) doc.getAnnotation("T1");
83 Assert.assertEquals(t1.getSpans()[0].getStart(), 0);
84 Assert.assertEquals(t1.getSpans()[0].getEnd(), 7);
85 Assert.assertEquals(t1.getSpans()[1].getStart(), 8);
86 Assert.assertEquals(t1.getSpans()[1].getEnd(), 15);
87 Assert.assertEquals(t1.getSpans()[2].getStart(), 17);
88 Assert.assertEquals(t1.getSpans()[2].getEnd(), 24);
83 Assertions.assertEquals(t1.getSpans()[0].getStart(), 0);
84 Assertions.assertEquals(t1.getSpans()[0].getEnd(), 7);
85 Assertions.assertEquals(t1.getSpans()[1].getStart(), 8);
86 Assertions.assertEquals(t1.getSpans()[1].getEnd(), 15);
87 Assertions.assertEquals(t1.getSpans()[2].getStart(), 17);
88 Assertions.assertEquals(t1.getSpans()[2].getEnd(), 24);
8989
9090 SpanAnnotation t2 = (SpanAnnotation) doc.getAnnotation("T2");
91 Assert.assertEquals(t2.getSpans()[0].getStart(), 26);
92 Assert.assertEquals(t2.getSpans()[0].getEnd(), 33);
93 Assert.assertEquals(t2.getSpans()[1].getStart(), 40);
94 Assert.assertEquals(t2.getSpans()[1].getEnd(), 47);
91 Assertions.assertEquals(t2.getSpans()[0].getStart(), 26);
92 Assertions.assertEquals(t2.getSpans()[0].getEnd(), 33);
93 Assertions.assertEquals(t2.getSpans()[1].getStart(), 40);
94 Assertions.assertEquals(t2.getSpans()[1].getEnd(), 47);
9595 }
9696 }
2424 import java.util.Map;
2525 import java.util.Set;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.namefind.NameSample;
3131 import opennlp.tools.sentdetect.NewlineSentenceDetector;
5151 }
5252
5353 @Test
54 public void readNoOverlap() throws IOException {
54 void readNoOverlap() throws IOException {
5555 BratNameSampleStream stream = createNameSampleWith("-entities.",
5656 null);
5757 int count = 0;
6161 sample = stream.read();
6262 }
6363
64 Assert.assertEquals(8, count);
65 }
66
67 @Test(expected = RuntimeException.class)
68 public void readOverlapFail() throws IOException {
69 BratNameSampleStream stream = createNameSampleWith("overlapping",
70 null);
71
72 NameSample sample = stream.read();
73 while (sample != null) {
74 sample = stream.read();
75 }
76 }
77
78 @Test(expected = IllegalArgumentException.class)
79 public void emptySample() throws IOException {
80 createNameSampleWith("overlapping",
81 Collections.emptySet());
64 Assertions.assertEquals(8, count);
8265 }
8366
8467 @Test
85 public void readOverlapFilter() throws IOException {
68 void readOverlapFail() {
69 Assertions.assertThrows(RuntimeException.class, () -> {
70 BratNameSampleStream stream = createNameSampleWith("overlapping",
71 null);
72
73 NameSample sample = stream.read();
74 while (sample != null) {
75 sample = stream.read();
76 }
77 });
78
79 }
80
81 @Test
82 void emptySample() {
83 Assertions.assertThrows(IllegalArgumentException.class, () -> {
84 createNameSampleWith("overlapping",
85 Collections.emptySet());
86 });
87 }
88
89 @Test
90 void readOverlapFilter() throws IOException {
8691 BratNameSampleStream stream = createNameSampleWith("overlapping",
8792 Collections.singleton("Person"));
8893 int count = 0;
9297 sample = stream.read();
9398 }
9499
95 Assert.assertEquals(8, count);
100 Assertions.assertEquals(8, count);
96101 }
97102 }
1919
2020 import java.io.IOException;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.formats.ResourceAsStreamFactory;
2626 import opennlp.tools.lemmatizer.LemmaSample;
3131
3232
3333 @Test
34 public void testParseSpanishS300() throws IOException {
34 void testParseSpanishS300() throws IOException {
3535 InputStreamFactory streamFactory =
3636 new ResourceAsStreamFactory(ConlluStreamTest.class, "es-ud-sample.conllu");
3737
4040
4141 LemmaSample predicted = stream.read();
4242 System.out.println(predicted);
43 Assert.assertEquals("digám+tú+él", predicted.getLemmas()[0]);
44 Assert.assertEquals("la", predicted.getTokens()[3]);
45 Assert.assertEquals("el", predicted.getLemmas()[3]);
43 Assertions.assertEquals("digám+tú+él", predicted.getLemmas()[0]);
44 Assertions.assertEquals("la", predicted.getTokens()[3]);
45 Assertions.assertEquals("el", predicted.getLemmas()[3]);
4646 }
4747 }
4848 }
1919
2020 import java.io.IOException;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.formats.ResourceAsStreamFactory;
2626 import opennlp.tools.postag.POSSample;
2929
3030 public class ConlluPOSSampleStreamTest {
3131 @Test
32 public void testParseContraction() throws IOException {
32 void testParseContraction() throws IOException {
3333 InputStreamFactory streamFactory =
3434 new ResourceAsStreamFactory(ConlluStreamTest.class, "pt_br-ud-sample.conllu");
3535
4747 "antigo_ADJ Ciago_PROPN ._PUNCT");
4848
4949 POSSample predicted = stream.read();
50 Assert.assertEquals(expected, predicted);
50 Assertions.assertEquals(expected, predicted);
5151 }
5252 }
5353
5454
5555 @Test
56 public void testParseSpanishS300() throws IOException {
56 void testParseSpanishS300() throws IOException {
5757 InputStreamFactory streamFactory =
5858 new ResourceAsStreamFactory(ConlluStreamTest.class, "es-ud-sample.conllu");
5959
7070 "plantea_VERB ni_CCONJ siquiera_ADV \"_PUNCT esperar_VERB un_DET mejor_ADJ " +
7171 "gobierno_NOUN \"_PUNCT ._PUNCT");
7272 POSSample predicted = stream.read();
73 Assert.assertEquals(expected1, predicted);
73 Assertions.assertEquals(expected1, predicted);
7474 }
7575 }
7676 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.formats.ResourceAsStreamFactory;
2525 import opennlp.tools.sentdetect.SentenceSample;
3030 public class ConlluSentenceSampleStreamTest {
3131
3232 @Test
33 public void testParseTwoSentences() throws IOException {
33 void testParseTwoSentences() throws IOException {
3434 InputStreamFactory streamFactory =
3535 new ResourceAsStreamFactory(ConlluStreamTest.class, "de-ud-train-sample.conllu");
3636
3939
4040 SentenceSample sample1 = stream.read();
4141
42 Assert.assertEquals("Fachlich kompetent, sehr gute Beratung und ein freundliches Team.",
42 Assertions.assertEquals("Fachlich kompetent, sehr gute Beratung und ein freundliches Team.",
4343 sample1.getDocument());
4444
45 Assert.assertEquals(new Span(0, 65), sample1.getSentences()[0]);
45 Assertions.assertEquals(new Span(0, 65), sample1.getSentences()[0]);
4646
4747 SentenceSample sample2 = stream.read();
4848
49 Assert.assertEquals("Beiden Zahnärzten verdanke ich einen neuen Biss und dadurch " +
49 Assertions.assertEquals("Beiden Zahnärzten verdanke ich einen neuen Biss und dadurch " +
5050 "endlich keine Rückenschmerzen mehr.", sample2.getDocument());
51 Assert.assertEquals(new Span(0, 95), sample2.getSentences()[0]);
51 Assertions.assertEquals(new Span(0, 95), sample2.getSentences()[0]);
5252
53 Assert.assertNull("Stream must be exhausted", stream.read());
53 Assertions.assertNull(stream.read(), "Stream must be exhausted");
5454 }
5555
5656 try (ObjectStream<SentenceSample> stream =
5757 new ConlluSentenceSampleStream(new ConlluStream(streamFactory), 3)) {
5858 SentenceSample sample = stream.read();
5959
60 Assert.assertEquals("Fachlich kompetent, sehr gute Beratung und ein freundliches Team."
61 + " Beiden Zahnärzten verdanke ich einen neuen Biss und dadurch endlich keine "
62 + "Rückenschmerzen mehr.",
60 Assertions.assertEquals("Fachlich kompetent, sehr gute Beratung und ein freundliches Team."
61 + " Beiden Zahnärzten verdanke ich einen neuen Biss und dadurch endlich keine "
62 + "Rückenschmerzen mehr.",
6363 sample.getDocument());
6464
65 Assert.assertNull("Stream must be exhausted", stream.read());
65 Assertions.assertNull(stream.read(), "Stream must be exhausted");
6666 }
6767 }
6868 }
2323 import java.util.Map;
2424 import java.util.Optional;
2525
26 import org.junit.Assert;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.formats.ResourceAsStreamFactory;
3030 import opennlp.tools.util.InputStreamFactory;
3333 public class ConlluStreamTest {
3434
3535 @Test
36 public void testParseTwoSentences() throws IOException {
36 void testParseTwoSentences() throws IOException {
3737
3838 InputStreamFactory streamFactory =
3939 new ResourceAsStreamFactory(ConlluStreamTest.class, "de-ud-train-sample.conllu");
4141 try (ObjectStream<ConlluSentence> stream = new ConlluStream(streamFactory)) {
4242 ConlluSentence sent1 = stream.read();
4343
44 Assert.assertEquals("train-s21", sent1.getSentenceIdComment());
45 Assert.assertEquals("Fachlich kompetent, sehr gute Beratung und ein freundliches Team.",
44 Assertions.assertEquals("train-s21", sent1.getSentenceIdComment());
45 Assertions.assertEquals("Fachlich kompetent, sehr gute Beratung und ein freundliches Team.",
4646 sent1.getTextComment());
47 Assert.assertEquals(11, sent1.getWordLines().size());
47 Assertions.assertEquals(11, sent1.getWordLines().size());
4848
4949 ConlluSentence sent2 = stream.read();
5050
51 Assert.assertEquals("train-s22", sent2.getSentenceIdComment());
52 Assert.assertEquals(
51 Assertions.assertEquals("train-s22", sent2.getSentenceIdComment());
52 Assertions.assertEquals(
5353 "Beiden Zahnärzten verdanke ich einen neuen Biss und dadurch endlich keine Rückenschmerzen mehr.",
5454 sent2.getTextComment());
55 Assert.assertEquals(14, sent2.getWordLines().size());
55 Assertions.assertEquals(14, sent2.getWordLines().size());
5656
57 Assert.assertNull("Stream must be exhausted", stream.read());
57 Assertions.assertNull(stream.read(), "Stream must be exhausted");
5858 }
5959 }
6060
6161 @Test
62 public void testOptionalComments() throws IOException {
62 void testOptionalComments() throws IOException {
6363 InputStreamFactory streamFactory =
64 new ResourceAsStreamFactory(ConlluStreamTest.class, "full-sample.conllu");
64 new ResourceAsStreamFactory(ConlluStreamTest.class, "full-sample.conllu");
6565
6666 try (ObjectStream<ConlluSentence> stream = new ConlluStream(streamFactory)) {
6767 ConlluSentence sent1 = stream.read();
6868
69 Assert.assertEquals("1", sent1.getSentenceIdComment());
70 Assert.assertEquals("They buy and sell books.",
71 sent1.getTextComment());
72 Assert.assertTrue(sent1.isNewDocument());
73 Assert.assertTrue(sent1.isNewParagraph());
74 Assert.assertEquals(6, sent1.getWordLines().size());
69 Assertions.assertEquals("1", sent1.getSentenceIdComment());
70 Assertions.assertEquals("They buy and sell books.",
71 sent1.getTextComment());
72 Assertions.assertTrue(sent1.isNewDocument());
73 Assertions.assertTrue(sent1.isNewParagraph());
74 Assertions.assertEquals(6, sent1.getWordLines().size());
7575
7676 ConlluSentence sent2 = stream.read();
7777
78 Assert.assertEquals("2", sent2.getSentenceIdComment());
79 Assert.assertEquals(
80 "I have no clue.",
81 sent2.getTextComment());
82 Assert.assertTrue(sent2.isNewDocument());
83 Assert.assertEquals(5, sent2.getWordLines().size());
78 Assertions.assertEquals("2", sent2.getSentenceIdComment());
79 Assertions.assertEquals(
80 "I have no clue.",
81 sent2.getTextComment());
82 Assertions.assertTrue(sent2.isNewDocument());
83 Assertions.assertEquals(5, sent2.getWordLines().size());
8484
8585 ConlluSentence sent3 = stream.read();
8686
87 Assert.assertEquals("panc0.s4", sent3.getSentenceIdComment());
88 Assert.assertEquals(Optional.of("tat yathānuśrūyate."), sent3.getTranslit());
89 Assert.assertEquals("तत् यथानुश्रूयते।", sent3.getTextComment());
90 Assert.assertEquals(3, sent3.getWordLines().size());
91 Assert.assertTrue(sent3.isNewParagraph());
87 Assertions.assertEquals("panc0.s4", sent3.getSentenceIdComment());
88 Assertions.assertEquals(Optional.of("tat yathānuśrūyate."), sent3.getTranslit());
89 Assertions.assertEquals("तत् यथानुश्रूयते।", sent3.getTextComment());
90 Assertions.assertEquals(3, sent3.getWordLines().size());
91 Assertions.assertTrue(sent3.isNewParagraph());
9292 Map<Object, Object> textLang3 = new HashMap<>();
9393 textLang3.put(new Locale("fr"), "Voilà ce qui nous est parvenu par la tradition orale.");
9494 textLang3.put(new Locale("en"), "This is what is heard.");
95 Assert.assertEquals(Optional.of(textLang3)
96 , sent3.getTextLang());
95 Assertions.assertEquals(Optional.of(textLang3)
96 , sent3.getTextLang());
9797
9898 ConlluSentence sent4 = stream.read();
9999
100 Assert.assertEquals("mf920901-001-p1s1A", sent4.getSentenceIdComment());
101 Assert.assertEquals(
102 "Slovenská ústava: pro i proti",
103 sent4.getTextComment());
104 Assert.assertEquals(6, sent4.getWordLines().size());
105 Assert.assertTrue(sent4.isNewDocument());
106 Assert.assertTrue(sent4.isNewParagraph());
107 Assert.assertEquals(Optional.of("mf920901-001"), sent4.getDocumentId());
108 Assert.assertEquals(Optional.of("mf920901-001-p1"), sent4.getParagraphId());
109 Assert.assertEquals(Optional.of(Collections.singletonMap(new Locale("en"),
110 "Slovak constitution: pros and cons"))
111 , sent4.getTextLang());
100 Assertions.assertEquals("mf920901-001-p1s1A", sent4.getSentenceIdComment());
101 Assertions.assertEquals(
102 "Slovenská ústava: pro i proti",
103 sent4.getTextComment());
104 Assertions.assertEquals(6, sent4.getWordLines().size());
105 Assertions.assertTrue(sent4.isNewDocument());
106 Assertions.assertTrue(sent4.isNewParagraph());
107 Assertions.assertEquals(Optional.of("mf920901-001"), sent4.getDocumentId());
108 Assertions.assertEquals(Optional.of("mf920901-001-p1"), sent4.getParagraphId());
109 Assertions.assertEquals(Optional.of(Collections.singletonMap(new Locale("en"),
110 "Slovak constitution: pros and cons"))
111 , sent4.getTextLang());
112112
113 Assert.assertNull("Stream must be exhausted", stream.read());
113 Assertions.assertNull(stream.read(), "Stream must be exhausted");
114114 }
115115 }
116116 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.formats.ResourceAsStreamFactory;
2525 import opennlp.tools.tokenize.TokenSample;
2929 public class ConlluTokenSampleStreamTest {
3030
3131 @Test
32 public void testParseTwoSentences() throws IOException {
32 void testParseTwoSentences() throws IOException {
3333 InputStreamFactory streamFactory =
3434 new ResourceAsStreamFactory(ConlluStreamTest.class, "de-ud-train-sample.conllu");
3535
3939 "Fachlich kompetent" + TokenSample.DEFAULT_SEPARATOR_CHARS
4040 + ", sehr gute Beratung und ein freundliches Team" + TokenSample.DEFAULT_SEPARATOR_CHARS
4141 + ".", TokenSample.DEFAULT_SEPARATOR_CHARS);
42 Assert.assertEquals(expected1, stream.read());
42 Assertions.assertEquals(expected1, stream.read());
4343
4444 TokenSample expected2 = TokenSample.parse("Beiden Zahnärzten verdanke ich einen " +
4545 "neuen Biss und dadurch endlich keine Rückenschmerzen mehr"
4646 + TokenSample.DEFAULT_SEPARATOR_CHARS + ".", TokenSample.DEFAULT_SEPARATOR_CHARS);
47 Assert.assertEquals(expected2, stream.read());
47 Assertions.assertEquals(expected2, stream.read());
4848
49 Assert.assertNull("Stream must be exhausted", stream.read());
49 Assertions.assertNull(stream.read(), "Stream must be exhausted");
5050 }
5151 }
5252
5353 @Test
54 public void testParseContraction() throws IOException {
54 void testParseContraction() throws IOException {
5555 InputStreamFactory streamFactory =
5656 new ResourceAsStreamFactory(ConlluStreamTest.class, "pt_br-ud-sample.conllu");
5757
6868 TokenSample.DEFAULT_SEPARATOR_CHARS + "."
6969 , TokenSample.DEFAULT_SEPARATOR_CHARS);
7070 TokenSample predicted = stream.read();
71 Assert.assertEquals(expected1, predicted);
71 Assertions.assertEquals(expected1, predicted);
7272 }
7373 }
7474
7575 @Test
76 public void testParseSpanishS300() throws IOException {
76 void testParseSpanishS300() throws IOException {
7777 InputStreamFactory streamFactory =
7878 new ResourceAsStreamFactory(ConlluStreamTest.class, "es-ud-sample.conllu");
7979
9393
9494 , TokenSample.DEFAULT_SEPARATOR_CHARS);
9595 TokenSample predicted = stream.read();
96 Assert.assertEquals(expected1, predicted);
96 Assertions.assertEquals(expected1, predicted);
9797 }
9898 }
9999 }
1616
1717 package opennlp.tools.formats.conllu;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 import opennlp.tools.util.InvalidFormatException;
2323
2424 public class ConlluWordLineTest {
2525
2626 @Test
27 public void testParseLine() throws InvalidFormatException {
27 void testParseLine() throws InvalidFormatException {
2828 ConlluWordLine line = new ConlluWordLine(
2929 "12\tHänden\tHand\tNOUN\tNN\tCase=Dat|Number=Plur\t5\tnmod\t_\t_");
3030
31 Assert.assertEquals("12", line.getId());
32 Assert.assertEquals("Händen", line.getForm());
33 Assert.assertEquals("Hand", line.getLemma());
34 Assert.assertEquals("NOUN", line.getPosTag(ConlluTagset.U));
35 Assert.assertEquals("NN", line.getPosTag(ConlluTagset.X));
36 Assert.assertEquals("Case=Dat|Number=Plur", line.getFeats());
37 Assert.assertEquals("5", line.getHead());
38 Assert.assertEquals("nmod", line.getDeprel());
39 Assert.assertEquals("_", line.getDeps());
40 Assert.assertEquals("_", line.getMisc());
31 Assertions.assertEquals("12", line.getId());
32 Assertions.assertEquals("Händen", line.getForm());
33 Assertions.assertEquals("Hand", line.getLemma());
34 Assertions.assertEquals("NOUN", line.getPosTag(ConlluTagset.U));
35 Assertions.assertEquals("NN", line.getPosTag(ConlluTagset.X));
36 Assertions.assertEquals("Case=Dat|Number=Plur", line.getFeats());
37 Assertions.assertEquals("5", line.getHead());
38 Assertions.assertEquals("nmod", line.getDeprel());
39 Assertions.assertEquals("_", line.getDeps());
40 Assertions.assertEquals("_", line.getMisc());
4141 }
4242 }
2020 import java.io.IOException;
2121 import java.io.InputStream;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.parser.Parse;
2727 import opennlp.tools.util.ObjectStream;
2929
3030 public class ConstitParseSampleStreamTest {
3131
32 private String[] sample1Tokens = new String[]{
32 private String[] sample1Tokens = new String[] {
3333 "L'",
3434 "autonomie",
3535 "de",
9090 byte[] buffer = new byte[1024];
9191 int length;
9292 try (InputStream sampleIn =
93 ConstitParseSampleStreamTest.class.getResourceAsStream("sample1.xml")) {
93 ConstitParseSampleStreamTest.class.getResourceAsStream("sample1.xml")) {
9494 while ((length = sampleIn.read(buffer)) > 0) {
9595 out.write(buffer, 0, length);
9696 }
100100 }
101101
102102 @Test
103 public void testThereIsExactlyOneSent() throws IOException {
103 void testThereIsExactlyOneSent() throws IOException {
104104 try (ObjectStream<Parse> samples =
105 new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
106 Assert.assertNotNull(samples.read());
107 Assert.assertNull(samples.read());
108 Assert.assertNull(samples.read());
105 new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
106 Assertions.assertNotNull(samples.read());
107 Assertions.assertNull(samples.read());
108 Assertions.assertNull(samples.read());
109109 }
110110 }
111111
112112 @Test
113 public void testTokensAreCorrect() throws IOException {
113 void testTokensAreCorrect() throws IOException {
114114
115115 try (ObjectStream<Parse> samples =
116 new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
116 new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
117117 Parse p = samples.read();
118118
119119 Parse[] tagNodes = p.getTagNodes();
122122 tokens[ti] = tagNodes[ti].getCoveredText();
123123 }
124124
125 Assert.assertArrayEquals(sample1Tokens, tokens);
125 Assertions.assertArrayEquals(sample1Tokens, tokens);
126126 }
127127 }
128128 }
2020 import java.io.InputStream;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.tokenize.TokenSample;
2727 import opennlp.tools.util.Span;
2929 public class IrishSentenceBankDocumentTest {
3030
3131 @Test
32 public void testParsingSimpleDoc() throws IOException {
33 try (InputStream irishSBXmlIn =
34 IrishSentenceBankDocumentTest.class.getResourceAsStream("irishsentencebank-sample.xml")) {
32 void testParsingSimpleDoc() throws IOException {
33 try (InputStream irishSBXmlIn =
34 IrishSentenceBankDocumentTest.class.getResourceAsStream("irishsentencebank-sample.xml")) {
3535
3636 IrishSentenceBankDocument doc = IrishSentenceBankDocument.parse(irishSBXmlIn);
3737
3838 List<IrishSentenceBankDocument.IrishSentenceBankSentence> sents = doc.getSentences();
3939
40 Assert.assertEquals(2, sents.size());
40 Assertions.assertEquals(2, sents.size());
4141
4242 IrishSentenceBankDocument.IrishSentenceBankSentence sent1 = sents.get(0);
4343 IrishSentenceBankDocument.IrishSentenceBankSentence sent2 = sents.get(1);
4444
45 Assert.assertEquals("A Dhia, tá mé ag iompar clainne!", sent1.getOriginal());
45 Assertions.assertEquals("A Dhia, tá mé ag iompar clainne!", sent1.getOriginal());
4646
4747 IrishSentenceBankDocument.IrishSentenceBankFlex[] flex = sent1.getFlex();
48 Assert.assertEquals(7, flex.length);
49 Assert.assertEquals("A", flex[0].getSurface());
50 Assert.assertArrayEquals(new String[]{"a"}, flex[0].getFlex());
48 Assertions.assertEquals(7, flex.length);
49 Assertions.assertEquals("A", flex[0].getSurface());
50 Assertions.assertArrayEquals(new String[] {"a"}, flex[0].getFlex());
5151
5252 IrishSentenceBankDocument.IrishSentenceBankFlex[] flex2 = sent2.getFlex();
53 Assert.assertEquals("ón", flex2[4].getSurface());
54 Assert.assertArrayEquals(new String[]{"ó", "an"}, flex2[4].getFlex());
53 Assertions.assertEquals("ón", flex2[4].getSurface());
54 Assertions.assertArrayEquals(new String[] {"ó", "an"}, flex2[4].getFlex());
5555
56 Assert.assertEquals("Excuse me, are you from the stone age?", sent2.getTranslation());
56 Assertions.assertEquals("Excuse me, are you from the stone age?", sent2.getTranslation());
5757
5858 TokenSample ts = sent1.getTokenSample();
5959 Span[] spans = ts.getTokenSpans();
60 Assert.assertEquals(9, spans.length);
61 Assert.assertEquals(24, spans[7].getStart());
62 Assert.assertEquals(31, spans[7].getEnd());
63 Assert.assertEquals("clainne", ts.getText().substring(spans[7].getStart(), spans[7].getEnd()));
60 Assertions.assertEquals(9, spans.length);
61 Assertions.assertEquals(24, spans[7].getStart());
62 Assertions.assertEquals(31, spans[7].getEnd());
63 Assertions.assertEquals("clainne", ts.getText().substring(spans[7].getStart(), spans[7].getEnd()));
6464 }
6565 }
6666 }
1919 import java.io.File;
2020 import java.io.IOException;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.util.InvalidFormatException;
2626
2727 /**
2828 * Tests for the {@link LeipzigLanguageSampleStream} class.
2929 */
30
3031 public class LeipzigLanguageSampleStreamTest {
3132
3233 private static String testDataPath = LeipzigLanguageSampleStreamTest.class
33 .getClassLoader().getResource("opennlp/tools/formats/leipzig/samples").getPath();
34 .getClassLoader().getResource("opennlp/tools/formats/leipzig/samples").getPath();
3435
3536 @Test
36 public void testReadSentenceFiles() {
37 void testReadSentenceFiles() {
3738
3839 int samplesPerLanguage = 2;
3940 int sentencesPerSample = 1;
4041 try {
4142 LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(new File(testDataPath),
42 sentencesPerSample, samplesPerLanguage);
43 sentencesPerSample, samplesPerLanguage);
4344 int count = 0;
44 while (stream.read() != null)
45 while (stream.read() != null) {
4546 count++;
47 }
4648
47 Assert.assertEquals(4, count);
49 Assertions.assertEquals(4, count);
4850
4951 } catch (IOException e) {
50 Assert.fail();
52 Assertions.fail();
5153 }
5254 }
5355
54 @Test(expected = InvalidFormatException.class)
55 public void testNotEnoughSentences() throws IOException {
56 int samplesPerLanguage = 2;
57 int sentencesPerSample = 2;
56 @Test
57 void testNotEnoughSentences() {
58 Assertions.assertThrows(InvalidFormatException.class, () -> {
59 int samplesPerLanguage = 2;
60 int sentencesPerSample = 2;
5861
59 LeipzigLanguageSampleStream stream =
60 new LeipzigLanguageSampleStream(new File(testDataPath),
62 LeipzigLanguageSampleStream stream =
63 new LeipzigLanguageSampleStream(new File(testDataPath),
6164 sentencesPerSample, samplesPerLanguage);
62 while (stream.read() != null);
65 while (stream.read() != null) ;
66
67 });
68
6369
6470 }
6571
2020 import java.io.InputStream;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 public class LetsmtDocumentTest {
2727
2828 @Test
29 public void testParsingSimpleDoc() throws IOException {
29 void testParsingSimpleDoc() throws IOException {
3030 try (InputStream letsmtXmlIn = LetsmtDocumentTest.class.getResourceAsStream("letsmt-with-words.xml");) {
3131
3232 LetsmtDocument doc = LetsmtDocument.parse(letsmtXmlIn);
3333
3434 List<LetsmtDocument.LetsmtSentence> sents = doc.getSentences();
3535
36 Assert.assertEquals(2, sents.size());
36 Assertions.assertEquals(2, sents.size());
3737
3838 LetsmtDocument.LetsmtSentence sent1 = sents.get(0);
39 Assert.assertNull(sent1.getNonTokenizedText());
39 Assertions.assertNull(sent1.getNonTokenizedText());
4040
41 Assert.assertArrayEquals(new String[]{
41 Assertions.assertArrayEquals(new String[] {
4242 "The",
4343 "Apache",
4444 "Software",
7171 "software",
7272 "products",
7373 "."
74 }, sent1.getTokens());
74 }, sent1.getTokens());
7575
7676 LetsmtDocument.LetsmtSentence sent2 = sents.get(1);
77 Assert.assertNull(sent2.getNonTokenizedText());
77 Assertions.assertNull(sent2.getNonTokenizedText());
7878
79 Assert.assertArrayEquals(new String[]{
79 Assertions.assertArrayEquals(new String[] {
8080 "All",
8181 "software",
8282 "produced",
104104 "listed",
105105 "below",
106106 "."
107 }, sent2.getTokens());
107 }, sent2.getTokens());
108108 }
109109 }
110110 }
2020 import java.io.FileFilter;
2121 import java.io.IOException;
2222
23 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2425
2526 import opennlp.tools.namefind.NameFinderME;
2627 import opennlp.tools.namefind.NameSample;
3132 import opennlp.tools.util.Span;
3233 import opennlp.tools.util.TrainingParameters;
3334
34 import static org.junit.Assert.assertArrayEquals;
35 import static org.junit.Assert.assertEquals;
36 import static org.junit.Assert.assertNull;
37 import static org.junit.Assert.assertTrue;
38 import static org.junit.Assert.fail;
39
4035 public class MascNamedEntitySampleStreamTest {
4136
4237 @Test
43 public void read() {
38 void read() {
4439 try {
4540 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
4641 File directory = new File(this.getClass().getResource(
5247 NameSample s = stream.read();
5348
5449 String[] expectedTokens = {"This", "is", "a", "test", "Sentence", "."};
55 assertArrayEquals(expectedTokens, s.getSentence());
50 Assertions.assertArrayEquals(expectedTokens, s.getSentence());
5651
5752 Span[] expectedTags = new Span[] {new Span(4, 5, "org")};
5853 Span[] returnedTags = s.getNames();
5954 // check the start/end positions
60 assertEquals(expectedTags.length, returnedTags.length);
55 Assertions.assertEquals(expectedTags.length, returnedTags.length);
6156 for (int i = 0; i < returnedTags.length; i++) {
62 assertTrue(expectedTags[i].equals(returnedTags[i]));
57 Assertions.assertTrue(expectedTags[i].equals(returnedTags[i]));
6358 }
6459
6560 s = stream.read();
6661 expectedTokens = new String[] {"This", "is", "'nother", "test", "sentence", "."};
67 assertArrayEquals(expectedTokens, s.getSentence());
62 Assertions.assertArrayEquals(expectedTokens, s.getSentence());
6863
6964 expectedTags = new Span[] {};
7065 returnedTags = s.getNames();
71 assertArrayEquals(expectedTags, returnedTags);
66 Assertions.assertArrayEquals(expectedTags, returnedTags);
7267
7368 } catch (IOException e) {
74 fail("IO Exception: " + e.getMessage());
69 Assertions.fail("IO Exception: " + e.getMessage());
7570 }
7671 }
7772
7873 @Test
79 public void close() {
74 void close() {
8075 try {
8176 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
8277 File directory = new File(this.getClass().getResource(
8883 stream.close();
8984 NameSample s = stream.read();
9085 } catch (IOException e) {
91 assertEquals(e.getMessage(),
86 Assertions.assertEquals(e.getMessage(),
9287 "You are reading an empty document stream. " +
9388 "Did you close it?");
9489 }
9590 }
9691
9792 @Test
98 public void reset() {
93 void reset() {
9994 try {
10095 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
10196 File directory = new File(this.getClass().getResource(
107102 NameSample s = stream.read();
108103 s = stream.read();
109104 s = stream.read();
110 assertNull(s); //The stream should be exhausted by now
105 Assertions.assertNull(s); //The stream should be exhausted by now
111106
112107 stream.reset();
113108
114109 s = stream.read();
115110 String[] expectedTokens = {"This", "is", "a", "test", "Sentence", "."};
116 assertArrayEquals(expectedTokens, s.getSentence());
111 Assertions.assertArrayEquals(expectedTokens, s.getSentence());
117112
118113 Span[] expectedTags = new Span[] {new Span(4, 5, "org")};
119114 Span[] returnedTags = s.getNames();
120115 // check the start/end positions
121 assertEquals(expectedTags.length, returnedTags.length);
116 Assertions.assertEquals(expectedTags.length, returnedTags.length);
122117 for (int i = 0; i < returnedTags.length; i++) {
123 assertTrue(expectedTags[i].equals(returnedTags[i]));
118 Assertions.assertTrue(expectedTags[i].equals(returnedTags[i]));
124119 }
125120
126121 } catch (IOException e) {
127 fail("IO Exception: " + e.getMessage());
122 Assertions.fail("IO Exception: " + e.getMessage());
128123 }
129124 }
130125
131126 @Test
132 public void train() {
127 void train() {
133128 try {
134129 File directory = new File(this.getClass().getResource(
135130 "/opennlp/tools/formats/masc/").getFile());
159154 for (StackTraceElement trace : traces) {
160155 System.err.println(trace.toString());
161156 }
162 fail("Exception raised");
157 Assertions.fail("Exception raised");
163158 }
164159 }
165160
2121 import java.io.IOException;
2222 import java.util.Arrays;
2323
24 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2526
2627 import opennlp.tools.postag.POSEvaluator;
2728 import opennlp.tools.postag.POSModel;
3132 import opennlp.tools.util.ObjectStream;
3233 import opennlp.tools.util.TrainingParameters;
3334
34 import static org.junit.Assert.assertArrayEquals;
35 import static org.junit.Assert.assertEquals;
36 import static org.junit.Assert.assertNull;
37 import static org.junit.Assert.fail;
38
3935 public class MascPOSSampleStreamTest {
4036
4137 @Test
42 public void read() {
38 void read() {
4339 try {
4440 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
4541 File directory = new File(this.getClass().getResource(
5147 POSSample s = stream.read();
5248
5349 String[] expectedTokens = {"This", "is", "a", "test", "Sentence", "."};
54 assertArrayEquals(expectedTokens, s.getSentence());
50 Assertions.assertArrayEquals(expectedTokens, s.getSentence());
5551
5652 String[] expectedTags = {"DT", "VB", "AT", "NN", "NN", "."};
57 assertArrayEquals(expectedTags, s.getTags());
53 Assertions.assertArrayEquals(expectedTags, s.getTags());
5854
5955 s = stream.read();
6056 expectedTokens = new String[] {"This", "is", "'nother", "test", "sentence", "."};
61 assertArrayEquals(expectedTokens, s.getSentence());
57 Assertions.assertArrayEquals(expectedTokens, s.getSentence());
6258
6359 expectedTags = new String[] {"DT", "VB", "RB", "NN", "NN", "."};
64 assertArrayEquals(expectedTags, s.getTags());
60 Assertions.assertArrayEquals(expectedTags, s.getTags());
6561 } catch (IOException e) {
66 fail("IO Exception: " + e.getMessage());
62 Assertions.fail("IO Exception: " + e.getMessage());
6763 }
6864 }
6965
7066 @Test
71 public void close() {
67 void close() {
7268 try {
7369 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
7470 File directory = new File(this.getClass().getResource(
8076 stream.close();
8177 POSSample s = stream.read();
8278 } catch (IOException e) {
83 assertEquals(e.getMessage(),
79 Assertions.assertEquals(e.getMessage(),
8480 "You are reading an empty document stream. " +
8581 "Did you close it?");
8682 }
8783 }
8884
8985 @Test
90 public void reset() {
86 void reset() {
9187 try {
9288 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
9389 File directory = new File(this.getClass().getResource(
9995 POSSample s = stream.read();
10096 s = stream.read();
10197 s = stream.read();
102 assertNull(s); //The stream should be exhausted by now
98 Assertions.assertNull(s); //The stream should be exhausted by now
10399
104100 stream.reset();
105101
106102 s = stream.read();
107103
108104 String[] expectedTokens = {"This", "is", "a", "test", "Sentence", "."};
109 assertArrayEquals(expectedTokens, s.getSentence());
105 Assertions.assertArrayEquals(expectedTokens, s.getSentence());
110106
111107 String[] expectedTags = {"DT", "VB", "AT", "NN", "NN", "."};
112 assertArrayEquals(expectedTags, s.getTags());
108 Assertions.assertArrayEquals(expectedTags, s.getTags());
113109
114110 } catch (IOException e) {
115 fail("IO Exception: " + e.getMessage());
111 Assertions.fail("IO Exception: " + e.getMessage());
116112 }
117113 }
118114
119115 @Test
120 public void train() {
116 void train() {
121117 try {
122118 File directory = new File(this.getClass().getResource(
123119 "/opennlp/tools/formats/masc/").getFile());
144140 } catch (Exception e) {
145141 System.err.println(e.getMessage());
146142 System.err.println(Arrays.toString(e.getStackTrace()));
147 fail("Exception raised");
143 Assertions.fail("Exception raised");
148144 }
149145
150146
2323 import java.util.Arrays;
2424 import java.util.List;
2525
26 import org.junit.Ignore;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Disabled;
28 import org.junit.jupiter.api.Test;
2829
2930 import opennlp.tools.sentdetect.SentenceDetectorEvaluator;
3031 import opennlp.tools.sentdetect.SentenceDetectorFactory;
3536 import opennlp.tools.util.Span;
3637 import opennlp.tools.util.TrainingParameters;
3738
38 import static org.junit.Assert.assertEquals;
39 import static org.junit.Assert.assertNotNull;
40 import static org.junit.Assert.assertNull;
41 import static org.junit.Assert.fail;
42
4339 public class MascSentenceSampleStreamTest {
4440
4541 @Test
46 public void reset() {
42 void reset() {
4743 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
4844 File directory = new File(this.getClass().getResource(
4945 "/opennlp/tools/formats/masc/").getFile());
5652
5753 //now we should get null
5854 testSample = stream.read();
59 assertNull(testSample);
55 Assertions.assertNull(testSample);
6056
6157 //by resetting, we should get good results again
6258 stream.reset();
6359 testSample = stream.read();
64 assertNotNull(testSample);
60 Assertions.assertNotNull(testSample);
6561
6662 String documentText = "This is a test Sentence. This is 'nother test sentence. ";
6763 List<Span> sentenceSpans = new ArrayList<>();
7066 SentenceSample expectedSample = new SentenceSample(documentText,
7167 sentenceSpans.toArray(new Span[sentenceSpans.size()]));
7268
73 assertEquals(testSample.toString(), expectedSample.toString());
69 Assertions.assertEquals(testSample.toString(), expectedSample.toString());
7470
7571 } catch (IOException e) {
76 fail("IO Exception");
72 Assertions.fail("IO Exception");
7773 }
7874 }
7975
8076 @Test
81 public void close() {
77 void close() {
8278
8379 try {
8480 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
9086 stream.close();
9187 stream.read();
9288 } catch (IOException e) {
93 assertEquals(e.getMessage(),
89 Assertions.assertEquals(e.getMessage(),
9490 "You are reading an empty document stream. " +
9591 "Did you close it?");
9692 }
9793 }
9894
9995 @Test
100 public void read() {
96 void read() {
10197 FileFilter fileFilter = pathname -> pathname.getName().contains("");
10298 File directory = new File(this.getClass().getResource("/opennlp/tools/formats/masc").getFile());
10399 try {
112108 SentenceSample expectedSample = new SentenceSample(documentText,
113109 sentenceSpans.toArray(new Span[sentenceSpans.size()]));
114110 SentenceSample testSample = stream.read();
115 assertEquals(testSample.toString(), expectedSample.toString());
111 Assertions.assertEquals(testSample.toString(), expectedSample.toString());
116112
117113 //the fake file is exhausted, we should get null now
118114 testSample = stream.read();
119 assertNull(testSample);
115 Assertions.assertNull(testSample);
120116
121117 } catch (IOException e) {
122118 System.out.println(e.getMessage());
123119 System.out.println(Arrays.toString(e.getStackTrace()));
124 fail("IO Exception");
120 Assertions.fail("IO Exception");
125121 }
126122
127123 }
128124
129 @Ignore //todo: We can't train on the FakeMasc data, it is too small.
125 @Disabled //todo: We can't train on the FakeMasc data, it is too small.
130126 @Test
131 public void train() {
127 void train() {
132128 try {
133129 File directory = new File(this.getClass().getResource(
134130 "/opennlp/tools/formats/masc/").getFile());
155151 } catch (Exception e) {
156152 System.err.println(e.getMessage());
157153 System.err.println(Arrays.toString(e.getStackTrace()));
158 fail("Exception raised");
154 Assertions.fail("Exception raised");
159155 }
160156
161157
2121 import java.io.IOException;
2222 import java.util.Arrays;
2323
24 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2526
2627 import opennlp.tools.tokenize.TokenSample;
2728 import opennlp.tools.tokenize.TokenizerEvaluator;
3233 import opennlp.tools.util.Span;
3334 import opennlp.tools.util.TrainingParameters;
3435
35 import static org.junit.Assert.assertArrayEquals;
36 import static org.junit.Assert.assertEquals;
37 import static org.junit.Assert.assertNull;
38 import static org.junit.Assert.fail;
39
4036 public class MascTokenSampleStreamTest {
4137
4238 @Test
43 public void read() {
39 void read() {
4440 try {
4541 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
4642 File directory = new File(this.getClass().getResource(
5248 TokenSample s = stream.read();
5349
5450 String expectedString = "This is a test Sentence.";
55 assertEquals(expectedString, s.getText());
51 Assertions.assertEquals(expectedString, s.getText());
5652
5753 Span[] expectedTags = {
5854 new Span(0, 4),
6157 new Span(10, 14),
6258 new Span(15, 23),
6359 new Span(23, 24)};
64 assertArrayEquals(expectedTags, s.getTokenSpans());
60 Assertions.assertArrayEquals(expectedTags, s.getTokenSpans());
6561
6662 s = stream.read();
6763 String expectedTokens = "This is 'nother test sentence.";
68 assertEquals(expectedTokens, s.getText());
64 Assertions.assertEquals(expectedTokens, s.getText());
6965
7066 expectedTags = new Span[] {
7167 new Span(0, 4),
7470 new Span(16, 20),
7571 new Span(21, 29),
7672 new Span(29, 30)};
77 assertArrayEquals(expectedTags, s.getTokenSpans());
73 Assertions.assertArrayEquals(expectedTags, s.getTokenSpans());
7874 } catch (IOException e) {
79 fail("IO Exception: " + e.getMessage());
75 Assertions.fail("IO Exception: " + e.getMessage());
8076 }
8177 }
8278
8379 @Test
84 public void close() {
80 void close() {
8581 try {
8682 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
8783 File directory = new File(this.getClass().getResource(
9389 stream.close();
9490 TokenSample s = stream.read();
9591 } catch (IOException e) {
96 assertEquals(e.getMessage(),
92 Assertions.assertEquals(e.getMessage(),
9793 "You are reading an empty document stream. " +
9894 "Did you close it?");
9995 }
10096 }
10197
10298 @Test
103 public void reset() {
99 void reset() {
104100 try {
105101 FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
106102 File directory = new File(this.getClass().getResource(
112108 TokenSample s = stream.read();
113109 s = stream.read();
114110 s = stream.read();
115 assertNull(s); //The stream should be exhausted by now
111 Assertions.assertNull(s); //The stream should be exhausted by now
116112
117113 stream.reset();
118114
119115 s = stream.read();
120116
121117 String expectedString = "This is a test Sentence.";
122 assertEquals(expectedString, s.getText());
118 Assertions.assertEquals(expectedString, s.getText());
123119
124120 Span[] expectedTags = {
125121 new Span(0, 4),
128124 new Span(10, 14),
129125 new Span(15, 23),
130126 new Span(23, 24)};
131 assertArrayEquals(expectedTags, s.getTokenSpans());
127 Assertions.assertArrayEquals(expectedTags, s.getTokenSpans());
132128
133129 } catch (IOException e) {
134 fail("IO Exception: " + e.getMessage());
130 Assertions.fail("IO Exception: " + e.getMessage());
135131 }
136132 }
137133
138134
139135 @Test
140 public void train() {
136 void train() {
141137 try {
142138 File directory = new File(this.getClass().getResource(
143139 "/opennlp/tools/formats/masc/").getFile());
164160 } catch (Exception e) {
165161 System.err.println(e.getMessage());
166162 System.err.println(Arrays.toString(e.getStackTrace()));
167 fail("Exception raised");
163 Assertions.fail("Exception raised");
168164 }
169165
170166
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.util.ObjectStream;
2525 import opennlp.tools.util.ObjectStreamUtils;
2727 public class DocumentSplitterStreamTest {
2828
2929 @Test
30 public void testSplitTwoDocuments() throws IOException {
30 void testSplitTwoDocuments() throws IOException {
3131
3232 StringBuilder docsString = new StringBuilder();
3333
4040 try (ObjectStream<String> docs = new DocumentSplitterStream(
4141 ObjectStreamUtils.createObjectStream(docsString.toString()))) {
4242 String doc1 = docs.read();
43 Assert.assertEquals(docsString.length() / 2, doc1.length() + 1);
44 Assert.assertTrue(doc1.contains("#0"));
43 Assertions.assertEquals(docsString.length() / 2, doc1.length() + 1);
44 Assertions.assertTrue(doc1.contains("#0"));
4545
4646 String doc2 = docs.read();
47 Assert.assertEquals(docsString.length() / 2, doc2.length() + 1);
48 Assert.assertTrue(doc2.contains("#1"));
47 Assertions.assertEquals(docsString.length() / 2, doc2.length() + 1);
48 Assertions.assertTrue(doc2.contains("#1"));
4949
50 Assert.assertNull(docs.read());
51 Assert.assertNull(docs.read());
50 Assertions.assertNull(docs.read());
51 Assertions.assertNull(docs.read());
5252 }
5353 }
5454 }
2121 import java.io.Reader;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Test;
24 import org.junit.jupiter.api.Test;
2525
2626 public class SgmlParserTest {
2727
2828 @Test
29 public void testParse1() throws IOException {
29 void testParse1() throws IOException {
3030
3131 try (Reader in = new InputStreamReader(
3232 SgmlParserTest.class.getResourceAsStream("parsertest1.sgml"), StandardCharsets.UTF_8)) {
1919 import java.io.IOException;
2020 import java.io.InputStream;
2121
22 import org.junit.Test;
23
24 import static org.junit.Assert.assertEquals;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2524
2625 public class NKJPSegmentationDocumentTest {
2726 @Test
28 public void testParsingSimpleDoc() throws IOException {
27 void testParsingSimpleDoc() throws IOException {
2928 try (InputStream nkjpSegXmlIn =
30 NKJPSegmentationDocumentTest.class.getResourceAsStream("ann_segmentation.xml")) {
29 NKJPSegmentationDocumentTest.class.getResourceAsStream("ann_segmentation.xml")) {
3130
3231 NKJPSegmentationDocument doc = NKJPSegmentationDocument.parse(nkjpSegXmlIn);
3332
34 assertEquals(1, doc.getSegments().size());
33 Assertions.assertEquals(1, doc.getSegments().size());
3534
36 assertEquals(7, doc.getSegments().get("segm_1.1-s").size());
35 Assertions.assertEquals(7, doc.getSegments().get("segm_1.1-s").size());
3736
3837 String src = "To krótkie zdanie w drugim akapicie.";
3938
4039 int offset = doc.getSegments().get("segm_1.1-s").get("segm_1.1-seg").offset;
41 assertEquals(0, offset);
40 Assertions.assertEquals(0, offset);
4241 int length = doc.getSegments().get("segm_1.1-s").get("segm_1.1-seg").length;
43 assertEquals(2, length);
44 assertEquals("To", src.substring(offset, length));
42 Assertions.assertEquals(2, length);
43 Assertions.assertEquals("To", src.substring(offset, length));
4544 }
4645 }
4746 }
1919 import java.io.InputStream;
2020 import java.util.Map;
2121
22 import org.junit.Test;
23
24 import static org.junit.Assert.assertEquals;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2524
2625 public class NKJPTextDocumentTest {
2726 @Test
28 public void testParsingSimpleDoc() throws Exception {
27 void testParsingSimpleDoc() throws Exception {
2928 try (InputStream nkjpTextXmlIn =
30 NKJPTextDocumentTest.class.getResourceAsStream("text_structure.xml")) {
29 NKJPTextDocumentTest.class.getResourceAsStream("text_structure.xml")) {
3130
3231 NKJPTextDocument doc = NKJPTextDocument.parse(nkjpTextXmlIn);
3332
34 assertEquals(1, doc.getDivtypes().size());
35 assertEquals("article", doc.getDivtypes().get("div-1"));
33 Assertions.assertEquals(1, doc.getDivtypes().size());
34 Assertions.assertEquals("article", doc.getDivtypes().get("div-1"));
3635
37 assertEquals(1, doc.getTexts().size());
38 assertEquals(1, doc.getTexts().get("text-1").size());
39 assertEquals(2, doc.getTexts().get("text-1").get("div-1").size());
36 Assertions.assertEquals(1, doc.getTexts().size());
37 Assertions.assertEquals(1, doc.getTexts().get("text-1").size());
38 Assertions.assertEquals(2, doc.getTexts().get("text-1").get("div-1").size());
4039
4140 String exp = "To krótki tekst w formacie NKJP. Zawiera dwa zdania.";
42 assertEquals(exp, doc.getTexts().get("text-1").get("div-1").get("p-1"));
41 Assertions.assertEquals(exp, doc.getTexts().get("text-1").get("div-1").get("p-1"));
4342 }
4443 }
4544
4645 @Test
47 public void testGetParagraphs() throws Exception {
46 void testGetParagraphs() throws Exception {
4847 try (InputStream nkjpTextXmlIn =
49 NKJPTextDocumentTest.class.getResourceAsStream("text_structure.xml")) {
48 NKJPTextDocumentTest.class.getResourceAsStream("text_structure.xml")) {
5049
5150 NKJPTextDocument doc = NKJPTextDocument.parse(nkjpTextXmlIn);
5251 Map<String, String> paras = doc.getParagraphs();
53 assertEquals("To krótkie zdanie w drugim akapicie.", paras.get("ab-1"));
52 Assertions.assertEquals("To krótkie zdanie w drugim akapicie.", paras.get("ab-1"));
5453 }
5554 }
5655 }
1919 import java.util.Arrays;
2020 import java.util.Collection;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525
2626 public class DefaultLanguageDetectorContextGeneratorTest {
2727
2828 @Test
29 public void extractContext() throws Exception {
29 void extractContext() {
3030 String doc = "abcde fghijk";
3131
3232 LanguageDetectorContextGenerator cg = new DefaultLanguageDetectorContextGenerator(1, 3);
3333
3434 Collection<String> features = Arrays.asList(cg.getContext(doc));
3535
36 Assert.assertEquals(33, features.size());
37 Assert.assertTrue(features.contains("ab"));
38 Assert.assertTrue(features.contains("abc"));
39 Assert.assertTrue(features.contains("e f"));
40 Assert.assertTrue(features.contains(" fg"));
36 Assertions.assertEquals(33, features.size());
37 Assertions.assertTrue(features.contains("ab"));
38 Assertions.assertTrue(features.contains("abc"));
39 Assertions.assertTrue(features.contains("e f"));
40 Assertions.assertTrue(features.contains(" fg"));
4141 }
4242 }
1818
1919 import java.util.concurrent.atomic.AtomicInteger;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2324
2425 import opennlp.tools.util.TrainingParameters;
2526
5657
5758 cv.evaluate(sampleStream, 2);
5859
59 Assert.assertEquals(99, cv.getDocumentCount());
60 Assert.assertEquals(0.98989898989899, cv.getDocumentAccuracy(), 0.01);
60 Assertions.assertEquals(99, cv.getDocumentCount());
61 Assertions.assertEquals(0.98989898989899, cv.getDocumentAccuracy(), 0.01);
6162 }
6263
6364 }
2020 import java.nio.charset.StandardCharsets;
2121 import java.util.concurrent.atomic.AtomicInteger;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.cmdline.langdetect.LanguageDetectorEvaluationErrorListener;
2727
2929 public class LanguageDetectorEvaluatorTest {
3030
3131 @Test
32 public void processSample() throws Exception {
32 void processSample() throws Exception {
3333 LanguageDetectorModel model = LanguageDetectorMETest.trainModel();
3434 LanguageDetectorME langdetector = new LanguageDetectorME(model);
3535
6363 "escreve e faz palestras pelo mundo inteiro sobre anjos"));
6464
6565
66 Assert.assertEquals(1, correctCount.get());
67 Assert.assertEquals(2, incorrectCount.get());
66 Assertions.assertEquals(1, correctCount.get());
67 Assertions.assertEquals(2, incorrectCount.get());
6868
69 Assert.assertEquals(3, evaluator.getDocumentCount());
70 Assert.assertEquals(0.33, evaluator.getAccuracy(), 0.01);
69 Assertions.assertEquals(3, evaluator.getDocumentCount());
70 Assertions.assertEquals(0.33, evaluator.getAccuracy(), 0.01); // expected, actual, delta
7171
7272 String report = outputStream.toString(StandardCharsets.UTF_8.name());
7373
74 Assert.assertEquals("Expected\tPredicted\tContext" + System.lineSeparator() +
74 Assertions.assertEquals("Expected\tPredicted\tContext" + System.lineSeparator() +
7575 "fra\tpob\tescreve e faz palestras pelo mundo inteiro sobre anjos" + System.lineSeparator() +
76 "fra\tpob\tescreve e faz palestras pelo mundo inteiro sobre anjos" + System.lineSeparator(), report);
76 "fra\tpob\tescreve e faz palestras pelo mundo inteiro sobre anjos" + System.lineSeparator(), report);
7777 }
7878
7979 }
2424 import java.util.HashSet;
2525 import java.util.Set;
2626
27 import org.junit.Assert;
28 import org.junit.BeforeClass;
29 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.BeforeAll;
29 import org.junit.jupiter.api.Test;
3030
3131 import opennlp.tools.formats.ResourceAsStreamFactory;
3232 import opennlp.tools.util.PlainTextByLineStream;
3737
3838 private static LanguageDetectorModel model;
3939
40 @BeforeClass
41 public static void train() throws Exception {
40 @BeforeAll
41 static void train() throws Exception {
4242
4343 ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(
4444 LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt");
5656 }
5757
5858 @Test
59 public void testCorrectFactory() throws IOException {
59 void testCorrectFactory() throws IOException {
6060 byte[] serialized = LanguageDetectorMETest.serializeModel(model);
6161
6262 LanguageDetectorModel myModel = new LanguageDetectorModel(new ByteArrayInputStream(serialized));
6363
64 Assert.assertTrue(myModel.getFactory() instanceof DummyFactory);
64 Assertions.assertTrue(myModel.getFactory() instanceof DummyFactory);
6565
6666 }
6767
6868 @Test
69 public void testDummyFactory() throws Exception {
69 void testDummyFactory() throws Exception {
7070 byte[] serialized = LanguageDetectorMETest.serializeModel(model);
7171
7272 LanguageDetectorModel myModel = new LanguageDetectorModel(new ByteArrayInputStream(serialized));
7373
74 Assert.assertTrue(myModel.getFactory() instanceof DummyFactory);
74 Assertions.assertTrue(myModel.getFactory() instanceof DummyFactory);
7575 }
7676
7777 @Test
78 public void testDummyFactoryContextGenerator() throws Exception {
78 void testDummyFactoryContextGenerator() {
7979 LanguageDetectorContextGenerator cg = model.getFactory().getContextGenerator();
8080 String[] context = cg.getContext(
8181 "a dummy text phrase to test if the context generator works!!!!!!!!!!!!");
8282
8383 Set<String> set = new HashSet<>(Arrays.asList(context));
8484
85 Assert.assertTrue(set.contains("!!!!!")); // default normalizer would remove the repeated !
86 Assert.assertTrue(set.contains("a dum"));
87 Assert.assertTrue(set.contains("tg=[THE,CONTEXT,GENERATOR]"));
85 Assertions.assertTrue(set.contains("!!!!!")); // default normalizer would remove the repeated !
86 Assertions.assertTrue(set.contains("a dum"));
87 Assertions.assertTrue(set.contains("tg=[THE,CONTEXT,GENERATOR]"));
8888 }
8989
9090 }
2020 import java.io.IOException;
2121 import java.nio.charset.StandardCharsets;
2222
23 import org.junit.Assert;
24 import org.junit.Before;
25 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.BeforeEach;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.formats.ResourceAsStreamFactory;
2828 import opennlp.tools.util.PlainTextByLineStream;
3333
3434 private LanguageDetectorModel model;
3535
36 @Before
37 public void init() throws Exception {
36 @BeforeEach
37 void init() throws Exception {
3838
3939 this.model = trainModel();
4040
4141 }
4242
4343 @Test
44 public void testPredictLanguages() {
44 void testPredictLanguages() {
4545 LanguageDetector ld = new LanguageDetectorME(this.model);
4646 Language[] languages = ld.predictLanguages("estava em uma marcenaria na Rua Bruno");
4747
48 Assert.assertEquals(4, languages.length);
49 Assert.assertEquals("pob", languages[0].getLang());
50 Assert.assertEquals("ita", languages[1].getLang());
51 Assert.assertEquals("spa", languages[2].getLang());
52 Assert.assertEquals("fra", languages[3].getLang());
48 Assertions.assertEquals(4, languages.length);
49 Assertions.assertEquals("pob", languages[0].getLang());
50 Assertions.assertEquals("ita", languages[1].getLang());
51 Assertions.assertEquals("spa", languages[2].getLang());
52 Assertions.assertEquals("fra", languages[3].getLang());
5353 }
5454
5555 @Test
56 public void testProbingPredictLanguages() {
56 void testProbingPredictLanguages() {
5757 LanguageDetectorME ld = new LanguageDetectorME(this.model);
5858 for (int i = 0; i < 10000; i += 1000) {
5959 StringBuilder sb = new StringBuilder();
6161 sb.append("estava em uma marcenaria na Rua Bruno ");
6262 }
6363 ProbingLanguageDetectionResult result = ld.probingPredictLanguages(sb.toString());
64 Assert.assertTrue(result.getLength() <= 600);
64 Assertions.assertTrue(result.getLength() <= 600);
6565 Language[] languages = result.getLanguages();
66 Assert.assertEquals(4, languages.length);
67 Assert.assertEquals("pob", languages[0].getLang());
68 Assert.assertEquals("ita", languages[1].getLang());
69 Assert.assertEquals("spa", languages[2].getLang());
70 Assert.assertEquals("fra", languages[3].getLang());
66 Assertions.assertEquals(4, languages.length);
67 Assertions.assertEquals("pob", languages[0].getLang());
68 Assertions.assertEquals("ita", languages[1].getLang());
69 Assertions.assertEquals("spa", languages[2].getLang());
70 Assertions.assertEquals("fra", languages[3].getLang());
7171 }
7272 }
7373
7474 @Test
75 public void testPredictLanguage() {
75 void testPredictLanguage() {
7676 LanguageDetector ld = new LanguageDetectorME(this.model);
7777 Language language = ld.predictLanguage("Dove è meglio che giochi");
7878
79 Assert.assertEquals("ita", language.getLang());
79 Assertions.assertEquals("ita", language.getLang());
8080 }
8181
8282 @Test
83 public void testSupportedLanguages() {
83 void testSupportedLanguages() {
8484
8585 LanguageDetector ld = new LanguageDetectorME(this.model);
8686 String[] supportedLanguages = ld.getSupportedLanguages();
8787
88 Assert.assertEquals(4, supportedLanguages.length);
88 Assertions.assertEquals(4, supportedLanguages.length);
8989 }
9090
9191 @Test
92 public void testLoadFromSerialized() throws IOException {
92 void testLoadFromSerialized() throws IOException {
9393 byte[] serialized = serializeModel(model);
9494
9595 LanguageDetectorModel myModel = new LanguageDetectorModel(new ByteArrayInputStream(serialized));
9696
97 Assert.assertNotNull(myModel);
97 Assertions.assertNotNull(myModel);
9898
9999 }
100100
2424 import java.io.ObjectOutput;
2525 import java.io.ObjectOutputStream;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 public class LanguageSampleTest {
3131
3232 @Test
33 public void testConstructor() {
33 void testConstructor() {
3434 Language lang = new Language("aLang");
3535 CharSequence context = "aContext";
3636
3737 LanguageSample sample = new LanguageSample(lang, context);
3838
39 Assert.assertEquals(lang, sample.getLanguage());
40 Assert.assertEquals(context, sample.getContext());
39 Assertions.assertEquals(lang, sample.getLanguage());
40 Assertions.assertEquals(context, sample.getContext());
4141 }
4242
4343 @Test
44 public void testLanguageSampleSerDe() throws IOException {
44 void testLanguageSampleSerDe() throws IOException {
4545 Language lang = new Language("aLang");
4646 CharSequence context = "aContext";
4747
6363 // do nothing
6464 }
6565
66 Assert.assertNotNull(deSerializedLanguageSample);
67 Assert.assertEquals(languageSample.getContext(), deSerializedLanguageSample.getContext());
68 Assert.assertEquals(languageSample.getLanguage(), deSerializedLanguageSample.getLanguage());
69 Assert.assertEquals(languageSample, deSerializedLanguageSample);
70 }
71
72 @Test(expected = NullPointerException.class)
73 public void testNullLang() throws Exception {
74 CharSequence context = "aContext";
75
76 new LanguageSample(null, context);
77 }
78
79 @Test(expected = NullPointerException.class)
80 public void testNullContext() {
81 Language lang = new Language("aLang");
82
83 new LanguageSample(lang, null);
66 Assertions.assertNotNull(deSerializedLanguageSample);
67 Assertions.assertEquals(languageSample.getContext(), deSerializedLanguageSample.getContext());
68 Assertions.assertEquals(languageSample.getLanguage(), deSerializedLanguageSample.getLanguage());
69 Assertions.assertEquals(languageSample, deSerializedLanguageSample);
8470 }
8571
8672 @Test
87 public void testToString() {
73 void testNullLang() {
74 Assertions.assertThrows(NullPointerException.class, () -> {
75 CharSequence context = "aContext";
76
77 new LanguageSample(null, context);
78 });
79
80 }
81
82 @Test
83 void testNullContext() {
84 Assertions.assertThrows(NullPointerException.class, () -> {
85 Language lang = new Language("aLang");
86
87 new LanguageSample(lang, null);
88 });
89
90 }
91
92 @Test
93 void testToString() {
8894 Language lang = new Language("aLang");
8995 CharSequence context = "aContext";
9096
9197 LanguageSample sample = new LanguageSample(lang, context);
9298
93 Assert.assertEquals(lang.getLang() + "\t" + context, sample.toString());
99 Assertions.assertEquals(lang.getLang() + "\t" + context, sample.toString());
94100 }
95101
96102 @Test
97 public void testHash() {
103 void testHash() {
98104
99105 int hashA = new LanguageSample(new Language("aLang"), "aContext").hashCode();
100106 int hashB = new LanguageSample(new Language("bLang"), "aContext").hashCode();
101107 int hashC = new LanguageSample(new Language("aLang"), "bContext").hashCode();
102108
103 Assert.assertNotEquals(hashA, hashB);
104 Assert.assertNotEquals(hashA, hashC);
105 Assert.assertNotEquals(hashB, hashC);
109 Assertions.assertNotEquals(hashA, hashB);
110 Assertions.assertNotEquals(hashA, hashC);
111 Assertions.assertNotEquals(hashB, hashC);
106112 }
107113
108114 @Test
109 public void testEquals() throws Exception {
115 void testEquals() {
110116
111117 LanguageSample sampleA = new LanguageSample(new Language("aLang"), "aContext");
112118 LanguageSample sampleA1 = new LanguageSample(new Language("aLang"), "aContext");
113119 LanguageSample sampleB = new LanguageSample(new Language("bLang"), "aContext");
114120 LanguageSample sampleC = new LanguageSample(new Language("aLang"), "bContext");
115121
116 Assert.assertEquals(sampleA, sampleA);
117 Assert.assertEquals(sampleA, sampleA1);
118 Assert.assertNotEquals(sampleA, sampleB);
119 Assert.assertNotEquals(sampleA, sampleC);
120 Assert.assertNotEquals(sampleB, sampleC);
121 Assert.assertNotEquals(sampleA, "something else");
122 Assertions.assertEquals(sampleA, sampleA);
123 Assertions.assertEquals(sampleA, sampleA1);
124 Assertions.assertNotEquals(sampleA, sampleB);
125 Assertions.assertNotEquals(sampleA, sampleC);
126 Assertions.assertNotEquals(sampleB, sampleC);
127 Assertions.assertNotEquals(sampleA, "something else");
122128 }
123129 }
1616
1717 package opennlp.tools.langdetect;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
21
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2221
2322 public class LanguageTest {
2423
2524
2625 @Test
27 public void emptyConfidence() throws Exception {
26 void emptyConfidence() {
2827 String languageCode = "aLanguage";
2928 Language lang = new Language(languageCode);
3029
31 Assert.assertEquals(languageCode, lang.getLang());
32 Assert.assertEquals(0, lang.getConfidence(), 0);
30 Assertions.assertEquals(languageCode, lang.getLang());
31 Assertions.assertEquals(0, lang.getConfidence(), 0);
3332 }
3433
3534 @Test
36 public void nonEmptyConfidence() throws Exception {
35 void nonEmptyConfidence() {
3736 String languageCode = "aLanguage";
3837 double confidence = 0.05;
3938 Language lang = new Language(languageCode, confidence);
4039
41 Assert.assertEquals(languageCode, lang.getLang());
42 Assert.assertEquals(confidence, lang.getConfidence(), 0);
43 }
44
45 @Test(expected = NullPointerException.class)
46 public void emptyLanguage() throws Exception {
47 new Language(null);
48 }
49
50 @Test(expected = NullPointerException.class)
51 public void emptyLanguageConfidence() throws Exception {
52 new Language(null, 0.05);
40 Assertions.assertEquals(languageCode, lang.getLang());
41 Assertions.assertEquals(confidence, lang.getConfidence(), 0);
5342 }
5443
5544 @Test
56 public void testToString() {
45 void emptyLanguage() {
46 Assertions.assertThrows(NullPointerException.class, () -> {
47 new Language(null);
48 });
49 }
50
51 @Test
52 void emptyLanguageConfidence() {
53 Assertions.assertThrows(NullPointerException.class, () -> {
54 new Language(null, 0.05);
55 });
56 }
57
58 @Test
59 void testToString() {
5760 Language lang = new Language("aLang");
5861
59 Assert.assertEquals("aLang (0.0)", lang.toString());
62 Assertions.assertEquals("aLang (0.0)", lang.toString());
6063
6164 lang = new Language("aLang", 0.0886678);
6265
63 Assert.assertEquals("aLang (0.0886678)", lang.toString());
66 Assertions.assertEquals("aLang (0.0886678)", lang.toString());
6467 }
6568
6669
6770 @Test
68 public void testHash() {
71 void testHash() {
6972 int hashA = new Language("aLang").hashCode();
7073 int hashAA = new Language("aLang").hashCode();
7174 int hashB = new Language("BLang").hashCode();
7275 int hashA5 = new Language("aLang", 5.0).hashCode();
7376 int hashA6 = new Language("BLang", 6.0).hashCode();
7477
75 Assert.assertEquals(hashA, hashAA);
78 Assertions.assertEquals(hashA, hashAA);
7679
77 Assert.assertNotEquals(hashA, hashB);
78 Assert.assertNotEquals(hashA, hashA5);
79 Assert.assertNotEquals(hashB, hashA5);
80 Assert.assertNotEquals(hashA5, hashA6);
80 Assertions.assertNotEquals(hashA, hashB);
81 Assertions.assertNotEquals(hashA, hashA5);
82 Assertions.assertNotEquals(hashB, hashA5);
83 Assertions.assertNotEquals(hashA5, hashA6);
8184 }
8285
8386 @Test
84 public void testEquals() {
87 void testEquals() {
8588 Language langA = new Language("langA");
8689 Language langB = new Language("langB");
8790 Language langA5 = new Language("langA5", 5.0);
8891 Language langA6 = new Language("langA5", 6.0);
8992
90 Assert.assertEquals(langA, langA);
91 Assert.assertEquals(langA5, langA5);
93 Assertions.assertEquals(langA, langA);
94 Assertions.assertEquals(langA5, langA5);
9295
93 Assert.assertNotEquals(langA, langA5);
94 Assert.assertNotEquals(langA, langB);
96 Assertions.assertNotEquals(langA, langA5);
97 Assertions.assertNotEquals(langA, langB);
9598
96 Assert.assertEquals(langA6, langA5);
99 Assertions.assertEquals(langA6, langA5);
97100
98 Assert.assertNotEquals(langA, "something else");
101 Assertions.assertNotEquals(langA, "something else");
99102 }
100103 }
1818
1919 import java.util.Collection;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.util.StringList;
2525
2929 public class LanguageModelEvaluationTest {
3030
3131 @Test
32 public void testPerplexityComparison() throws Exception {
32 void testPerplexityComparison() {
3333
3434 Collection<String[]> trainingVocabulary =
3535 LanguageModelTestUtils.generateRandomVocabulary(1100000);
4949 }
5050 double bigramPerplexity =
5151 LanguageModelTestUtils.getPerplexity(bigramLM, testVocabulary, 2);
52 Assert.assertTrue(unigramPerplexity >= bigramPerplexity);
52 Assertions.assertTrue(unigramPerplexity >= bigramPerplexity);
5353
5454 NGramLanguageModel trigramLM = new NGramLanguageModel(3);
5555 for (String[] sentence : trainingVocabulary) {
5757 }
5858 double trigramPerplexity =
5959 LanguageModelTestUtils.getPerplexity(trigramLM, testVocabulary, 3);
60 Assert.assertTrue(bigramPerplexity >= trigramPerplexity);
60 Assertions.assertTrue(bigramPerplexity >= trigramPerplexity);
6161
6262 }
6363 }
2222 import java.util.LinkedList;
2323 import java.util.Random;
2424
25 import org.junit.Ignore;
25 import org.junit.jupiter.api.Disabled;
2626
2727 import opennlp.tools.ngram.NGramUtils;
2828
2929 /**
3030 * Utility class for language models tests
3131 */
32 @Ignore
32 @Disabled
3333 public class LanguageModelTestUtils {
3434
3535 private static final java.math.MathContext CONTEXT = MathContext.DECIMAL128;
3636 private static Random r = new Random();
3737
38 private static final char[] chars = new char[]{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
38 private static final char[] chars = new char[] {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
3939
4040 public static Collection<String[]> generateRandomVocabulary(int size) {
4141 Collection<String[]> vocabulary = new LinkedList<>();
2121 import java.util.List;
2222
2323 import org.apache.commons.io.IOUtils;
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.ngram.NGramGenerator;
2828
3434 @Test
3535 public void testEmptyVocabularyProbability() {
3636 NGramLanguageModel model = new NGramLanguageModel();
37 Assert.assertEquals("probability with an empty vocabulary is always 0",
38 0d, model.calculateProbability(""), 0d);
39 Assert.assertEquals("probability with an empty vocabulary is always 0",
40 0d, model.calculateProbability("1", "2", "3"), 0d);
37 Assertions.assertEquals(0d, model.calculateProbability(""), 0d,
38 "probability with an empty vocabulary is always 0");
39 Assertions.assertEquals(0d, model.calculateProbability("1", "2", "3"), 0d,
40 "probability with an empty vocabulary is always 0");
4141 }
4242
4343 @Test
4747 model.add(sentence);
4848 }
4949 double probability = model.calculateProbability(LanguageModelTestUtils.generateRandomSentence());
50 Assert.assertTrue("a probability measure should be between 0 and 1 [was "
51 + probability + "]", probability >= 0 && probability <= 1);
50 Assertions.assertTrue(probability >= 0 && probability <= 1,
51 "a probability measure should be between 0 and 1 [was " + probability + "]");
5252 }
5353
5454 @Test
5858 model.add("the", "red", "house");
5959 model.add("I", "saw", "something", "nice");
6060 double probability = model.calculateProbability("I", "saw", "the", "red", "house");
61 Assert.assertTrue("a probability measure should be between 0 and 1 [was "
62 + probability + "]", probability >= 0 && probability <= 1);
61 Assertions.assertTrue(probability >= 0 && probability <= 1,
62 "a probability measure should be between 0 and 1 [was " + probability + "]");
6363
6464 String[] tokens = model.predictNextTokens("I", "saw");
65 Assert.assertNotNull(tokens);
66 Assert.assertArrayEquals(new String[] {"the", "fox"}, tokens);
65 Assertions.assertNotNull(tokens);
66 Assertions.assertArrayEquals(new String[] {"the", "fox"}, tokens);
6767 }
6868
6969 @Test
7373 model.add("<s>", "Sam", "I", "am", "</s>");
7474 model.add("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>");
7575 double probability = model.calculateProbability("<s>", "I");
76 Assert.assertEquals(0.666d, probability, 0.001);
76 Assertions.assertEquals(0.666d, probability, 0.001);
7777 probability = model.calculateProbability("Sam", "</s>");
78 Assert.assertEquals(0.5d, probability, 0.001);
78 Assertions.assertEquals(0.5d, probability, 0.001);
7979 probability = model.calculateProbability("<s>", "Sam");
80 Assert.assertEquals(0.333d, probability, 0.001);
80 Assertions.assertEquals(0.333d, probability, 0.001);
8181 probability = model.calculateProbability("am", "Sam");
82 Assert.assertEquals(0.5d, probability, 0.001);
82 Assertions.assertEquals(0.5d, probability, 0.001);
8383 probability = model.calculateProbability("I", "am");
84 Assert.assertEquals(0.666d, probability, 0.001);
84 Assertions.assertEquals(0.666d, probability, 0.001);
8585 probability = model.calculateProbability("I", "do");
86 Assert.assertEquals(0.333d, probability, 0.001);
86 Assertions.assertEquals(0.333d, probability, 0.001);
8787 probability = model.calculateProbability("I", "am", "Sam");
88 Assert.assertEquals(0.333d, probability, 0.001);
88 Assertions.assertEquals(0.333d, probability, 0.001);
8989 }
9090
9191 @Test
9595 model.add("the", "red", "house");
9696 model.add("I", "saw", "something", "nice");
9797 double probability = model.calculateProbability("I", "saw", "the", "red", "house");
98 Assert.assertTrue("a probability measure should be between 0 and 1 [was "
99 + probability + "]", probability >= 0 && probability <= 1);
98 Assertions.assertTrue(probability >= 0 && probability <= 1,
99 "a probability measure should be between 0 and 1 [was " + probability + "]");
100100
101101 String[] tokens = model.predictNextTokens("I", "saw");
102 Assert.assertNotNull(tokens);
103 Assert.assertArrayEquals(new String[] {"something"}, tokens);
102 Assertions.assertNotNull(tokens);
103 Assertions.assertArrayEquals(new String[] {"something"}, tokens);
104104 }
105105
106106 @Test
110110 model.add("the", "red", "house");
111111 model.add("I", "saw", "something", "nice");
112112 double probability = model.calculateProbability("I", "saw", "the", "red", "house");
113 Assert.assertTrue("a probability measure should be between 0 and 1 [was " + probability + "]",
114 probability >= 0 && probability <= 1);
113 Assertions.assertTrue(probability >= 0 && probability <= 1,
114 "a probability measure should be between 0 and 1 [was " + probability + "]");
115115
116116 String[] tokens = model.predictNextTokens("I", "saw");
117 Assert.assertNotNull(tokens);
118 Assert.assertArrayEquals(new String[] {"something"}, tokens);
117 Assertions.assertNotNull(tokens);
118 Assertions.assertArrayEquals(new String[] {"something"}, tokens);
119119 }
120120
121121 @Test
123123 NGramLanguageModel languageModel = new NGramLanguageModel(getClass().getResourceAsStream(
124124 "/opennlp/tools/ngram/ngram-model.xml"), 3);
125125 double probability = languageModel.calculateProbability("The", "brown", "fox", "jumped");
126 Assert.assertTrue("a probability measure should be between 0 and 1 [was " + probability + "]",
127 probability >= 0 && probability <= 1);
126 Assertions.assertTrue(probability >= 0 && probability <= 1,
127 "a probability measure should be between 0 and 1 [was " + probability + "]");
128128 String[] tokens = languageModel.predictNextTokens("the", "brown", "fox");
129 Assert.assertNotNull(tokens);
130 Assert.assertArrayEquals(new String[] {"jumped"}, tokens);
129 Assertions.assertNotNull(tokens);
130 Assertions.assertArrayEquals(new String[] {"jumped"}, tokens);
131131 }
132132
133133 @Test
148148 }
149149 String[] tokens = languageModel.predictNextTokens("neural",
150150 "network", "language");
151 Assert.assertNotNull(tokens);
152 Assert.assertArrayEquals(new String[] {"models"}, tokens);
151 Assertions.assertNotNull(tokens);
152 Assertions.assertArrayEquals(new String[] {"models"}, tokens);
153153 double p1 = languageModel.calculateProbability("neural", "network",
154154 "language", "models");
155155 double p2 = languageModel.calculateProbability("neural", "network",
156156 "language", "model");
157 Assert.assertTrue(p1 > p2);
157 Assertions.assertTrue(p1 > p2);
158158 }
159159 }
1919 import java.util.Arrays;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.BeforeClass;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeAll;
24 import org.junit.jupiter.api.Test;
2525
2626 public class DictionaryLemmatizerMultiTest {
2727
2828 private static DictionaryLemmatizer dictionaryLemmatizer;
2929
30 @BeforeClass
31 public static void loadDictionary() throws Exception {
30 @BeforeAll
31 static void loadDictionary() throws Exception {
3232 dictionaryLemmatizer = new DictionaryLemmatizer(
3333 DictionaryLemmatizerTest.class.getResourceAsStream(
34 "/opennlp/tools/lemmatizer/smalldictionarymulti.dict")
34 "/opennlp/tools/lemmatizer/smalldictionarymulti.dict")
3535 );
3636 }
37
37
3838 @Test
39 public void testForNullPointerException() {
40 List<String> sentence = Arrays.asList("The","dogs","were","running","and","barking",
41 "down","the","street");
42 List<String> sentencePOS = Arrays.asList("DT","NNS","VBD","VBG","CC","VBG","RP","DT","NN");
39 void testForNullPointerException() {
40 List<String> sentence = Arrays.asList("The", "dogs", "were", "running", "and", "barking",
41 "down", "the", "street");
42 List<String> sentencePOS = Arrays.asList("DT", "NNS", "VBD", "VBG", "CC", "VBG", "RP", "DT", "NN");
4343 List<List<String>> expectedLemmas = new ArrayList<>();
4444 expectedLemmas.add(Arrays.asList("the"));
4545 expectedLemmas.add(Arrays.asList("dog"));
5050 expectedLemmas.add(Arrays.asList("down"));
5151 expectedLemmas.add(Arrays.asList("the"));
5252 expectedLemmas.add(Arrays.asList("street"));
53
53
5454 List<List<String>> actualLemmas = dictionaryLemmatizer.lemmatize(sentence, sentencePOS);
55
55
5656 for (int i = 0; i < sentence.size(); i++) {
5757 // don't compare cases where the word is not in the dictionary...
58 if (!actualLemmas.get(0).get(0).equals("O"))
59 Assert.assertEquals(expectedLemmas.get(i), actualLemmas.get(i));
58 if (!actualLemmas.get(i).get(0).equals("O")) {
59 Assertions.assertEquals(expectedLemmas.get(i), actualLemmas.get(i));
60 }
6061 }
6162 }
6263
1616
1717 package opennlp.tools.lemmatizer;
1818
19 import org.junit.Assert;
20 import org.junit.BeforeClass;
21 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.BeforeAll;
21 import org.junit.jupiter.api.Test;
2222
2323 public class DictionaryLemmatizerTest {
2424
2525 private static DictionaryLemmatizer dictionaryLemmatizer;
2626
27 @BeforeClass
28 public static void loadDictionary() throws Exception {
27 @BeforeAll
28 static void loadDictionary() throws Exception {
2929 dictionaryLemmatizer = new DictionaryLemmatizer(
30 DictionaryLemmatizerTest.class.getResourceAsStream("/opennlp/tools/lemmatizer/smalldictionary.dict")
30 DictionaryLemmatizerTest.class.getResourceAsStream("/opennlp/tools/lemmatizer/smalldictionary.dict")
3131 );
3232 }
33
33
3434 @Test
35 public void testForNullPointerException() {
36 String[] sentence = new String[]{"The","dogs","were","running","and","barking","down","the","street"};
37 String[] sentencePOS = new String[]{"DT","NNS","VBD","VBG","CC","VBG","RP","DT","NN"};
38 String[] expectedLemma = new String[]{"the","dog","is","run","and","bark","down","the","street"};
39
35 void testForNullPointerException() {
36 String[] sentence = new String[] {"The", "dogs", "were", "running", "and", "barking",
37 "down", "the", "street"};
38 String[] sentencePOS = new String[] {"DT", "NNS", "VBD", "VBG", "CC", "VBG", "RP", "DT", "NN"};
39 String[] expectedLemma = new String[] {"the", "dog", "is", "run", "and", "bark", "down", "the", "street"};
40
4041 String[] actualLemma = dictionaryLemmatizer.lemmatize(sentence, sentencePOS);
41
42 for (int i = 0;i < sentence.length;i++) {
42
43 for (int i = 0; i < sentence.length; i++) {
4344 // don't compare cases where the word is not in the dictionary...
44 if (!actualLemma[i].equals("O")) Assert.assertEquals(expectedLemma[i], actualLemma[i]);
45 if (!actualLemma[i].equals("O")) {
46 Assertions.assertEquals(expectedLemma[i], actualLemma[i]);
47 }
4548 }
4649 }
4750
2626 import java.io.ObjectOutputStream;
2727 import java.io.StringReader;
2828
29 import org.junit.Assert;
30 import org.junit.Test;
29 import org.junit.jupiter.api.Assertions;
30 import org.junit.jupiter.api.Test;
3131
3232 public class LemmaSampleTest {
3333
34 @Test(expected = IllegalArgumentException.class)
35 public void testParameterValidation() {
36 new LemmaSample(new String[] { "" }, new String[] { "" },
37 new String[] { "test", "one element to much" });
34 @Test
35 void testParameterValidation() {
36 Assertions.assertThrows(IllegalArgumentException.class, () -> {
37 new LemmaSample(new String[] {""}, new String[] {""},
38 new String[] {"test", "one element to much"});
39 });
3840 }
3941
4042 private static String[] createSentence() {
41 return new String[] { "Forecasts", "for", "the", "trade", "figures",
42 "range", "widely", "." };
43 return new String[] {"Forecasts", "for", "the", "trade", "figures",
44 "range", "widely", "."};
4345 }
4446
4547 private static String[] createTags() {
4648
47 return new String[] { "NNS", "IN", "DT", "NN", "NNS", "VBP", "RB", "." };
49 return new String[] {"NNS", "IN", "DT", "NN", "NNS", "VBP", "RB", "."};
4850 }
4951
5052 private static String[] createLemmas() {
51 return new String[] { "Forecast", "for", "the", "trade", "figure", "range",
52 "widely", "." };
53 return new String[] {"Forecast", "for", "the", "trade", "figure", "range",
54 "widely", "."};
5355 }
5456
5557 @Test
56 public void testLemmaSampleSerDe() throws IOException {
58 void testLemmaSampleSerDe() throws IOException {
5759 LemmaSample lemmaSample = createGoldSample();
5860 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
5961 ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream);
7173 // do nothing
7274 }
7375
74 Assert.assertNotNull(deSerializedLemmaSample);
75 Assert.assertArrayEquals(lemmaSample.getLemmas(), deSerializedLemmaSample.getLemmas());
76 Assert.assertArrayEquals(lemmaSample.getTokens(), deSerializedLemmaSample.getTokens());
77 Assert.assertArrayEquals(lemmaSample.getTags(), deSerializedLemmaSample.getTags());
76 Assertions.assertNotNull(deSerializedLemmaSample);
77 Assertions.assertArrayEquals(lemmaSample.getLemmas(), deSerializedLemmaSample.getLemmas());
78 Assertions.assertArrayEquals(lemmaSample.getTokens(), deSerializedLemmaSample.getTokens());
79 Assertions.assertArrayEquals(lemmaSample.getTags(), deSerializedLemmaSample.getTags());
7880 }
7981
8082 @Test
81 public void testRetrievingContent() {
83 void testRetrievingContent() {
8284 LemmaSample sample = new LemmaSample(createSentence(), createTags(), createLemmas());
8385
84 Assert.assertArrayEquals(createSentence(), sample.getTokens());
85 Assert.assertArrayEquals(createTags(), sample.getTags());
86 Assert.assertArrayEquals(createLemmas(), sample.getLemmas());
86 Assertions.assertArrayEquals(createSentence(), sample.getTokens());
87 Assertions.assertArrayEquals(createTags(), sample.getTags());
88 Assertions.assertArrayEquals(createLemmas(), sample.getLemmas());
8789 }
8890
8991 @Test
90 public void testToString() throws IOException {
92 void testToString() throws IOException {
9193
9294 LemmaSample sample = new LemmaSample(createSentence(), createTags(),
9395 createLemmas());
100102 for (int i = 0; i < sentence.length; i++) {
101103 String line = reader.readLine();
102104 String[] parts = line.split("\t");
103 Assert.assertEquals(3, parts.length);
104 Assert.assertEquals(sentence[i], parts[0]);
105 Assert.assertEquals(tags[i], parts[1]);
106 Assert.assertEquals(lemmas[i], parts[2]);
105 Assertions.assertEquals(3, parts.length);
106 Assertions.assertEquals(sentence[i], parts[0]);
107 Assertions.assertEquals(tags[i], parts[1]);
108 Assertions.assertEquals(lemmas[i], parts[2]);
107109 }
108110 }
109111
110112 @Test
111 public void testEquals() {
112 Assert.assertFalse(createGoldSample() == createGoldSample());
113 Assert.assertTrue(createGoldSample().equals(createGoldSample()));
114 Assert.assertFalse(createPredSample().equals(createGoldSample()));
115 Assert.assertFalse(createPredSample().equals(new Object()));
113 void testEquals() {
114 Assertions.assertFalse(createGoldSample() == createGoldSample());
115 Assertions.assertTrue(createGoldSample().equals(createGoldSample()));
116 Assertions.assertFalse(createPredSample().equals(createGoldSample()));
117 Assertions.assertFalse(createPredSample().equals(new Object()));
116118 }
117119
118120 public static LemmaSample createGoldSample() {
2222 import java.io.OutputStream;
2323 import java.nio.charset.StandardCharsets;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.cmdline.lemmatizer.LemmaEvaluationErrorListener;
2929 import opennlp.tools.util.MockInputStreamFactory;
4646 * @throws IOException
4747 */
4848 @Test
49 public void testEvaluator() throws IOException {
49 void testEvaluator() throws IOException {
5050 String inPredicted = "opennlp/tools/lemmatizer/output.txt";
5151 String inExpected = "opennlp/tools/lemmatizer/output.txt";
5252
5454
5555 DummyLemmaSampleStream predictedSample = new DummyLemmaSampleStream(
5656 new PlainTextByLineStream(
57 new MockInputStreamFactory(new File(inPredicted)), encoding), true);
57 new MockInputStreamFactory(new File(inPredicted)), encoding), true);
5858
5959 DummyLemmaSampleStream expectedSample = new DummyLemmaSampleStream(
6060 new PlainTextByLineStream(
61 new MockInputStreamFactory(new File(inExpected)), encoding), false);
61 new MockInputStreamFactory(new File(inExpected)), encoding), false);
6262
6363 Lemmatizer dummyLemmatizer = new DummyLemmatizer(predictedSample);
6464
6868
6969 evaluator.evaluate(expectedSample);
7070
71 Assert.assertEquals(0.9877049180327869, evaluator.getWordAccuracy(), DELTA);
72 Assert.assertNotSame(stream.toString().length(), 0);
71 Assertions.assertEquals(0.9877049180327869, evaluator.getWordAccuracy(), DELTA);
72 Assertions.assertNotSame(0, stream.toString().length());
7373
7474 }
7575
2020 import java.io.IOException;
2121 import java.nio.charset.StandardCharsets;
2222
23 import org.junit.Assert;
24 import org.junit.Before;
25 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.BeforeEach;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.util.InsufficientTrainingDataException;
2828 import opennlp.tools.util.MockInputStreamFactory;
4545 * training sentences and then the computed model is used to predict sentences
4646 * from the training sentences.
4747 */
48
4849 public class LemmatizerMETest {
4950
5051 private LemmatizerME lemmatizer;
5152
52 private static String[] tokens = { "Rockwell", "said", "the", "agreement", "calls", "for",
53 private static String[] tokens = {"Rockwell", "said", "the", "agreement", "calls", "for",
5354 "it", "to", "supply", "200", "additional", "so-called", "shipsets", "for",
54 "the", "planes", "." };
55 "the", "planes", "."};
5556
56 private static String[] postags = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
57 "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
57 private static String[] postags = {"NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
58 "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "."};
5859
59 private static String[] expect = { "rockwell", "say", "the", "agreement", "call", "for",
60 private static String[] expect = {"rockwell", "say", "the", "agreement", "call", "for",
6061 "it", "to", "supply", "200", "additional", "so-called", "shipset", "for",
61 "the", "plane", "." };
62 "the", "plane", "."};
6263
63 @Before
64 public void startup() throws IOException {
64 @BeforeEach
65 void startup() throws IOException {
6566 // train the lemmatizer
6667
6768 ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream(
6869 new PlainTextByLineStream(new MockInputStreamFactory(
69 new File("opennlp/tools/lemmatizer/trial.old.tsv")), StandardCharsets.UTF_8));
70 new File("opennlp/tools/lemmatizer/trial.old.tsv")), StandardCharsets.UTF_8));
7071
7172 TrainingParameters params = new TrainingParameters();
7273 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
7980 }
8081
8182 @Test
82 public void testLemmasAsArray() throws Exception {
83 void testLemmasAsArray() {
8384
8485 String[] lemmas = lemmatizer.lemmatize(tokens, postags);
8586
86 Assert.assertArrayEquals(expect, lemmas);
87 Assertions.assertArrayEquals(expect, lemmas);
8788 }
88
89 @Test(expected = InsufficientTrainingDataException.class)
90 public void testInsufficientData() throws IOException {
91
92 ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream(
93 new PlainTextByLineStream(new MockInputStreamFactory(
94 new File("opennlp/tools/lemmatizer/trial.old-insufficient.tsv")), StandardCharsets.UTF_8));
9589
96 TrainingParameters params = new TrainingParameters();
97 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
98 params.put(TrainingParameters.CUTOFF_PARAM, 5);
90 @Test
91 void testInsufficientData() {
9992
100 LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory());
93 Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
94
95 ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream(
96 new PlainTextByLineStream(new MockInputStreamFactory(
97 new File("opennlp/tools/lemmatizer/trial.old-insufficient.tsv")), StandardCharsets.UTF_8));
98
99 TrainingParameters params = new TrainingParameters();
100 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
101 params.put(TrainingParameters.CUTOFF_PARAM, 5);
102
103 LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory());
104
105 });
106
101107
102108 }
103109
1919 import java.util.Arrays;
2020 import java.util.Collections;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 public class ArrayMathTest {
2626
2727 @Test
28 public void testInnerProductDoubleNaN() throws Exception {
29 Assert.assertTrue(Double.isNaN(ArrayMath.innerProduct(null, new double[]{0})));
30 Assert.assertTrue(Double.isNaN(ArrayMath.innerProduct(new double[]{0}, null)));
31 Assert.assertTrue(Double.isNaN(ArrayMath.innerProduct(new double[]{0, 1, 2}, new double[]{0, 1, 2, 3})));
28 public void testInnerProductDoubleNaN() {
29 Assertions.assertTrue(Double.isNaN(ArrayMath.innerProduct(null, new double[] {0})));
30 Assertions.assertTrue(Double.isNaN(ArrayMath.innerProduct(new double[] {0}, null)));
31 Assertions.assertTrue(Double.isNaN(ArrayMath.innerProduct(new double[] {0, 1, 2},
32 new double[] {0, 1, 2, 3})));
3233 }
3334
3435 @Test
35 public void testInnerProduct() throws Exception {
36 Assert.assertEquals(0, ArrayMath.innerProduct(new double[] {}, new double[] {}), 0);
37 Assert.assertEquals(-1, ArrayMath.innerProduct(new double[] {1}, new double[] {-1}), 0);
38 Assert.assertEquals(14, ArrayMath.innerProduct(new double[] {1, 2, 3}, new double[] {1, 2, 3}), 0);
36 public void testInnerProduct() {
37 Assertions.assertEquals(0, ArrayMath.innerProduct(new double[] {}, new double[] {}), 0);
38 Assertions.assertEquals(-1, ArrayMath.innerProduct(new double[] {1}, new double[] {-1}), 0);
39 Assertions.assertEquals(14, ArrayMath.innerProduct(new double[] {1, 2, 3}, new double[] {1, 2, 3}), 0);
3940 }
4041
4142 @Test
42 public void testL1Norm() throws Exception {
43 Assert.assertEquals(0, ArrayMath.l1norm(new double[]{}), 0);
44 Assert.assertEquals(0, ArrayMath.l1norm(new double[] {0}), 0);
45 Assert.assertEquals(2, ArrayMath.l1norm(new double[] {1, -1}), 0);
46 Assert.assertEquals(55, ArrayMath.l1norm(new double[] {1, -2, 3, -4, 5, -6, 7, -8, 9, -10}), 0);
43 public void testL1Norm() {
44 Assertions.assertEquals(0, ArrayMath.l1norm(new double[] {}), 0);
45 Assertions.assertEquals(0, ArrayMath.l1norm(new double[] {0}), 0);
46 Assertions.assertEquals(2, ArrayMath.l1norm(new double[] {1, -1}), 0);
47 Assertions.assertEquals(55, ArrayMath.l1norm(new double[] {1, -2, 3, -4, 5, -6, 7, -8, 9, -10}), 0);
4748 }
4849
4950 @Test
50 public void testL2Norm() throws Exception {
51 Assert.assertEquals(0, ArrayMath.l2norm(new double[] {}), 0);
52 Assert.assertEquals(0, ArrayMath.l2norm(new double[] {0}), 0);
53 Assert.assertEquals(1.41421, ArrayMath.l2norm(new double[] {1, -1}), 0.001);
54 Assert.assertEquals(0.54772, ArrayMath.l2norm(new double[] {0.1, -0.2, 0.3, -0.4}), 0.001);
51 public void testL2Norm() {
52 Assertions.assertEquals(0, ArrayMath.l2norm(new double[] {}), 0);
53 Assertions.assertEquals(0, ArrayMath.l2norm(new double[] {0}), 0);
54 Assertions.assertEquals(1.41421, ArrayMath.l2norm(new double[] {1, -1}), 0.001);
55 Assertions.assertEquals(0.54772, ArrayMath.l2norm(new double[] {0.1, -0.2, 0.3, -0.4}), 0.001);
5556 }
5657
5758 @Test
58 public void testInvL2Norm() throws Exception {
59 Assert.assertEquals(0.70711, ArrayMath.invL2norm(new double[] {1, -1}), 0.001);
60 Assert.assertEquals(1.82575, ArrayMath.invL2norm(new double[] {0.1, -0.2, 0.3, -0.4}), 0.001);
59 public void testInvL2Norm() {
60 Assertions.assertEquals(0.70711, ArrayMath.invL2norm(new double[] {1, -1}), 0.001);
61 Assertions.assertEquals(1.82575, ArrayMath.invL2norm(new double[] {0.1, -0.2, 0.3, -0.4}), 0.001);
6162 }
6263
6364 @Test
64 public void testLogSumOfExps() throws Exception {
65 Assert.assertEquals(0, ArrayMath.logSumOfExps(new double[] {0}), 0);
66 Assert.assertEquals(1, ArrayMath.logSumOfExps(new double[] {1}), 0);
67 Assert.assertEquals(2.048587, ArrayMath.logSumOfExps(new double[] {-1, 2}), 0.001);
68 Assert.assertEquals(1.472216, ArrayMath.logSumOfExps(new double[] {-0.1, 0.2, -0.3, 0.4}), 0.001);
65 public void testLogSumOfExps() {
66 Assertions.assertEquals(0, ArrayMath.logSumOfExps(new double[] {0}), 0);
67 Assertions.assertEquals(1, ArrayMath.logSumOfExps(new double[] {1}), 0);
68 Assertions.assertEquals(2.048587, ArrayMath.logSumOfExps(new double[] {-1, 2}), 0.001);
69 Assertions.assertEquals(1.472216, ArrayMath.logSumOfExps(new double[] {-0.1, 0.2, -0.3, 0.4}), 0.001);
6970 }
7071
7172 @Test
72 public void testMax() throws Exception {
73 Assert.assertEquals(0, ArrayMath.max(new double[] {0}), 0);
74 Assert.assertEquals(0, ArrayMath.max(new double[] {0, 0, 0}), 0);
75 Assert.assertEquals(2, ArrayMath.max(new double[] {0, 1, 2}), 0);
76 Assert.assertEquals(200, ArrayMath.max(new double[] {100, 200, 2}), 0);
77 Assert.assertEquals(300, ArrayMath.max(new double[] {100, 200, 300, -10, -20}), 0);
78 }
79
80 @Test(expected = IllegalArgumentException.class)
81 public void testArgmaxException1() throws Exception {
82 ArrayMath.argmax(null);
83 }
84
85 @Test(expected = IllegalArgumentException.class)
86 public void testArgmaxException2() throws Exception {
87 ArrayMath.argmax(new double[]{});
73 public void testMax() {
74 Assertions.assertEquals(0, ArrayMath.max(new double[] {0}), 0);
75 Assertions.assertEquals(0, ArrayMath.max(new double[] {0, 0, 0}), 0);
76 Assertions.assertEquals(2, ArrayMath.max(new double[] {0, 1, 2}), 0);
77 Assertions.assertEquals(200, ArrayMath.max(new double[] {100, 200, 2}), 0);
78 Assertions.assertEquals(300, ArrayMath.max(new double[] {100, 200, 300, -10, -20}), 0);
8879 }
8980
9081 @Test
91 public void testArgmax() throws Exception {
92 Assert.assertEquals(0, ArrayMath.argmax(new double[] {0}));
93 Assert.assertEquals(0, ArrayMath.argmax(new double[] {0, 0, 0}));
94 Assert.assertEquals(2, ArrayMath.argmax(new double[] {0, 1, 2}));
95 Assert.assertEquals(1, ArrayMath.argmax(new double[] {100, 200, 2}));
96 Assert.assertEquals(2, ArrayMath.argmax(new double[] {100, 200, 300, -10, -20}));
82 public void testArgmaxException1() {
83 Assertions.assertThrows(IllegalArgumentException.class, () -> {
84 ArrayMath.argmax(null);
85 });
9786 }
9887
9988 @Test
100 public void testToDoubleArray() throws Exception {
101 Assert.assertEquals(0, ArrayMath.toDoubleArray(Collections.EMPTY_LIST).length);
102 Assert.assertArrayEquals(new double[] {0}, ArrayMath.toDoubleArray(Arrays.asList(0D)), 0);
103 Assert.assertArrayEquals(new double[] {0, 1, -2.5, -0.3, 4},
89 public void testArgmaxException2() {
90 Assertions.assertThrows(IllegalArgumentException.class, () -> {
91 ArrayMath.argmax(new double[] {});
92 });
93 }
94
95 @Test
96 public void testArgmax() {
97 Assertions.assertEquals(0, ArrayMath.argmax(new double[] {0}));
98 Assertions.assertEquals(0, ArrayMath.argmax(new double[] {0, 0, 0}));
99 Assertions.assertEquals(2, ArrayMath.argmax(new double[] {0, 1, 2}));
100 Assertions.assertEquals(1, ArrayMath.argmax(new double[] {100, 200, 2}));
101 Assertions.assertEquals(2, ArrayMath.argmax(new double[] {100, 200, 300, -10, -20}));
102 }
103
104 @Test
105 public void testToDoubleArray() {
106 Assertions.assertEquals(0, ArrayMath.toDoubleArray(Collections.EMPTY_LIST).length);
107 Assertions.assertArrayEquals(new double[] {0}, ArrayMath.toDoubleArray(Arrays.asList(0D)), 0);
108 Assertions.assertArrayEquals(new double[] {0, 1, -2.5, -0.3, 4},
104109 ArrayMath.toDoubleArray(Arrays.asList(0D, 1D, -2.5D, -0.3D, 4D)), 0);
105110 }
106111
107112 @Test
108 public void testToIntArray() throws Exception {
109 Assert.assertEquals(0, ArrayMath.toIntArray(Collections.EMPTY_LIST).length);
110 Assert.assertArrayEquals(new int[] {0}, ArrayMath.toIntArray(Arrays.asList(0)));
111 Assert.assertArrayEquals(new int[] {0, 1, -2, -3, 4},
113 public void testToIntArray() {
114 Assertions.assertEquals(0, ArrayMath.toIntArray(Collections.EMPTY_LIST).length);
115 Assertions.assertArrayEquals(new int[] {0}, ArrayMath.toIntArray(Arrays.asList(0)));
116 Assertions.assertArrayEquals(new int[] {0, 1, -2, -3, 4},
112117 ArrayMath.toIntArray(Arrays.asList(0, 1, -2, -3, 4)));
113118 }
114119 }
1919 import java.util.HashMap;
2020 import java.util.Map;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.ml.model.MaxentModel;
2626 import opennlp.tools.util.BeamSearchContextGenerator;
3737 }
3838
3939 public String[] getContext(int index, String[] sequence,
40 String[] priorDecisions, Object[] additionalContext) {
40 String[] priorDecisions, Object[] additionalContext) {
4141 return new String[] {outcomeSequence[index]};
4242 }
4343 }
6969 for (int i = 0; i < probs.length; i++) {
7070 if (outcomes[i].equals(context[0])) {
7171 probs[i] = bestOutcomeProb;
72 }
73 else {
72 } else {
7473 probs[i] = otherOutcomeProb;
7574 }
7675 }
115114 * Tests that beam search does not fail to detect an empty sequence.
116115 */
117116 @Test
118 public void testBestSequenceZeroLengthInput() {
117 void testBestSequenceZeroLengthInput() {
119118
120119 String[] sequence = new String[0];
121120 BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
127126
128127 Sequence seq = bs.bestSequence(sequence, null, cg,
129128 (int i, String[] inputSequence, String[] outcomesSequence, String outcome) -> true);
130
131 Assert.assertNotNull(seq);
132 Assert.assertEquals(sequence.length, seq.getOutcomes().size());
129
130 Assertions.assertNotNull(seq);
131 Assertions.assertEquals(sequence.length, seq.getOutcomes().size());
133132 }
134133
135134 /**
136135 * Tests finding a sequence of length one.
137136 */
138137 @Test
139 public void testBestSequenceOneElementInput() {
138 void testBestSequenceOneElementInput() {
140139 String[] sequence = {"1"};
141140 BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
142141
147146
148147 Sequence seq = bs.bestSequence(sequence, null, cg,
149148 (int i, String[] inputSequence, String[] outcomesSequence,
150 String outcome) -> true);
151
152 Assert.assertNotNull(seq);
153 Assert.assertEquals(sequence.length, seq.getOutcomes().size());
154 Assert.assertEquals("1", seq.getOutcomes().get(0));
149 String outcome) -> true);
150
151 Assertions.assertNotNull(seq);
152 Assertions.assertEquals(sequence.length, seq.getOutcomes().size());
153 Assertions.assertEquals("1", seq.getOutcomes().get(0));
155154 }
156155
157156 /**
158157 * Tests finding the best sequence on a short input sequence.
159158 */
160159 @Test
161 public void testBestSequence() {
160 void testBestSequence() {
162161 String[] sequence = {"1", "2", "3", "2", "1"};
163162 BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
164163
169168
170169 Sequence seq = bs.bestSequence(sequence, null, cg,
171170 (int i, String[] inputSequence, String[] outcomesSequence,
172 String outcome) -> true);
173
174 Assert.assertNotNull(seq);
175 Assert.assertEquals(sequence.length, seq.getOutcomes().size());
176 Assert.assertEquals("1", seq.getOutcomes().get(0));
177 Assert.assertEquals("2", seq.getOutcomes().get(1));
178 Assert.assertEquals("3", seq.getOutcomes().get(2));
179 Assert.assertEquals("2", seq.getOutcomes().get(3));
180 Assert.assertEquals("1", seq.getOutcomes().get(4));
171 String outcome) -> true);
172
173 Assertions.assertNotNull(seq);
174 Assertions.assertEquals(sequence.length, seq.getOutcomes().size());
175 Assertions.assertEquals("1", seq.getOutcomes().get(0));
176 Assertions.assertEquals("2", seq.getOutcomes().get(1));
177 Assertions.assertEquals("3", seq.getOutcomes().get(2));
178 Assertions.assertEquals("2", seq.getOutcomes().get(3));
179 Assertions.assertEquals("1", seq.getOutcomes().get(4));
181180 }
182181
183182 /**
184183 * Tests finding the best sequence on a short input sequence.
185184 */
186185 @Test
187 public void testBestSequenceWithValidator() {
186 void testBestSequenceWithValidator() {
188187 String[] sequence = {"1", "2", "3", "2", "1"};
189188 BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
190189
196195 Sequence seq = bs.bestSequence(sequence, null, cg,
197196 (int i, String[] inputSequence, String[] outcomesSequence,
198197 String outcome) -> !"2".equals(outcome));
199 Assert.assertNotNull(seq);
200 Assert.assertEquals(sequence.length, seq.getOutcomes().size());
201 Assert.assertEquals("1", seq.getOutcomes().get(0));
202 Assert.assertNotSame("2", seq.getOutcomes().get(1));
203 Assert.assertEquals("3", seq.getOutcomes().get(2));
204 Assert.assertNotSame("2", seq.getOutcomes().get(3));
205 Assert.assertEquals("1", seq.getOutcomes().get(4));
198 Assertions.assertNotNull(seq);
199 Assertions.assertEquals(sequence.length, seq.getOutcomes().size());
200 Assertions.assertEquals("1", seq.getOutcomes().get(0));
201 Assertions.assertNotSame("2", seq.getOutcomes().get(1));
202 Assertions.assertEquals("3", seq.getOutcomes().get(2));
203 Assertions.assertNotSame("2", seq.getOutcomes().get(3));
204 Assertions.assertEquals("1", seq.getOutcomes().get(4));
206205 }
207206 }
1616
1717 package opennlp.tools.ml;
1818
19 import java.io.IOException;
2019 import java.util.Map;
2120
2221 import opennlp.tools.ml.model.DataIndexer;
2726
2827 public class MockEventTrainer implements EventTrainer {
2928
30 public MaxentModel train(ObjectStream<Event> events) throws IOException {
29 public MaxentModel train(ObjectStream<Event> events) {
3130 return null;
3231 }
3332
3433 @Override
35 public MaxentModel train(DataIndexer indexer) throws IOException {
34 public MaxentModel train(DataIndexer indexer) {
3635 return null;
3736 }
3837
1616
1717 package opennlp.tools.ml;
1818
19 import java.io.IOException;
2019 import java.util.Map;
2120
2221 import opennlp.tools.ml.model.AbstractModel;
2524
2625 public class MockSequenceTrainer implements EventModelSequenceTrainer {
2726
28 public AbstractModel train(SequenceStream events) throws IOException {
27 public AbstractModel train(SequenceStream events) {
2928 return null;
3029 }
3130
2424 import java.util.ArrayList;
2525 import java.util.List;
2626
27 import org.junit.Assert;
27 import org.junit.jupiter.api.Assertions;
2828
2929 import opennlp.tools.ml.model.Event;
3030 import opennlp.tools.ml.model.MaxentModel;
8686 double accuracy = correct / (double) total;
8787 System.out.println("Accuracy on PPA devset: (" + correct + "/" + total + ") " + accuracy);
8888
89 Assert.assertEquals(expecedAccuracy, accuracy, .00001);
89 Assertions.assertEquals(expecedAccuracy, accuracy, .00001);
9090 }
9191 }
1616
1717 package opennlp.tools.ml;
1818
19 import org.junit.Assert;
20 import org.junit.Before;
21 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.BeforeEach;
21 import org.junit.jupiter.api.Test;
2222
2323 import opennlp.tools.ml.TrainerFactory.TrainerType;
2424 import opennlp.tools.ml.maxent.GISTrainer;
2929
3030 private TrainingParameters mlParams;
3131
32 @Before
33 public void setup() {
32 @BeforeEach
33 void setup() {
3434 mlParams = new TrainingParameters();
3535 mlParams.put(TrainingParameters.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
3636 mlParams.put(TrainingParameters.ITERATIONS_PARAM, 10);
3838 }
3939
4040 @Test
41 public void testBuiltInValid() {
42 Assert.assertTrue(TrainerFactory.isValid(mlParams));
41 void testBuiltInValid() {
42 Assertions.assertTrue(TrainerFactory.isValid(mlParams));
4343 }
4444
4545 @Test
46 public void testSequenceTrainerValid() {
46 void testSequenceTrainerValid() {
4747 mlParams.put(TrainingParameters.ALGORITHM_PARAM, MockSequenceTrainer.class.getCanonicalName());
48 Assert.assertTrue(TrainerFactory.isValid(mlParams));
48 Assertions.assertTrue(TrainerFactory.isValid(mlParams));
4949 }
5050
5151 @Test
52 public void testEventTrainerValid() {
52 void testEventTrainerValid() {
5353 mlParams.put(TrainingParameters.ALGORITHM_PARAM, MockEventTrainer.class.getCanonicalName());
54 Assert.assertTrue(TrainerFactory.isValid(mlParams));
54 Assertions.assertTrue(TrainerFactory.isValid(mlParams));
5555 }
5656
5757 @Test
58 public void testInvalidTrainer() {
58 void testInvalidTrainer() {
5959 mlParams.put(TrainingParameters.ALGORITHM_PARAM, "xyz");
60 Assert.assertFalse(TrainerFactory.isValid(mlParams));
60 Assertions.assertFalse(TrainerFactory.isValid(mlParams));
6161 }
6262
6363 @Test
64 public void testIsSequenceTrainerTrue() {
64 void testIsSequenceTrainerTrue() {
6565 mlParams.put(AbstractTrainer.ALGORITHM_PARAM,
6666 SimplePerceptronSequenceTrainer.PERCEPTRON_SEQUENCE_VALUE);
6767
6868 TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
6969
70 Assert.assertTrue(TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType));
70 Assertions.assertTrue(TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType));
7171 }
7272
7373 @Test
74 public void testIsSequenceTrainerFalse() {
74 void testIsSequenceTrainerFalse() {
7575 mlParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
7676 TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
77 Assert.assertFalse(TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType));
77 Assertions.assertFalse(TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType));
7878 }
7979
8080 }
2525
2626 public class FootballEventStream implements ObjectStream<Event> {
2727 ObjectStream<String> textStream;
28
28
2929 public FootballEventStream() throws IOException {
3030 textStream = new PlainTextByLineStream(
3131 new URLInputStreamFactory(this.getClass().getResource("/opennlp/tools/ml/maxent/football.dat")),
32 StandardCharsets.US_ASCII );
32 StandardCharsets.US_ASCII);
3333 }
34
34
3535 @Override
3636 public Event read() throws IOException {
3737 String line = textStream.read();
38 if (line == null) return null;
38 if (line == null) {
39 return null;
40 }
3941 String[] tokens = line.split("\\s+");
40
42
4143 return new Event(tokens[tokens.length - 1], Arrays.copyOf(tokens, tokens.length - 1));
4244 }
43
45
4446 @Override
4547 public void reset() throws IOException, UnsupportedOperationException {
4648 textStream.reset();
2222 import java.util.List;
2323 import java.util.Map;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.ml.AbstractEventTrainer;
2929 import opennlp.tools.ml.AbstractTrainer;
4141
4242 public class GISIndexingTest {
4343
44 private static String[][] cntx = new String[][]{
45 {"dog","cat","mouse"},
46 {"text", "print", "mouse"},
47 {"dog", "pig", "cat", "mouse"}
44 private static String[][] cntx = new String[][] {
45 {"dog", "cat", "mouse"},
46 {"text", "print", "mouse"},
47 {"dog", "pig", "cat", "mouse"}
4848 };
49 private static String[] outputs = new String[]{"A","B","A"};
49 private static String[] outputs = new String[] {"A", "B", "A"};
5050
5151 private ObjectStream<Event> createEventStream() {
5252 List<Event> events = new ArrayList<>();
5555 }
5656 return ObjectStreamUtils.createObjectStream(events);
5757 }
58
58
5959 /*
6060 * Test the GIS.trainModel(ObjectStream<Event> eventStream) method
6161 */
6262 @Test
63 public void testGISTrainSignature1() throws IOException {
64 try (ObjectStream<Event> eventStream = createEventStream()) {
65 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
66 params.put(AbstractTrainer.CUTOFF_PARAM, 1);
67
68 EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
69
70 Assert.assertNotNull(trainer.train(eventStream));
63 void testGISTrainSignature1() throws IOException {
64 try (ObjectStream<Event> eventStream = createEventStream()) {
65 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
66 params.put(AbstractTrainer.CUTOFF_PARAM, 1);
67
68 EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
69
70 Assertions.assertNotNull(trainer.train(eventStream));
7171 }
7272 }
7373
7575 * Test the GIS.trainModel(ObjectStream<Event> eventStream,boolean smoothing) method
7676 */
7777 @Test
78 public void testGISTrainSignature2() throws IOException {
78 void testGISTrainSignature2() throws IOException {
7979 try (ObjectStream<Event> eventStream = createEventStream()) {
8080 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
8181 params.put(AbstractTrainer.CUTOFF_PARAM, 1);
8282 params.put("smoothing", true);
8383 EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
8484
85 Assert.assertNotNull(trainer.train(eventStream));
86 }
87 }
88
85 Assertions.assertNotNull(trainer.train(eventStream));
86 }
87 }
88
8989 /*
9090 * Test the GIS.trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff) method
9191 */
9292 @Test
93 public void testGISTrainSignature3() throws IOException {
93 void testGISTrainSignature3() throws IOException {
9494 try (ObjectStream<Event> eventStream = createEventStream()) {
9595 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
9696
9999
100100 EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
101101
102 Assert.assertNotNull(trainer.train(eventStream));
103 }
104 }
105
102 Assertions.assertNotNull(trainer.train(eventStream));
103 }
104 }
105
106106 /*
107107 * Test the GIS.trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff, double sigma) method
108108 */
109109 @Test
110 public void testGISTrainSignature4() throws IOException {
110 void testGISTrainSignature4() throws IOException {
111111 try (ObjectStream<Event> eventStream = createEventStream()) {
112112 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
113113 params.put(AbstractTrainer.ITERATIONS_PARAM, 10);
115115 GISTrainer trainer = (GISTrainer) TrainerFactory.getEventTrainer(params, null);
116116 trainer.setGaussianSigma(0.01);
117117
118 Assert.assertNotNull(trainer.trainModel(eventStream));
119 }
120 }
121
122 /*
123 * Test the GIS.trainModel((ObjectStream<Event> eventStream, int iterations, int cutoff,
118 Assertions.assertNotNull(trainer.trainModel(eventStream));
119 }
120 }
121
122 /*
123 * Test the GIS.trainModel((ObjectStream<Event> eventStream, int iterations, int cutoff,
124124 * boolean smoothing, boolean printMessagesWhileTraining)) method
125125 */
126126 @Test
127 public void testGISTrainSignature5() throws IOException {
127 void testGISTrainSignature5() throws IOException {
128128 try (ObjectStream<Event> eventStream = createEventStream()) {
129129 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
130130
134134 params.put(AbstractTrainer.VERBOSE_PARAM, false);
135135
136136 EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
137 Assert.assertNotNull(trainer.train(eventStream));
138 }
139 }
140
141 @Test
142 public void testIndexingWithTrainingParameters() throws IOException {
137 Assertions.assertNotNull(trainer.train(eventStream));
138 }
139 }
140
141 @Test
142 void testIndexingWithTrainingParameters() throws IOException {
143143 ObjectStream<Event> eventStream = createEventStream();
144
144
145145 TrainingParameters parameters = TrainingParameters.defaultParams();
146146 // by default we are using GIS/EventTrainer/Cutoff of 5/100 iterations
147147 parameters.put(TrainingParameters.ITERATIONS_PARAM, 10);
153153 // guarantee that you have a GIS trainer...
154154 EventTrainer trainer =
155155 TrainerFactory.getEventTrainer(parameters, new HashMap<>());
156 Assert.assertEquals("opennlp.tools.ml.maxent.GISTrainer", trainer.getClass().getName());
157 AbstractEventTrainer aeTrainer = (AbstractEventTrainer)trainer;
156 Assertions.assertEquals("opennlp.tools.ml.maxent.GISTrainer", trainer.getClass().getName());
157 AbstractEventTrainer aeTrainer = (AbstractEventTrainer) trainer;
158158 // guarantee that you have a OnePassDataIndexer ...
159159 DataIndexer di = aeTrainer.getDataIndexer(eventStream);
160 Assert.assertEquals("opennlp.tools.ml.model.OnePassDataIndexer", di.getClass().getName());
161 Assert.assertEquals(3, di.getNumEvents());
162 Assert.assertEquals(2, di.getOutcomeLabels().length);
163 Assert.assertEquals(6, di.getPredLabels().length);
160 Assertions.assertEquals("opennlp.tools.ml.model.OnePassDataIndexer", di.getClass().getName());
161 Assertions.assertEquals(3, di.getNumEvents());
162 Assertions.assertEquals(2, di.getOutcomeLabels().length);
163 Assertions.assertEquals(6, di.getPredLabels().length);
164164
165165 // change the parameters and try again...
166166
167167 eventStream.reset();
168
168
169169 parameters.put(TrainingParameters.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
170170 parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM, AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
171171 parameters.put(AbstractEventTrainer.CUTOFF_PARAM, 2);
172
172
173173 trainer = TrainerFactory.getEventTrainer(parameters, new HashMap<>());
174 Assert.assertEquals("opennlp.tools.ml.maxent.quasinewton.QNTrainer", trainer.getClass().getName());
175 aeTrainer = (AbstractEventTrainer)trainer;
174 Assertions.assertEquals("opennlp.tools.ml.maxent.quasinewton.QNTrainer", trainer.getClass().getName());
175 aeTrainer = (AbstractEventTrainer) trainer;
176176 di = aeTrainer.getDataIndexer(eventStream);
177 Assert.assertEquals("opennlp.tools.ml.model.TwoPassDataIndexer", di.getClass().getName());
178
177 Assertions.assertEquals("opennlp.tools.ml.model.TwoPassDataIndexer", di.getClass().getName());
178
179179 eventStream.close();
180180 }
181
182 @Test
183 public void testIndexingFactory() throws IOException {
184 Map<String,String> myReportMap = new HashMap<>();
181
182 @Test
183 void testIndexingFactory() throws IOException {
184 Map<String, String> myReportMap = new HashMap<>();
185185 ObjectStream<Event> eventStream = createEventStream();
186186
187187 // set the cutoff to 1 for this test.
188188 TrainingParameters parameters = new TrainingParameters();
189189 parameters.put(AbstractDataIndexer.CUTOFF_PARAM, 1);
190
190
191191 // test with a 1 pass data indexer...
192192 parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM, AbstractEventTrainer.DATA_INDEXER_ONE_PASS_VALUE);
193193 DataIndexer di = DataIndexerFactory.getDataIndexer(parameters, myReportMap);
194 Assert.assertEquals("opennlp.tools.ml.model.OnePassDataIndexer", di.getClass().getName());
194 Assertions.assertEquals("opennlp.tools.ml.model.OnePassDataIndexer", di.getClass().getName());
195195 di.index(eventStream);
196 Assert.assertEquals(3, di.getNumEvents());
197 Assert.assertEquals(2, di.getOutcomeLabels().length);
198 Assert.assertEquals(6, di.getPredLabels().length);
196 Assertions.assertEquals(3, di.getNumEvents());
197 Assertions.assertEquals(2, di.getOutcomeLabels().length);
198 Assertions.assertEquals(6, di.getPredLabels().length);
199199
200200 eventStream.reset();
201
201
202202 // test with a 2-pass data indexer...
203203 parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM, AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
204204 di = DataIndexerFactory.getDataIndexer(parameters, myReportMap);
205 Assert.assertEquals("opennlp.tools.ml.model.TwoPassDataIndexer", di.getClass().getName());
205 Assertions.assertEquals("opennlp.tools.ml.model.TwoPassDataIndexer", di.getClass().getName());
206206 di.index(eventStream);
207 Assert.assertEquals(3, di.getNumEvents());
208 Assert.assertEquals(2, di.getOutcomeLabels().length);
209 Assert.assertEquals(6, di.getPredLabels().length);
207 Assertions.assertEquals(3, di.getNumEvents());
208 Assertions.assertEquals(2, di.getOutcomeLabels().length);
209 Assertions.assertEquals(6, di.getPredLabels().length);
210210
211211 // the rest of the test doesn't actually index, so we can close the eventstream.
212212 eventStream.close();
213
213
214214 // test with a 1-pass Real value dataIndexer
215 parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM,
215 parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM,
216216 AbstractEventTrainer.DATA_INDEXER_ONE_PASS_REAL_VALUE);
217217 di = DataIndexerFactory.getDataIndexer(parameters, myReportMap);
218 Assert.assertEquals("opennlp.tools.ml.model.OnePassRealValueDataIndexer", di.getClass().getName());
219
220
218 Assertions.assertEquals("opennlp.tools.ml.model.OnePassRealValueDataIndexer", di.getClass().getName());
219
220
221221 // test with an UNRegistered MockIndexer
222 parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM, "opennlp.tools.ml.maxent.MockDataIndexer");
222 parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM, "opennlp.tools.ml.maxent.MockDataIndexer");
223223 di = DataIndexerFactory.getDataIndexer(parameters, myReportMap);
224 Assert.assertEquals("opennlp.tools.ml.maxent.MockDataIndexer", di.getClass().getName());
224 Assertions.assertEquals("opennlp.tools.ml.maxent.MockDataIndexer", di.getClass().getName());
225225 }
226226 }
1919 import java.util.HashMap;
2020 import java.util.Map;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.ml.EventTrainer;
2626 import opennlp.tools.ml.TrainerFactory;
3434
3535 @SuppressWarnings("unchecked")
3636 @Test
37 public void testGaussianSmoothing() throws Exception {
38
37 void testGaussianSmoothing() throws Exception {
38
3939 TrainingParameters params = new TrainingParameters();
4040 params.put("Algorithm", "MAXENT");
4141 params.put("DataIndexer", "OnePass");
4545
4646 Map<String, String> reportMap = new HashMap<>();
4747 EventTrainer trainer = TrainerFactory.getEventTrainer(params, reportMap);
48
48
4949 ObjectStream<Event> eventStream = new FootballEventStream();
50 AbstractModel smoothedModel = (AbstractModel)trainer.train(eventStream);
51 Map<String, Context> predMap = (Map<String, Context>)smoothedModel.getDataStructures()[1];
50 AbstractModel smoothedModel = (AbstractModel) trainer.train(eventStream);
51 Map<String, Context> predMap = (Map<String, Context>) smoothedModel.getDataStructures()[1];
5252
53 double[] nevilleFalseExpected = new double[] {-0.17,.10,0.05};
54 double[] nevilleTrueExpected = new double[] {0.080,-0.047,-0.080};
53 double[] nevilleFalseExpected = new double[] {-0.17, .10, 0.05};
54 double[] nevilleTrueExpected = new double[] {0.080, -0.047, -0.080};
5555
5656 String predicateToTest = "Neville=false";
57 Assert.assertArrayEquals(nevilleFalseExpected, predMap.get(predicateToTest).getParameters(), 0.01);
57 Assertions.assertArrayEquals(nevilleFalseExpected, predMap.get(predicateToTest).getParameters(), 0.01);
5858 predicateToTest = "Neville=true";
59 Assert.assertArrayEquals(nevilleTrueExpected, predMap.get(predicateToTest).getParameters(), 0.001);
60
59 Assertions.assertArrayEquals(nevilleTrueExpected, predMap.get(predicateToTest).getParameters(), 0.001);
60
6161 eventStream.reset();
6262 params.put("GaussianSmoothing", false);
6363 trainer = TrainerFactory.getEventTrainer(params, reportMap);
64 AbstractModel unsmoothedModel = (AbstractModel)trainer.train(eventStream);
65 predMap = (Map<String, Context>)unsmoothedModel.getDataStructures()[1];
66
67 nevilleFalseExpected = new double[] {-0.19,0.11,0.06};
68 nevilleTrueExpected = new double[] {0.081,-0.050,-0.084};
64 AbstractModel unsmoothedModel = (AbstractModel) trainer.train(eventStream);
65 predMap = (Map<String, Context>) unsmoothedModel.getDataStructures()[1];
66
67 nevilleFalseExpected = new double[] {-0.19, 0.11, 0.06};
68 nevilleTrueExpected = new double[] {0.081, -0.050, -0.084};
6969
7070 predicateToTest = "Neville=false";
71 Assert.assertArrayEquals(nevilleFalseExpected, predMap.get(predicateToTest).getParameters(), 0.01);
71 Assertions.assertArrayEquals(nevilleFalseExpected, predMap.get(predicateToTest).getParameters(), 0.01);
7272 predicateToTest = "Neville=true";
73 Assert.assertArrayEquals(nevilleTrueExpected, predMap.get(predicateToTest).getParameters(), 0.001);
73 Assertions.assertArrayEquals(nevilleTrueExpected, predMap.get(predicateToTest).getParameters(), 0.001);
7474
7575 eventStream.close();
7676 }
77
77
7878 }
1919 import java.io.IOException;
2020 import java.util.HashMap;
2121
22 import org.junit.Before;
23 import org.junit.Test;
22 import org.junit.jupiter.api.BeforeEach;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.ml.AbstractEventTrainer;
2626 import opennlp.tools.ml.AbstractTrainer;
3838 public class MaxentPrepAttachTest {
3939
4040 private DataIndexer testDataIndexer;
41 @Before
42 public void initIndexer() {
41
42 @BeforeEach
43 void initIndexer() {
4344 TrainingParameters trainingParameters = new TrainingParameters();
4445 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
4546 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
4647 testDataIndexer = new TwoPassDataIndexer();
4748 testDataIndexer.init(trainingParameters, new HashMap<>());
4849 }
49
50
5051 @Test
51 public void testMaxentOnPrepAttachData() throws IOException {
52 void testMaxentOnPrepAttachData() throws IOException {
5253 testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());
5354 // this shows why the GISTrainer should be a AbstractEventTrainer.
5455 // TODO: make sure that the trainingParameter cutoff and the
5556 // cutoff value passed here are equal.
5657 AbstractModel model =
5758 new GISTrainer(true).trainModel(100,
58 testDataIndexer,
59 new UniformPrior(), 1);
59 testDataIndexer,
60 new UniformPrior(), 1);
6061 PrepAttachDataUtil.testModel(model, 0.7997028967566229);
6162 }
6263
6364 @Test
64 public void testMaxentOnPrepAttachData2Threads() throws IOException {
65 void testMaxentOnPrepAttachData2Threads() throws IOException {
6566 testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());
6667 AbstractModel model =
6768 new GISTrainer(true).trainModel(100,
7172 }
7273
7374 @Test
74 public void testMaxentOnPrepAttachDataWithParams() throws IOException {
75 void testMaxentOnPrepAttachDataWithParams() throws IOException {
7576
7677 TrainingParameters trainParams = new TrainingParameters();
7778 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
8687 }
8788
8889 @Test
89 public void testMaxentOnPrepAttachDataWithParamsDefault() throws IOException {
90 void testMaxentOnPrepAttachDataWithParamsDefault() throws IOException {
9091
9192 TrainingParameters trainParams = new TrainingParameters();
9293 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
9495 EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, null);
9596 MaxentModel model = trainer.train(PrepAttachDataUtil.createTrainingStream());
9697
97 PrepAttachDataUtil.testModel(model, 0.8086159940579352 );
98 PrepAttachDataUtil.testModel(model, 0.8086159940579352);
9899 }
99
100
100101 @Test
101 public void testMaxentOnPrepAttachDataWithParamsLLThreshold() throws IOException {
102 void testMaxentOnPrepAttachDataWithParamsLLThreshold() throws IOException {
102103 TrainingParameters trainParams = new TrainingParameters();
103104 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
104105 trainParams.put(GISTrainer.LOG_LIKELIHOOD_THRESHOLD_PARAM, 5.);
106107 EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, null);
107108 MaxentModel model = trainer.train(PrepAttachDataUtil.createTrainingStream());
108109
109 PrepAttachDataUtil.testModel(model, 0.8103490963109681 );
110 PrepAttachDataUtil.testModel(model, 0.8103490963109681);
110111 }
111112 }
1616
1717 package opennlp.tools.ml.maxent;
1818
19 import java.io.IOException;
2019 import java.util.Map;
2120
2221 import opennlp.tools.ml.model.DataIndexer;
6968
7069 @Override
7170 public void init(TrainingParameters trainParams,
72 Map<String, String> reportMap) {
71 Map<String, String> reportMap) {
7372 }
7473
7574 @Override
76 public void index(ObjectStream<Event> eventStream) throws IOException {
75 public void index(ObjectStream<Event> eventStream) {
7776 }
7877
7978 }
1919 import java.io.IOException;
2020 import java.util.HashMap;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.ml.AbstractTrainer;
2727 import opennlp.tools.ml.model.DataIndexer;
3434 public class RealValueModelTest {
3535
3636 private DataIndexer testDataIndexer;
37 @Before
38 public void initIndexer() {
37
38 @BeforeEach
39 void initIndexer() {
3940 TrainingParameters trainingParameters = new TrainingParameters();
4041 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
4142 testDataIndexer = new OnePassRealValueDataIndexer();
4243 testDataIndexer.init(trainingParameters, new HashMap<>());
4344 }
44
45
4546 @Test
46 public void testRealValuedWeightsVsRepeatWeighting() throws IOException {
47 void testRealValuedWeightsVsRepeatWeighting() throws IOException {
4748 GISModel realModel;
4849 GISTrainer gisTrainer = new GISTrainer();
4950 try (RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
5657 try (FileEventStream rvfes2 = new FileEventStream(
5758 "src/test/resources/data/opennlp/maxent/repeat-weighting-training-data.txt")) {
5859 testDataIndexer.index(rvfes2);
59 repeatModel = gisTrainer.trainModel(100,testDataIndexer);
60 repeatModel = gisTrainer.trainModel(100, testDataIndexer);
6061 }
6162
62 String[] features2Classify = new String[] {"feature2","feature5"};
63 String[] features2Classify = new String[] {"feature2", "feature5"};
6364 double[] realResults = realModel.eval(features2Classify);
6465 double[] repeatResults = repeatModel.eval(features2Classify);
6566
66 Assert.assertEquals(realResults.length, repeatResults.length);
67 Assertions.assertEquals(realResults.length, repeatResults.length);
6768 for (int i = 0; i < realResults.length; i++) {
6869 System.out.println(String.format("classifiy with realModel: %1$s = %2$f",
6970 realModel.getOutcome(i), realResults[i]));
7071 System.out.println(String.format("classifiy with repeatModel: %1$s = %2$f",
7172 repeatModel.getOutcome(i), repeatResults[i]));
72 Assert.assertEquals(realResults[i], repeatResults[i], 0.01f);
73 Assertions.assertEquals(repeatResults[i], realResults[i], 0.01f);
7374 }
7475
75 features2Classify = new String[] {"feature1","feature2","feature3","feature4","feature5"};
76 features2Classify = new String[] {"feature1", "feature2", "feature3", "feature4", "feature5"};
7677 realResults = realModel.eval(features2Classify, new float[] {5.5f, 6.1f, 9.1f, 4.0f, 1.8f});
7778 repeatResults = repeatModel.eval(features2Classify, new float[] {5.5f, 6.1f, 9.1f, 4.0f, 1.8f});
7879
7980 System.out.println();
80 Assert.assertEquals(realResults.length, repeatResults.length);
81 Assertions.assertEquals(realResults.length, repeatResults.length);
8182 for (int i = 0; i < realResults.length; i++) {
8283 System.out.println(String.format("classifiy with realModel: %1$s = %2$f",
8384 realModel.getOutcome(i), realResults[i]));
8485 System.out.println(String.format("classifiy with repeatModel: %1$s = %2$f",
8586 repeatModel.getOutcome(i), repeatResults[i]));
86 Assert.assertEquals(realResults[i], repeatResults[i], 0.01f);
87 Assertions.assertEquals(repeatResults[i], realResults[i],0.01f);
8788 }
8889
8990 }
1919 import java.nio.charset.StandardCharsets;
2020 import java.util.HashMap;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.ml.AbstractTrainer;
2727 import opennlp.tools.ml.EventTrainer;
4141
4242 private DataIndexer testDataIndexer;
4343
44 @Before
45 public void initIndexer() {
44 @BeforeEach
45 void initIndexer() {
4646 TrainingParameters trainingParameters = new TrainingParameters();
4747 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 0);
4848 testDataIndexer = new OnePassRealValueDataIndexer();
5656 * we use (0.1,0.2) and (10,20) there is a difference.
5757 */
5858 @Test
59 public void testScaleResults() throws Exception {
59 void testScaleResults() throws Exception {
6060 String smallValues = "predA=0.1 predB=0.2 A\n" + "predB=0.3 predA=0.1 B\n";
6161
6262 String smallTest = "predA=0.2 predB=0.2";
9797 String largeResultString = largeModel.getAllOutcomes(largeResults);
9898 System.out.println("largeResults: " + largeResultString);
9999
100 Assert.assertEquals(smallResults.length, largeResults.length);
100 Assertions.assertEquals(smallResults.length, largeResults.length);
101101 for (int i = 0; i < smallResults.length; i++) {
102102 System.out.println(String.format(
103103 "classifiy with smallModel: %1$s = %2$f", smallModel.getOutcome(i),
105105 System.out.println(String.format(
106106 "classifiy with largeModel: %1$s = %2$f", largeModel.getOutcome(i),
107107 largeResults[i]));
108 Assert.assertEquals(smallResults[i], largeResults[i], 0.01f);
108 Assertions.assertEquals(largeResults[i], smallResults[i], 0.01f);
109109 }
110110 }
111111 }
2626
2727
2828 private URL url;
29
2930 public URLInputStreamFactory(URL url) {
3031 this.url = url;
3132 }
32
33
3334 @Override
3435 public InputStream createInputStream() throws IOException {
3536 return url.openStream();
1919 import java.io.IOException;
2020 import java.util.HashMap;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.ml.AbstractTrainer;
2727 import opennlp.tools.ml.model.DataIndexer;
3333
3434 private DataIndexer indexer;
3535
36 @Before
37 public void initIndexer() {
36 @BeforeEach
37 void initIndexer() {
3838 TrainingParameters trainingParameters = new TrainingParameters();
3939 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
4040 indexer = new OnePassRealValueDataIndexer();
4242 }
4343
4444 @Test
45 public void testLastLineBug() throws IOException {
45 void testLastLineBug() throws IOException {
4646 try (RealValueFileEventStream rvfes = new RealValueFileEventStream(
4747 "src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-ok.txt")) {
4848 indexer.index(rvfes);
4949 }
50 Assert.assertEquals(1, indexer.getOutcomeLabels().length);
50 Assertions.assertEquals(1, indexer.getOutcomeLabels().length);
5151
5252 try (RealValueFileEventStream rvfes = new RealValueFileEventStream(
5353 "src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-broken.txt")) {
5454 indexer.index(rvfes);
5555 }
56 Assert.assertEquals(1, indexer.getOutcomeLabels().length);
56 Assertions.assertEquals(1, indexer.getOutcomeLabels().length);
5757 }
5858 }
1616
1717 package opennlp.tools.ml.maxent.quasinewton;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult;
2323
2525 private static final double TOLERANCE = 0.01;
2626
2727 @Test
28 public void testLineSearchDeterminesSaneStepLength1() {
28 void testLineSearchDeterminesSaneStepLength1() {
2929 Function objectiveFunction = new QuadraticFunction1();
3030 // given
31 double[] testX = new double[] { 0 };
32 double testValueX = objectiveFunction.valueAt(testX);
33 double[] testGradX = objectiveFunction.gradientAt(testX);
34 double[] testDirection = new double[] { 1 };
35 // when
36 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
37 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
38 double stepSize = lsr.getStepSize();
39 // then
40 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
41 Assert.assertTrue(succCond);
42 }
43
44 @Test
45 public void testLineSearchDeterminesSaneStepLength2() {
46 Function objectiveFunction = new QuadraticFunction2();
47 // given
48 double[] testX = new double[] { -2 };
49 double testValueX = objectiveFunction.valueAt(testX);
50 double[] testGradX = objectiveFunction.gradientAt(testX);
51 double[] testDirection = new double[] { 1 };
52 // when
53 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
54 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
55 double stepSize = lsr.getStepSize();
56 // then
57 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
58 Assert.assertTrue(succCond);
59 }
60
61 @Test
62 public void testLineSearchFailsWithWrongDirection1() {
63 Function objectiveFunction = new QuadraticFunction1();
64 // given
65 double[] testX = new double[] { 0 };
66 double testValueX = objectiveFunction.valueAt(testX);
67 double[] testGradX = objectiveFunction.gradientAt(testX);
68 double[] testDirection = new double[] { -1 };
69 // when
70 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
71 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
72 double stepSize = lsr.getStepSize();
73 // then
74 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
75 Assert.assertFalse(succCond);
76 Assert.assertEquals(0.0, stepSize, TOLERANCE);
77 }
78
79 @Test
80 public void testLineSearchFailsWithWrongDirection2() {
81 Function objectiveFunction = new QuadraticFunction2();
82 // given
83 double[] testX = new double[] { -2 };
84 double testValueX = objectiveFunction.valueAt(testX);
85 double[] testGradX = objectiveFunction.gradientAt(testX);
86 double[] testDirection = new double[] { -1 };
87 // when
88 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
89 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
90 double stepSize = lsr.getStepSize();
91 // then
92 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
93 Assert.assertFalse(succCond);
94 Assert.assertEquals(0.0, stepSize, TOLERANCE);
95 }
96
97 @Test
98 public void testLineSearchFailsWithWrongDirection3() {
31 double[] testX = new double[] {0};
32 double testValueX = objectiveFunction.valueAt(testX);
33 double[] testGradX = objectiveFunction.gradientAt(testX);
34 double[] testDirection = new double[] {1};
35 // when
36 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
37 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
38 double stepSize = lsr.getStepSize();
39 // then
40 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
41 Assertions.assertTrue(succCond);
42 }
43
44 @Test
45 void testLineSearchDeterminesSaneStepLength2() {
46 Function objectiveFunction = new QuadraticFunction2();
47 // given
48 double[] testX = new double[] {-2};
49 double testValueX = objectiveFunction.valueAt(testX);
50 double[] testGradX = objectiveFunction.gradientAt(testX);
51 double[] testDirection = new double[] {1};
52 // when
53 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
54 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
55 double stepSize = lsr.getStepSize();
56 // then
57 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
58 Assertions.assertTrue(succCond);
59 }
60
61 @Test
62 void testLineSearchFailsWithWrongDirection1() {
9963 Function objectiveFunction = new QuadraticFunction1();
10064 // given
101 double[] testX = new double[] { 4 };
102 double testValueX = objectiveFunction.valueAt(testX);
103 double[] testGradX = objectiveFunction.gradientAt(testX);
104 double[] testDirection = new double[] { 1 };
105 // when
106 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
107 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
108 double stepSize = lsr.getStepSize();
109 // then
110 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
111 Assert.assertFalse(succCond);
112 Assert.assertEquals(0.0, stepSize, TOLERANCE);
113 }
114
115 @Test
116 public void testLineSearchFailsWithWrongDirection4() {
117 Function objectiveFunction = new QuadraticFunction2();
118 // given
119 double[] testX = new double[] { 2 };
120 double testValueX = objectiveFunction.valueAt(testX);
121 double[] testGradX = objectiveFunction.gradientAt(testX);
122 double[] testDirection = new double[] { 1 };
123 // when
124 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
125 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
126 double stepSize = lsr.getStepSize();
127 // then
128 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
129 Assert.assertFalse(succCond);
130 Assert.assertEquals(0.0, stepSize, TOLERANCE);
131 }
132
133 @Test
134 public void testLineSearchFailsAtMinimum1() {
135 Function objectiveFunction = new QuadraticFunction2();
136 // given
137 double[] testX = new double[] { 0 };
138 double testValueX = objectiveFunction.valueAt(testX);
139 double[] testGradX = objectiveFunction.gradientAt(testX);
140 double[] testDirection = new double[] { -1 };
141 // when
142 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
143 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
144 double stepSize = lsr.getStepSize();
145 // then
146 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
147 Assert.assertFalse(succCond);
148 Assert.assertEquals(0.0, stepSize, TOLERANCE);
149 }
150
151 @Test
152 public void testLineSearchFailsAtMinimum2() {
153 Function objectiveFunction = new QuadraticFunction2();
154 // given
155 double[] testX = new double[] { 0 };
156 double testValueX = objectiveFunction.valueAt(testX);
157 double[] testGradX = objectiveFunction.gradientAt(testX);
158 double[] testDirection = new double[] { 1 };
159 // when
160 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
161 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
162 double stepSize = lsr.getStepSize();
163 // then
164 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
165 Assert.assertFalse(succCond);
166 Assert.assertEquals(0.0, stepSize, TOLERANCE);
65 double[] testX = new double[] {0};
66 double testValueX = objectiveFunction.valueAt(testX);
67 double[] testGradX = objectiveFunction.gradientAt(testX);
68 double[] testDirection = new double[] {-1};
69 // when
70 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
71 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
72 double stepSize = lsr.getStepSize();
73 // then
74 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
75 Assertions.assertFalse(succCond);
76 Assertions.assertEquals(0.0, stepSize, TOLERANCE);
77 }
78
79 @Test
80 void testLineSearchFailsWithWrongDirection2() {
81 Function objectiveFunction = new QuadraticFunction2();
82 // given
83 double[] testX = new double[] {-2};
84 double testValueX = objectiveFunction.valueAt(testX);
85 double[] testGradX = objectiveFunction.gradientAt(testX);
86 double[] testDirection = new double[] {-1};
87 // when
88 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
89 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
90 double stepSize = lsr.getStepSize();
91 // then
92 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
93 Assertions.assertFalse(succCond);
94 Assertions.assertEquals(0.0, stepSize, TOLERANCE);
95 }
96
97 @Test
98 void testLineSearchFailsWithWrongDirection3() {
99 Function objectiveFunction = new QuadraticFunction1();
100 // given
101 double[] testX = new double[] {4};
102 double testValueX = objectiveFunction.valueAt(testX);
103 double[] testGradX = objectiveFunction.gradientAt(testX);
104 double[] testDirection = new double[] {1};
105 // when
106 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
107 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
108 double stepSize = lsr.getStepSize();
109 // then
110 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
111 Assertions.assertFalse(succCond);
112 Assertions.assertEquals(0.0, stepSize, TOLERANCE);
113 }
114
115 @Test
116 void testLineSearchFailsWithWrongDirection4() {
117 Function objectiveFunction = new QuadraticFunction2();
118 // given
119 double[] testX = new double[] {2};
120 double testValueX = objectiveFunction.valueAt(testX);
121 double[] testGradX = objectiveFunction.gradientAt(testX);
122 double[] testDirection = new double[] {1};
123 // when
124 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
125 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
126 double stepSize = lsr.getStepSize();
127 // then
128 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
129 Assertions.assertFalse(succCond);
130 Assertions.assertEquals(0.0, stepSize, TOLERANCE);
131 }
132
133 @Test
134 void testLineSearchFailsAtMinimum1() {
135 Function objectiveFunction = new QuadraticFunction2();
136 // given
137 double[] testX = new double[] {0};
138 double testValueX = objectiveFunction.valueAt(testX);
139 double[] testGradX = objectiveFunction.gradientAt(testX);
140 double[] testDirection = new double[] {-1};
141 // when
142 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
143 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
144 double stepSize = lsr.getStepSize();
145 // then
146 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
147 Assertions.assertFalse(succCond);
148 Assertions.assertEquals(0.0, stepSize, TOLERANCE);
149 }
150
151 @Test
152 void testLineSearchFailsAtMinimum2() {
153 Function objectiveFunction = new QuadraticFunction2();
154 // given
155 double[] testX = new double[] {0};
156 double testValueX = objectiveFunction.valueAt(testX);
157 double[] testGradX = objectiveFunction.gradientAt(testX);
158 double[] testDirection = new double[] {1};
159 // when
160 LineSearchResult lsr = LineSearchResult.getInitialObject(testValueX, testGradX, testX);
161 LineSearch.doLineSearch(objectiveFunction, testDirection, lsr, 1.0);
162 double stepSize = lsr.getStepSize();
163 // then
164 boolean succCond = TOLERANCE < stepSize && stepSize <= 1;
165 Assertions.assertFalse(succCond);
166 Assertions.assertEquals(0.0, stepSize, TOLERANCE);
167167 }
168168
169169 /**
2222 import java.util.HashMap;
2323 import java.util.Map;
2424
25 import org.junit.Assert;
26 import org.junit.Before;
27 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.BeforeEach;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.ml.AbstractTrainer;
3030 import opennlp.tools.ml.model.DataIndexer;
3838
3939 private DataIndexer testDataIndexer;
4040
41 @Before
42 public void initIndexer() {
41 @BeforeEach
42 void initIndexer() {
4343 TrainingParameters trainingParameters = new TrainingParameters();
4444 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
4545 testDataIndexer = new OnePassRealValueDataIndexer();
4747 }
4848
4949 @Test
50 public void testDomainDimensionSanity() throws IOException {
51 // given
52 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
53 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
54 StandardCharsets.UTF_8.name());
50 void testDomainDimensionSanity() throws IOException {
51 // given
52 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
53 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
54 StandardCharsets.UTF_8.name());
5555 testDataIndexer.index(rvfes1);
5656 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
5757 // when
5858 int correctDomainDimension = testDataIndexer.getPredLabels().length
5959 * testDataIndexer.getOutcomeLabels().length;
6060 // then
61 Assert.assertEquals(correctDomainDimension, objectFunction.getDimension());
62 }
63
64 @Test
65 public void testInitialSanity() throws IOException {
66 // given
67 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
68 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
69 StandardCharsets.UTF_8.name());
61 Assertions.assertEquals(correctDomainDimension, objectFunction.getDimension());
62 }
63
64 @Test
65 void testInitialSanity() throws IOException {
66 // given
67 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
68 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
69 StandardCharsets.UTF_8.name());
7070 testDataIndexer.index(rvfes1);
7171 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
7272 // when
7373 double[] initial = objectFunction.getInitialPoint();
7474 // then
7575 for (double anInitial : initial) {
76 Assert.assertEquals(0.0, anInitial, TOLERANCE01);
77 }
78 }
79
80 @Test
81 public void testGradientSanity() throws IOException {
82 // given
83 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
84 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
85 StandardCharsets.UTF_8.name());
76 Assertions.assertEquals(0.0, anInitial, TOLERANCE01);
77 }
78 }
79
80 @Test
81 void testGradientSanity() throws IOException {
82 // given
83 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
84 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
85 StandardCharsets.UTF_8.name());
8686 testDataIndexer.index(rvfes1);
8787 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
8888 // when
8989 double[] initial = objectFunction.getInitialPoint();
9090 double[] gradientAtInitial = objectFunction.gradientAt(initial);
9191 // then
92 Assert.assertNotNull(gradientAtInitial);
93 }
94
95 @Test
96 public void testValueAtInitialPoint() throws IOException {
92 Assertions.assertNotNull(gradientAtInitial);
93 }
94
95 @Test
96 void testValueAtInitialPoint() throws IOException {
9797 // given
9898 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
9999 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8");
103103 double value = objectFunction.valueAt(objectFunction.getInitialPoint());
104104 double expectedValue = 13.86294361;
105105 // then
106 Assert.assertEquals(expectedValue, value, TOLERANCE01);
107 }
108
109 @Test
110 public void testValueAtNonInitialPoint01() throws IOException {
111 // given
112 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
113 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
114 StandardCharsets.UTF_8.name());
115 testDataIndexer.index(rvfes1);
116 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
117 // when
118 double[] nonInitialPoint = new double[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
106 Assertions.assertEquals(expectedValue, value, TOLERANCE01);
107 }
108
109 @Test
110 void testValueAtNonInitialPoint01() throws IOException {
111 // given
112 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
113 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
114 StandardCharsets.UTF_8.name());
115 testDataIndexer.index(rvfes1);
116 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
117 // when
118 double[] nonInitialPoint = new double[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
119119 double value = objectFunction.valueAt(nonInitialPoint);
120120 double expectedValue = 13.862943611198894;
121121 // then
122 Assert.assertEquals(expectedValue, value, TOLERANCE01);
123 }
124
125 @Test
126 public void testValueAtNonInitialPoint02() throws IOException {
127 // given
128 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
129 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
130 StandardCharsets.UTF_8.name());
131 testDataIndexer.index(rvfes1);
132 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
133 // when
134 double[] nonInitialPoint = new double[] { 3, 2, 3, 2, 3, 2, 3, 2, 3, 2 };
122 Assertions.assertEquals(expectedValue, value, TOLERANCE01);
123 }
124
125 @Test
126 void testValueAtNonInitialPoint02() throws IOException {
127 // given
128 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
129 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
130 StandardCharsets.UTF_8.name());
131 testDataIndexer.index(rvfes1);
132 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
133 // when
134 double[] nonInitialPoint = new double[] {3, 2, 3, 2, 3, 2, 3, 2, 3, 2};
135135 double value = objectFunction.valueAt(dealignDoubleArrayForTestData(nonInitialPoint,
136136 testDataIndexer.getPredLabels(),
137137 testDataIndexer.getOutcomeLabels()));
138138 double expectedValue = 53.163219721099026;
139139 // then
140 Assert.assertEquals(expectedValue, value, TOLERANCE02);
141 }
142
143 @Test
144 public void testGradientAtInitialPoint() throws IOException {
145 // given
146 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
147 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
148 StandardCharsets.UTF_8.name());
140 Assertions.assertEquals(expectedValue, value, TOLERANCE02);
141 }
142
143 @Test
144 void testGradientAtInitialPoint() throws IOException {
145 // given
146 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
147 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
148 StandardCharsets.UTF_8.name());
149149 testDataIndexer.index(rvfes1);
150150 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
151151 // when
152152 double[] gradientAtInitialPoint = objectFunction.gradientAt(objectFunction.getInitialPoint());
153 double[] expectedGradient = new double[] { -9.0, -14.0, -17.0, 20.0, 8.5, 9.0, 14.0, 17.0, -20.0, -8.5 };
154 // then
155 Assert.assertTrue(compareDoubleArray(expectedGradient, gradientAtInitialPoint,
153 double[] expectedGradient = new double[] {-9.0, -14.0, -17.0, 20.0, 8.5, 9.0, 14.0, 17.0, -20.0, -8.5};
154 // then
155 Assertions.assertTrue(compareDoubleArray(expectedGradient, gradientAtInitialPoint,
156156 testDataIndexer, TOLERANCE01));
157157 }
158158
159159 @Test
160 public void testGradientAtNonInitialPoint() throws IOException {
161 // given
162 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
163 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
164 StandardCharsets.UTF_8.name());
165 testDataIndexer.index(rvfes1);
166 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
167 // when
168 double[] nonInitialPoint = new double[] { 0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5 };
160 void testGradientAtNonInitialPoint() throws IOException {
161 // given
162 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
163 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt",
164 StandardCharsets.UTF_8.name());
165 testDataIndexer.index(rvfes1);
166 NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer);
167 // when
168 double[] nonInitialPoint = new double[] {0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5};
169169 double[] gradientAtNonInitialPoint =
170170 objectFunction.gradientAt(dealignDoubleArrayForTestData(nonInitialPoint,
171171 testDataIndexer.getPredLabels(),
172172 testDataIndexer.getOutcomeLabels()));
173173 double[] expectedGradient =
174 new double[] { -12.755042847945553, -21.227127506102434,
175 -72.57790706276435, 38.03525795198456,
176 15.348650889354925, 12.755042847945557,
177 21.22712750610244, 72.57790706276438,
178 -38.03525795198456, -15.348650889354925 };
179 // then
180 Assert.assertTrue(compareDoubleArray(expectedGradient, gradientAtNonInitialPoint,
174 new double[] {-12.755042847945553, -21.227127506102434,
175 -72.57790706276435, 38.03525795198456,
176 15.348650889354925, 12.755042847945557,
177 21.22712750610244, 72.57790706276438,
178 -38.03525795198456, -15.348650889354925};
179 // then
180 Assertions.assertTrue(compareDoubleArray(expectedGradient, gradientAtNonInitialPoint,
181181 testDataIndexer, TOLERANCE01));
182182 }
183183
184184 private double[] alignDoubleArrayForTestData(double[] expected,
185 String[] predLabels, String[] outcomeLabels) {
185 String[] predLabels, String[] outcomeLabels) {
186186 double[] aligned = new double[predLabels.length * outcomeLabels.length];
187187
188188 String[] sortedPredLabels = predLabels.clone();
189 String[] sortedOutcomeLabels = outcomeLabels.clone();
189 String[] sortedOutcomeLabels = outcomeLabels.clone();
190190 Arrays.sort(sortedPredLabels);
191191 Arrays.sort(sortedOutcomeLabels);
192192
202202 for (int i = 0; i < sortedOutcomeLabels.length; i++) {
203203 for (int j = 0; j < sortedPredLabels.length; j++) {
204204 aligned[i * sortedPredLabels.length + j] = expected[invertedOutcomeIndex
205 .get(sortedOutcomeLabels[i])
206 * sortedPredLabels.length
207 + invertedPredIndex.get(sortedPredLabels[j])];
205 .get(sortedOutcomeLabels[i])
206 * sortedPredLabels.length
207 + invertedPredIndex.get(sortedPredLabels[j])];
208208 }
209209 }
210210 return aligned;
211211 }
212212
213213 private double[] dealignDoubleArrayForTestData(double[] expected,
214 String[] predLabels, String[] outcomeLabels) {
214 String[] predLabels, String[] outcomeLabels) {
215215 double[] dealigned = new double[predLabels.length * outcomeLabels.length];
216216
217217 String[] sortedPredLabels = predLabels.clone();
241241 }
242242
243243 private boolean compareDoubleArray(double[] expected, double[] actual,
244 DataIndexer indexer, double tolerance)
245 {
244 DataIndexer indexer, double tolerance) {
246245 double[] alignedActual = alignDoubleArrayForTestData(
247246 actual, indexer.getPredLabels(), indexer.getOutcomeLabels());
248247
1616
1717 package opennlp.tools.ml.maxent.quasinewton;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 public class QNMinimizerTest {
2323
2424 @Test
25 public void testQuadraticFunction() {
25 void testQuadraticFunction() {
2626 QNMinimizer minimizer = new QNMinimizer();
2727 Function f = new QuadraticFunction();
2828 double[] x = minimizer.minimize(f);
2929 double minValue = f.valueAt(x);
3030
31 Assert.assertEquals(x[0], 1.0, 1e-5);
32 Assert.assertEquals(x[1], 5.0, 1e-5);
33 Assert.assertEquals(minValue, 10.0, 1e-10);
31 Assertions.assertEquals(1.0, x[0], 1e-5);
32 Assertions.assertEquals(5.0, x[1], 1e-5);
33 Assertions.assertEquals(10.0, minValue, 1e-10);
3434 }
3535
3636 @Test
37 public void testRosenbrockFunction() {
37 void testRosenbrockFunction() {
3838 QNMinimizer minimizer = new QNMinimizer();
3939 Function f = new Rosenbrock();
4040 double[] x = minimizer.minimize(f);
4141 double minValue = f.valueAt(x);
4242
43 Assert.assertEquals(x[0], 1.0, 1e-5);
44 Assert.assertEquals(x[1], 1.0, 1e-5);
45 Assert.assertEquals(minValue, 0, 1e-10);
43 Assertions.assertEquals(1.0, x[0], 1e-5);
44 Assertions.assertEquals(1.0, x[1], 1e-5);
45 Assertions.assertEquals(0, minValue, 1e-10);
4646 }
4747
4848 /**
6262
6363 @Override
6464 public double[] gradientAt(double[] x) {
65 return new double[] { 2 * (x[0] - 1), 2 * (x[1] - 5) };
65 return new double[] {2 * (x[0] - 1), 2 * (x[1] - 5)};
6666 }
6767 }
6868
7070 * Rosenbrock function (http://en.wikipedia.org/wiki/Rosenbrock_function)
7171 * f(x,y) = (1-x)^2 + 100*(y-x^2)^2
7272 * f(x,y) is non-convex and has global minimum at (x,y) = (1,1) where f(x,y) = 0
73 *
73 * <p>
7474 * f_x = -2*(1-x) - 400*(y-x^2)*x
7575 * f_y = 200*(y-x^2)
7676 */
1919 import java.io.IOException;
2020 import java.util.HashMap;
2121
22 import org.junit.Test;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.ml.AbstractEventTrainer;
2525 import opennlp.tools.ml.AbstractTrainer;
3535 public class QNPrepAttachTest {
3636
3737 @Test
38 public void testQNOnPrepAttachData() throws IOException {
38 void testQNOnPrepAttachData() throws IOException {
3939 DataIndexer indexer = new TwoPassDataIndexer();
4040 TrainingParameters indexingParameters = new TrainingParameters();
4141 indexingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
4343 indexer.init(indexingParameters, new HashMap<>());
4444 indexer.index(PrepAttachDataUtil.createTrainingStream());
4545
46 AbstractModel model = new QNTrainer(true).trainModel(100, indexer );
46 AbstractModel model = new QNTrainer(true).trainModel(100, indexer);
4747
4848 PrepAttachDataUtil.testModel(model, 0.8155484030700668);
4949 }
5050
5151 @Test
52 public void testQNOnPrepAttachDataWithParamsDefault() throws IOException {
52 void testQNOnPrepAttachDataWithParamsDefault() throws IOException {
5353
5454 TrainingParameters trainParams = new TrainingParameters();
5555 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
5656
5757 MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null)
58 .train(PrepAttachDataUtil.createTrainingStream());
58 .train(PrepAttachDataUtil.createTrainingStream());
5959
6060 PrepAttachDataUtil.testModel(model, 0.8115870264917059);
6161 }
6262
6363 @Test
64 public void testQNOnPrepAttachDataWithElasticNetParams() throws IOException {
64 void testQNOnPrepAttachDataWithElasticNetParams() throws IOException {
6565
6666 TrainingParameters trainParams = new TrainingParameters();
6767 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
7272 trainParams.put(QNTrainer.L2COST_PARAM, 1.0D);
7373
7474 MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null)
75 .train(PrepAttachDataUtil.createTrainingStream());
75 .train(PrepAttachDataUtil.createTrainingStream());
7676
7777 PrepAttachDataUtil.testModel(model, 0.8229759841544937);
7878 }
7979
8080 @Test
81 public void testQNOnPrepAttachDataWithL1Params() throws IOException {
81 void testQNOnPrepAttachDataWithL1Params() throws IOException {
8282
8383 TrainingParameters trainParams = new TrainingParameters();
8484 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
8989 trainParams.put(QNTrainer.L2COST_PARAM, 0D);
9090
9191 MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null)
92 .train(PrepAttachDataUtil.createTrainingStream());
92 .train(PrepAttachDataUtil.createTrainingStream());
9393
9494 PrepAttachDataUtil.testModel(model, 0.8180242634315424);
9595 }
9696
9797 @Test
98 public void testQNOnPrepAttachDataWithL2Params() throws IOException {
98 void testQNOnPrepAttachDataWithL2Params() throws IOException {
9999
100100 TrainingParameters trainParams = new TrainingParameters();
101101 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
106106 trainParams.put(QNTrainer.L2COST_PARAM, 1.0D);
107107
108108 MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null)
109 .train(PrepAttachDataUtil.createTrainingStream());
109 .train(PrepAttachDataUtil.createTrainingStream());
110110
111111 PrepAttachDataUtil.testModel(model, 0.8227283981183461);
112112 }
113113
114114 @Test
115 public void testQNOnPrepAttachDataInParallel() throws IOException {
115 void testQNOnPrepAttachDataInParallel() throws IOException {
116116
117117 TrainingParameters trainParams = new TrainingParameters();
118118 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
119119 trainParams.put(QNTrainer.THREADS_PARAM, 2);
120120
121121 MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null)
122 .train(PrepAttachDataUtil.createTrainingStream());
122 .train(PrepAttachDataUtil.createTrainingStream());
123123
124124 PrepAttachDataUtil.testModel(model, 0.8115870264917059);
125125 }
2222 import java.io.IOException;
2323 import java.util.HashMap;
2424
25 import org.junit.Assert;
26 import org.junit.Before;
27 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.BeforeEach;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.ml.AbstractTrainer;
3030 import opennlp.tools.ml.model.AbstractModel;
4242
4343 private DataIndexer testDataIndexer;
4444
45 @Before
46 public void initIndexer() {
45 @BeforeEach
46 void initIndexer() {
4747 TrainingParameters trainingParameters = new TrainingParameters();
4848 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
4949 testDataIndexer = new OnePassRealValueDataIndexer();
5151 }
5252
5353 @Test
54 public void testTrainModelReturnsAQNModel() throws Exception {
54 void testTrainModelReturnsAQNModel() throws Exception {
5555 // given
5656 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
5757 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt");
5959 // when
6060 QNModel trainedModel = new QNTrainer(false).trainModel(ITERATIONS, testDataIndexer);
6161 // then
62 Assert.assertNotNull(trainedModel);
62 Assertions.assertNotNull(trainedModel);
6363 }
6464
6565 @Test
66 public void testInTinyDevSet() throws Exception {
66 void testInTinyDevSet() throws Exception {
6767 // given
6868 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
6969 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt");
70 testDataIndexer.index(rvfes1);;
70 testDataIndexer.index(rvfes1);
71 ;
7172 // when
7273 QNModel trainedModel = new QNTrainer(15, true).trainModel(ITERATIONS, testDataIndexer);
7374 String[] features2Classify = new String[] {
74 "feature2","feature3", "feature3",
75 "feature3","feature3", "feature3",
76 "feature3","feature3", "feature3",
77 "feature3","feature3", "feature3"};
75 "feature2", "feature3", "feature3",
76 "feature3", "feature3", "feature3",
77 "feature3", "feature3", "feature3",
78 "feature3", "feature3", "feature3"};
7879 double[] eval = trainedModel.eval(features2Classify);
7980 // then
80 Assert.assertNotNull(eval);
81 Assertions.assertNotNull(eval);
8182 }
8283
8384 @Test
84 public void testModel() throws IOException {
85 void testModel() throws IOException {
8586 // given
8687 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
8788 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt");
9091 QNModel trainedModel = new QNTrainer(15, true).trainModel(
9192 ITERATIONS, testDataIndexer);
9293
93 Assert.assertFalse(trainedModel.equals(null));
94 Assertions.assertFalse(trainedModel.equals(null));
9495 }
9596
9697 @Test
97 public void testSerdeModel() throws IOException {
98 void testSerdeModel() throws IOException {
9899 // given
99100 RealValueFileEventStream rvfes1 = new RealValueFileEventStream(
100101 "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt");
113114 AbstractModel readModel = modelReader.getModel();
114115 QNModel deserModel = (QNModel) readModel;
115116
116 Assert.assertTrue(trainedModel.equals(deserModel));
117 Assertions.assertTrue(trainedModel.equals(deserModel));
117118
118119 String[] features2Classify = new String[] {
119 "feature2","feature3", "feature3",
120 "feature3","feature3", "feature3",
121 "feature3","feature3", "feature3",
122 "feature3","feature3", "feature3"};
120 "feature2", "feature3", "feature3",
121 "feature3", "feature3", "feature3",
122 "feature3", "feature3", "feature3",
123 "feature3", "feature3", "feature3"};
123124 double[] eval01 = trainedModel.eval(features2Classify);
124125 double[] eval02 = deserModel.eval(features2Classify);
125126
126 Assert.assertEquals(eval01.length, eval02.length);
127 Assertions.assertEquals(eval01.length, eval02.length);
127128 for (int i = 0; i < eval01.length; i++) {
128 Assert.assertEquals(eval01[i], eval02[i], 0.00000001);
129 Assertions.assertEquals(eval01[i], eval02[i], 0.00000001);
129130 }
130131 }
131132 }
1616
1717 package opennlp.tools.ml.model;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 public class EventTest {
2323
2424 @Test
25 public void testNullOutcome() {
25 void testNullOutcome() {
2626 try {
27 new Event(null, new String[]{"aa", "bb", "cc"});
28 Assert.fail("NPE must be thrown");
29 }
30 catch (NullPointerException expected) {
27 new Event(null, new String[] {"aa", "bb", "cc"});
28 Assertions.fail("NPE must be thrown");
29 } catch (NullPointerException expected) {
3130 }
3231 }
3332
3433 @Test
35 public void testNullContext() {
34 void testNullContext() {
3635 try {
3736 new Event("o1", null);
38 Assert.fail("NPE must be thrown");
39 }
40 catch (NullPointerException expected) {
37 Assertions.fail("NPE must be thrown");
38 } catch (NullPointerException expected) {
4139 }
4240 }
4341
4442 @Test
45 public void testWithValues() {
43 void testWithValues() {
4644 Event event = new Event("o1",
47 new String[]{"aa", "bb", "cc"});
45 new String[] {"aa", "bb", "cc"});
4846
49 Assert.assertEquals("o1", event.getOutcome());
50 Assert.assertArrayEquals(new String[]{"aa", "bb", "cc"}, event.getContext());
51 Assert.assertNull(event.getValues());
52 Assert.assertEquals("o1 [aa bb cc]", event.toString());
47 Assertions.assertEquals("o1", event.getOutcome());
48 Assertions.assertArrayEquals(new String[] {"aa", "bb", "cc"}, event.getContext());
49 Assertions.assertNull(event.getValues());
50 Assertions.assertEquals("o1 [aa bb cc]", event.toString());
5351 }
5452
5553 @Test
56 public void testWithoutValues() {
54 void testWithoutValues() {
5755 Event event = new Event("o1",
58 new String[]{"aa", "bb", "cc"},
59 new float[]{0.2F, 0.4F, 0.4F});
56 new String[] {"aa", "bb", "cc"},
57 new float[] {0.2F, 0.4F, 0.4F});
6058
61 Assert.assertEquals("o1", event.getOutcome());
62 Assert.assertArrayEquals(new String[]{"aa", "bb", "cc"}, event.getContext());
63 Assert.assertArrayEquals(new float[]{0.2F, 0.4F, 0.4F}, event.getValues(), 0.001F);
64 Assert.assertEquals("o1 [aa=0.2 bb=0.4 cc=0.4]", event.toString());
59 Assertions.assertEquals("o1", event.getOutcome());
60 Assertions.assertArrayEquals(new String[] {"aa", "bb", "cc"}, event.getContext());
61 Assertions.assertArrayEquals(new float[] {0.2F, 0.4F, 0.4F}, event.getValues(), 0.001F);
62 Assertions.assertEquals("o1 [aa=0.2 bb=0.4 cc=0.4]", event.toString());
6563 }
6664 }
1919 import java.io.IOException;
2020 import java.io.StringReader;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 public class FileEventStreamTest {
2626
2727 private static final String EVENTS =
2828 "other wc=ic w&c=he,ic n1wc=lc n1w&c=belongs,lc n2wc=lc\n" +
29 "other wc=lc w&c=belongs,lc p1wc=ic p1w&c=he,ic n1wc=lc\n" +
30 "other wc=lc w&c=to,lc p1wc=lc p1w&c=belongs,lc p2wc=ic\n" +
31 "org-start wc=ic w&c=apache,ic p1wc=lc p1w&c=to,lc\n" +
32 "org-cont wc=ic w&c=software,ic p1wc=ic p1w&c=apache,ic\n" +
33 "org-cont wc=ic w&c=foundation,ic p1wc=ic p1w&c=software,ic\n" +
34 "other wc=other w&c=.,other p1wc=ic\n";
29 "other wc=lc w&c=belongs,lc p1wc=ic p1w&c=he,ic n1wc=lc\n" +
30 "other wc=lc w&c=to,lc p1wc=lc p1w&c=belongs,lc p2wc=ic\n" +
31 "org-start wc=ic w&c=apache,ic p1wc=lc p1w&c=to,lc\n" +
32 "org-cont wc=ic w&c=software,ic p1wc=ic p1w&c=apache,ic\n" +
33 "org-cont wc=ic w&c=foundation,ic p1wc=ic p1w&c=software,ic\n" +
34 "other wc=other w&c=.,other p1wc=ic\n";
3535
3636 @Test
37 public void testSimpleReading() throws IOException {
37 void testSimpleReading() throws IOException {
3838 try (FileEventStream feStream = new FileEventStream(new StringReader(EVENTS))) {
39 Assert.assertEquals("other [wc=ic w&c=he,ic n1wc=lc n1w&c=belongs,lc n2wc=lc]",
40 feStream.read().toString());
41 Assert.assertEquals("other [wc=lc w&c=belongs,lc p1wc=ic p1w&c=he,ic n1wc=lc]",
42 feStream.read().toString());
43 Assert.assertEquals("other [wc=lc w&c=to,lc p1wc=lc p1w&c=belongs,lc p2wc=ic]",
44 feStream.read().toString());
45 Assert.assertEquals("org-start [wc=ic w&c=apache,ic p1wc=lc p1w&c=to,lc]",
46 feStream.read().toString());
47 Assert.assertEquals("org-cont [wc=ic w&c=software,ic p1wc=ic p1w&c=apache,ic]",
48 feStream.read().toString());
49 Assert.assertEquals("org-cont [wc=ic w&c=foundation,ic p1wc=ic p1w&c=software,ic]",
50 feStream.read().toString());
51 Assert.assertEquals("other [wc=other w&c=.,other p1wc=ic]",
52 feStream.read().toString());
53 Assert.assertNull(feStream.read());
39 Assertions.assertEquals("other [wc=ic w&c=he,ic n1wc=lc n1w&c=belongs,lc n2wc=lc]",
40 feStream.read().toString());
41 Assertions.assertEquals("other [wc=lc w&c=belongs,lc p1wc=ic p1w&c=he,ic n1wc=lc]",
42 feStream.read().toString());
43 Assertions.assertEquals("other [wc=lc w&c=to,lc p1wc=lc p1w&c=belongs,lc p2wc=ic]",
44 feStream.read().toString());
45 Assertions.assertEquals("org-start [wc=ic w&c=apache,ic p1wc=lc p1w&c=to,lc]",
46 feStream.read().toString());
47 Assertions.assertEquals("org-cont [wc=ic w&c=software,ic p1wc=ic p1w&c=apache,ic]",
48 feStream.read().toString());
49 Assertions.assertEquals("org-cont [wc=ic w&c=foundation,ic p1wc=ic p1w&c=software,ic]",
50 feStream.read().toString());
51 Assertions.assertEquals("other [wc=other w&c=.,other p1wc=ic]",
52 feStream.read().toString());
53 Assertions.assertNull(feStream.read());
5454 }
5555 }
5656
5757 @Test
58 public void testReset() throws IOException {
58 void testReset() throws IOException {
5959 try (FileEventStream feStream = new FileEventStream(new StringReader(EVENTS))) {
6060 feStream.reset();
61 Assert.fail("UnsupportedOperationException should be thrown");
62 }
63 catch (UnsupportedOperationException expected) {
61 Assertions.fail("UnsupportedOperationException should be thrown");
62 } catch (UnsupportedOperationException expected) {
6463 }
6564 }
6665 }
0 /*
1 * Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package opennlp.tools.ml.model;
18
19 import java.io.DataInputStream;
20 import java.io.DataOutputStream;
21 import java.io.File;
22 import java.io.IOException;
23 import java.nio.charset.StandardCharsets;
24 import java.nio.file.Files;
25 import java.util.ArrayList;
26 import java.util.List;
27
28 import org.junit.jupiter.api.AfterEach;
29 import org.junit.jupiter.api.Assertions;
30 import org.junit.jupiter.api.BeforeEach;
31 import org.junit.jupiter.api.Test;
32
33 /**
34 * Test cases for {@link ModelParameterChunker}.
35 *
36 * @author <a href="mailto:martin.wiesner@hs-heilbronn.de">Martin Wiesner</a>
37 */
38 public class ModelParameterChunkerTest {
39
40 private File tmp;
41
42 @BeforeEach
43 void setup() throws IOException {
44 tmp = File.createTempFile("chunker-test", ".dat");
45 tmp.deleteOnExit();
46 }
47
48 @AfterEach
49 void tearDown() {
50 tmp = null;
51 }
52
53 /*
54 * Note: 8k Integer elements will be concatenated into a flat String. The size of the resulting character
55 * sequence won't hit the critical 64K limit (see: DataOutputStream#writeUTF).
56 *
57 * No chunking is therefore required.
58 */
59 @Test
60 void testWriteReadUTFWithoutChunking() {
61 // 8k ints -> 48042 bytes for a flat String
62 testAndCheck(8192, 48042);
63 }
64
65 /*
66 * Note: 16k Integer elements will be concatenated into a flat String. The size of the resulting character
67 * sequence will exceed the critical 64K limit (see: DataOutputStream#writeUTF).
68 *
69 * Chunking is therefore required and used internally to avoid the blow up of the serialization procedure.
70 *
71 * When restoring the chunked String, the signature string (#SIGNATURE_CHUNKED_PARAMS) will be escaped.
72 * Thus, we can assume the restored string must be equal to the artificially created original input.
73 */
74 @Test
75 void testWriteReadUTFWithChunking() {
76 // 16k ints -> 103578 bytes for a flat String
77 testAndCheck(16384, 103578);
78 }
79
80 private void testAndCheck(int elementCount, int expectedByteLength) {
81 String p = getParameter(elementCount);
82 Assertions.assertNotNull(p);
83 Assertions.assertFalse(p.trim().isEmpty());
84 Assertions.assertEquals(expectedByteLength, p.getBytes(StandardCharsets.UTF_8).length);
85
86 // TEST
87 try (DataOutputStream dos = new DataOutputStream(Files.newOutputStream(tmp.toPath()))) {
88 ModelParameterChunker.writeUTF(dos, p);
89 } catch (IOException e) {
90 Assertions.fail(e.getLocalizedMessage());
91 }
92 // VERIFY
93 try (DataInputStream dis = new DataInputStream(Files.newInputStream(tmp.toPath()))) {
94 String restoredBelow64K = ModelParameterChunker.readUTF(dis);
95 // assumptions
96 Assertions.assertNotNull(restoredBelow64K);
97 Assertions.assertFalse(restoredBelow64K.trim().isEmpty());
98 Assertions.assertEquals(p, restoredBelow64K);
99 Assertions.assertEquals(expectedByteLength, p.getBytes(StandardCharsets.UTF_8).length);
100 } catch (IOException e) {
101 Assertions.fail(e.getLocalizedMessage());
102 }
103 }
104
105 private String getParameter(int elementCount) {
106 List<Integer> someParameters = new ArrayList<>(elementCount);
107 for (int i = 0; i < elementCount; i++) {
108 someParameters.add(i);
109 }
110 return someParameters.toString();
111 }
112 }
1919 import java.io.IOException;
2020 import java.util.Collections;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.util.ObjectStream;
2626 import opennlp.tools.util.TrainingParameters;
2828 public class OnePassDataIndexerTest {
2929
3030 @Test
31 public void testIndex() throws IOException {
31 void testIndex() throws IOException {
3232 // He belongs to <START:org> Apache Software Foundation <END> .
3333 ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
3434 .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
3535 .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
3636 .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
37 " powf=other,lc ppo=other")
37 " powf=other,lc ppo=other")
3838 .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
39 " powf=other,ic ppo=other")
39 " powf=other,ic ppo=other")
4040 .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
41 " pow=org-start,Software powf=org-start,ic ppo=other")
41 " pow=org-start,Software powf=org-start,ic ppo=other")
4242 .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
43 " powf=org-cont,ic ppo=org-start")
43 " powf=org-cont,ic ppo=org-start")
4444 .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
45 " ppo=org-cont")
45 " ppo=org-cont")
4646 .build();
4747
4848 DataIndexer indexer = new OnePassDataIndexer();
4949 indexer.init(new TrainingParameters(Collections.emptyMap()), null);
5050 indexer.index(eventStream);
51 Assert.assertEquals(3, indexer.getContexts().length);
52 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
53 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
54 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
55 Assert.assertNull(indexer.getValues());
56 Assert.assertEquals(5, indexer.getNumEvents());
57 Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
58 Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
59 Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
60 Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
61 Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
51 Assertions.assertEquals(3, indexer.getContexts().length);
52 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[0]);
53 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[1]);
54 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[2]);
55 Assertions.assertNull(indexer.getValues());
56 Assertions.assertEquals(5, indexer.getNumEvents());
57 Assertions.assertArrayEquals(new int[] {0, 1, 2}, indexer.getOutcomeList());
58 Assertions.assertArrayEquals(new int[] {3, 1, 1}, indexer.getNumTimesEventsSeen());
59 Assertions.assertArrayEquals(new String[] {"ppo=other"}, indexer.getPredLabels());
60 Assertions.assertArrayEquals(new String[] {"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
61 Assertions.assertArrayEquals(new int[] {5}, indexer.getPredCounts());
6262 }
6363 }
1919 import java.io.IOException;
2020 import java.util.Collections;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.util.ObjectStream;
2727 import opennlp.tools.util.TrainingParameters;
3030
3131 DataIndexer indexer;
3232
33 @Before
34 public void setUp() throws Exception {
33 @BeforeEach
34 void setUp() {
3535 indexer = new OnePassRealValueDataIndexer();
3636 indexer.init(new TrainingParameters(Collections.emptyMap()), null);
3737 }
3838
3939 @Test
40 public void testIndex() throws IOException {
40 void testIndex() throws IOException {
4141 // He belongs to <START:org> Apache Software Foundation <END> .
4242 ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
4343 .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
4444 .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
4545 .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
46 " powf=other,lc ppo=other")
46 " powf=other,lc ppo=other")
4747 .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
48 " powf=other,ic ppo=other")
48 " powf=other,ic ppo=other")
4949 .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
50 " pow=org-start,Software powf=org-start,ic ppo=other")
50 " pow=org-start,Software powf=org-start,ic ppo=other")
5151 .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
52 " powf=org-cont,ic ppo=org-start")
52 " powf=org-cont,ic ppo=org-start")
5353 .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
54 " ppo=org-cont")
54 " ppo=org-cont")
5555 .build();
5656
5757 indexer.index(eventStream);
58 Assert.assertEquals(3, indexer.getContexts().length);
59 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
60 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
61 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
62 Assert.assertEquals(3, indexer.getValues().length);
63 Assert.assertNull(indexer.getValues()[0]);
64 Assert.assertNull(indexer.getValues()[1]);
65 Assert.assertNull(indexer.getValues()[2]);
66 Assert.assertEquals(5, indexer.getNumEvents());
67 Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
68 Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
69 Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
70 Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
71 Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
58 Assertions.assertEquals(3, indexer.getContexts().length);
59 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[0]);
60 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[1]);
61 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[2]);
62 Assertions.assertEquals(3, indexer.getValues().length);
63 Assertions.assertNull(indexer.getValues()[0]);
64 Assertions.assertNull(indexer.getValues()[1]);
65 Assertions.assertNull(indexer.getValues()[2]);
66 Assertions.assertEquals(5, indexer.getNumEvents());
67 Assertions.assertArrayEquals(new int[] {0, 1, 2}, indexer.getOutcomeList());
68 Assertions.assertArrayEquals(new int[] {3, 1, 1}, indexer.getNumTimesEventsSeen());
69 Assertions.assertArrayEquals(new String[] {"ppo=other"}, indexer.getPredLabels());
70 Assertions.assertArrayEquals(new String[] {"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
71 Assertions.assertArrayEquals(new int[] {5}, indexer.getPredCounts());
7272 }
7373
7474 @Test
75 public void testIndexValues() throws IOException {
75 void testIndexValues() throws IOException {
7676 // He belongs to <START:org> Apache Software Foundation <END> .
7777 ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
7878 .add("other/w=he;0.1 n1w=belongs;0.2 n2w=to;0.1 po=other;0.1" +
79 " pow=other,He;0.1 powf=other,ic;0.1 ppo=other;0.1")
79 " pow=other,He;0.1 powf=other,ic;0.1 ppo=other;0.1")
8080 .add("other/w=belongs;0.1 p1w=he;0.2 n1w=to;0.1 n2w=apache;0.1" +
81 " po=other;0.1 pow=other,belongs;0.1 powf=other,lc;0.1 ppo=other;0.1")
81 " po=other;0.1 pow=other,belongs;0.1 powf=other,lc;0.1 ppo=other;0.1")
8282 .add("other/w=to;0.1 p1w=belongs;0.2 p2w=he;0.1 n1w=apache;0.1" +
83 " n2w=software;0.1 po=other;0.1 pow=other,to;0.1 powf=other,lc;0.1 ppo=other;0.1")
83 " n2w=software;0.1 po=other;0.1 pow=other,to;0.1 powf=other,lc;0.1 ppo=other;0.1")
8484 .add("org-start/w=apache;0.1 p1w=to;0.2 p2w=belongs;0.1 n1w=software;0.1 n2w=foundation;0.1" +
85 " po=other;0.1 pow=other,Apache;0.1 powf=other,ic;0.1 ppo=other;0.1")
85 " po=other;0.1 pow=other,Apache;0.1 powf=other,ic;0.1 ppo=other;0.1")
8686 .add("org-cont/w=software;0.1 p1w=apache;0.2 p2w=to;0.1 n1w=foundation;0.1" +
87 " n2w=.;0.1 po=org-start;0.1 pow=org-start,Software;0.1 powf=org-start,ic;0.1 ppo=other;0.1")
87 " n2w=.;0.1 po=org-start;0.1 pow=org-start,Software;0.1 powf=org-start,ic;0.1 ppo=other;0.1")
8888 .add("org-cont/w=foundation;0.1 p1w=software;0.2 p2w=apache;0.1 n1w=.;0.1 po=org-cont;0.1" +
89 " pow=org-cont,Foundation;0.1 powf=org-cont,ic;0.1 ppo=org-start;0.1")
89 " pow=org-cont,Foundation;0.1 powf=org-cont,ic;0.1 ppo=org-start;0.1")
9090 .add("other/w=.;0.1 p1w=foundation;0.1 p2w=software;0.1 po=org-cont;0.1 pow=org-cont,.;0.1" +
91 " powf=org-cont,other;0.1 ppo=org-cont;0.1")
91 " powf=org-cont,other;0.1 ppo=org-cont;0.1")
9292 .build();
9393
9494 indexer.index(eventStream);
9595 System.out.println(indexer);
96 Assert.assertEquals(3, indexer.getContexts().length);
97 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
98 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
99 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
100 Assert.assertEquals(3, indexer.getValues().length);
96 Assertions.assertEquals(3, indexer.getContexts().length);
97 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[0]);
98 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[1]);
99 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[2]);
100 Assertions.assertEquals(3, indexer.getValues().length);
101101 final float delta = 0.001F;
102 Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
103 indexer.getValues()[0], delta);
104 Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
105 indexer.getValues()[1], delta);
106 Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
107 indexer.getValues()[2], delta);
108 Assert.assertEquals(5, indexer.getNumEvents());
109 Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
110 Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
111 Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
112 Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
113 Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
102 Assertions.assertArrayEquals(
103 indexer.getValues()[0], new float[] {0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F}, delta);
104 Assertions.assertArrayEquals(
105 indexer.getValues()[1], new float[] {0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F}, delta);
106 Assertions.assertArrayEquals(
107 indexer.getValues()[2], new float[] {0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F}, delta);
108 Assertions.assertEquals(5, indexer.getNumEvents());
109 Assertions.assertArrayEquals(new int[] {0, 1, 2}, indexer.getOutcomeList());
110 Assertions.assertArrayEquals(new int[] {3, 1, 1}, indexer.getNumTimesEventsSeen());
111 Assertions.assertArrayEquals(new String[] {"ppo=other"}, indexer.getPredLabels());
112 Assertions.assertArrayEquals(new String[] {"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
113 Assertions.assertArrayEquals(new int[] {5}, indexer.getPredCounts());
114114 }
115115 }
1919 import java.io.IOException;
2020 import java.io.StringReader;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 public class RealValueFileEventStreamTest {
2626
2727 private static final String EVENTS =
2828 "other wc=ic=1 w&c=he,ic=2 n1wc=lc=3 n1w&c=belongs,lc=4 n2wc=lc=5\n" +
29 "other wc=lc=1 w&c=belongs,lc=2 p1wc=ic=3 p1w&c=he,ic=4 n1wc=lc=5\n" +
30 "other wc=lc=1 w&c=to,lc=2 p1wc=lc=3 p1w&c=belongs,lc=4 p2wc=ic=5\n" +
31 "org-start wc=ic=1 w&c=apache,ic=2 p1wc=lc=3 p1w&c=to,lc=4\n" +
32 "org-cont wc=ic=1 w&c=software,ic=2 p1wc=ic=3 p1w&c=apache,ic=4\n" +
33 "org-cont wc=ic=1 w&c=foundation,ic=2 p1wc=ic=3 p1w&c=software,ic=4\n" +
34 "other wc=other=1 w&c=.,other=2 p1wc=ic=3\n";
29 "other wc=lc=1 w&c=belongs,lc=2 p1wc=ic=3 p1w&c=he,ic=4 n1wc=lc=5\n" +
30 "other wc=lc=1 w&c=to,lc=2 p1wc=lc=3 p1w&c=belongs,lc=4 p2wc=ic=5\n" +
31 "org-start wc=ic=1 w&c=apache,ic=2 p1wc=lc=3 p1w&c=to,lc=4\n" +
32 "org-cont wc=ic=1 w&c=software,ic=2 p1wc=ic=3 p1w&c=apache,ic=4\n" +
33 "org-cont wc=ic=1 w&c=foundation,ic=2 p1wc=ic=3 p1w&c=software,ic=4\n" +
34 "other wc=other=1 w&c=.,other=2 p1wc=ic=3\n";
3535
3636 @Test
37 public void testSimpleReading() throws IOException {
37 void testSimpleReading() throws IOException {
3838 try (FileEventStream feStream = new FileEventStream(new StringReader(EVENTS))) {
39 Assert.assertEquals("other [wc=ic=1 w&c=he,ic=2 n1wc=lc=3 n1w&c=belongs,lc=4 n2wc=lc=5]",
40 feStream.read().toString());
41 Assert.assertEquals("other [wc=lc=1 w&c=belongs,lc=2 p1wc=ic=3 p1w&c=he,ic=4 n1wc=lc=5]",
42 feStream.read().toString());
43 Assert.assertEquals("other [wc=lc=1 w&c=to,lc=2 p1wc=lc=3 p1w&c=belongs,lc=4 p2wc=ic=5]",
44 feStream.read().toString());
45 Assert.assertEquals("org-start [wc=ic=1 w&c=apache,ic=2 p1wc=lc=3 p1w&c=to,lc=4]",
46 feStream.read().toString());
47 Assert.assertEquals("org-cont [wc=ic=1 w&c=software,ic=2 p1wc=ic=3 p1w&c=apache,ic=4]",
48 feStream.read().toString());
49 Assert.assertEquals("org-cont [wc=ic=1 w&c=foundation,ic=2 p1wc=ic=3 p1w&c=software,ic=4]",
50 feStream.read().toString());
51 Assert.assertEquals("other [wc=other=1 w&c=.,other=2 p1wc=ic=3]",
52 feStream.read().toString());
53 Assert.assertNull(feStream.read());
39 Assertions.assertEquals("other [wc=ic=1 w&c=he,ic=2 n1wc=lc=3 n1w&c=belongs,lc=4 n2wc=lc=5]",
40 feStream.read().toString());
41 Assertions.assertEquals("other [wc=lc=1 w&c=belongs,lc=2 p1wc=ic=3 p1w&c=he,ic=4 n1wc=lc=5]",
42 feStream.read().toString());
43 Assertions.assertEquals("other [wc=lc=1 w&c=to,lc=2 p1wc=lc=3 p1w&c=belongs,lc=4 p2wc=ic=5]",
44 feStream.read().toString());
45 Assertions.assertEquals("org-start [wc=ic=1 w&c=apache,ic=2 p1wc=lc=3 p1w&c=to,lc=4]",
46 feStream.read().toString());
47 Assertions.assertEquals("org-cont [wc=ic=1 w&c=software,ic=2 p1wc=ic=3 p1w&c=apache,ic=4]",
48 feStream.read().toString());
49 Assertions.assertEquals("org-cont [wc=ic=1 w&c=foundation,ic=2 p1wc=ic=3 p1w&c=software,ic=4]",
50 feStream.read().toString());
51 Assertions.assertEquals("other [wc=other=1 w&c=.,other=2 p1wc=ic=3]",
52 feStream.read().toString());
53 Assertions.assertNull(feStream.read());
5454 }
5555 }
5656
5757 @Test
58 public void testReset() throws IOException {
58 void testReset() throws IOException {
5959 try (FileEventStream feStream = new FileEventStream(new StringReader(EVENTS))) {
6060 feStream.reset();
61 Assert.fail("UnsupportedOperationException should be thrown");
62 }
63 catch (UnsupportedOperationException expected) {
61 Assertions.fail("UnsupportedOperationException should be thrown");
62 } catch (UnsupportedOperationException expected) {
6463 }
6564 }
6665 }
1616
1717 package opennlp.tools.ml.model;
1818
19 import java.io.IOException;
2019 import java.util.ArrayList;
2120 import java.util.Arrays;
2221 import java.util.List;
4847 String[] pair = cvPairs[i].split(";");
4948 if (pair.length != 2) {
5049 throw new RuntimeException(String.format("format error of the event \"%s\". "
51 + "\"%s\" doesn't have value", event, Arrays.toString(pair)));
50 + "\"%s\" doesn't have value", event, Arrays.toString(pair)));
5251 }
5352 context[i] = pair[0];
5453 values[i] = Float.parseFloat(pair[1]);
5554 }
5655 eventList.add(new Event(ss[0], context, values));
57 }
58 else {
56 } else {
5957 eventList.add(new Event(ss[0], cvPairs));
6058 }
6159
6563 public ObjectStream<Event> build() {
6664 return new ObjectStream<Event>() {
6765 @Override
68 public Event read() throws IOException {
66 public Event read() {
6967 if (eventList.size() <= pos) {
7068 return null;
7169 }
1919 import java.io.IOException;
2020 import java.util.Collections;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.namefind.DefaultNameContextGenerator;
2626 import opennlp.tools.namefind.NameContextGenerator;
3535 public class TwoPassDataIndexerTest {
3636
3737 @Test
38 public void testIndex() throws IOException {
38 void testIndex() throws IOException {
3939 // He belongs to <START:org> Apache Software Foundation <END> .
4040 ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
4141 .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
4242 .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
4343 .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
44 " powf=other,lc ppo=other")
44 " powf=other,lc ppo=other")
4545 .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
46 " powf=other,ic ppo=other")
46 " powf=other,ic ppo=other")
4747 .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
48 " pow=org-start,Software powf=org-start,ic ppo=other")
48 " pow=org-start,Software powf=org-start,ic ppo=other")
4949 .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
50 " powf=org-cont,ic ppo=org-start")
50 " powf=org-cont,ic ppo=org-start")
5151 .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
52 " ppo=org-cont")
52 " ppo=org-cont")
5353 .build();
5454
5555 DataIndexer indexer = new TwoPassDataIndexer();
5656 indexer.init(new TrainingParameters(Collections.emptyMap()), null);
5757 indexer.index(eventStream);
58 Assert.assertEquals(3, indexer.getContexts().length);
59 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
60 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
61 Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
62 Assert.assertNull(indexer.getValues());
63 Assert.assertEquals(5, indexer.getNumEvents());
64 Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
65 Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
66 Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
67 Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
68 Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
58 Assertions.assertEquals(3, indexer.getContexts().length);
59 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[0]);
60 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[1]);
61 Assertions.assertArrayEquals(new int[] {0}, indexer.getContexts()[2]);
62 Assertions.assertNull(indexer.getValues());
63 Assertions.assertEquals(5, indexer.getNumEvents());
64 Assertions.assertArrayEquals(new int[] {0, 1, 2}, indexer.getOutcomeList());
65 Assertions.assertArrayEquals(new int[] {3, 1, 1}, indexer.getNumTimesEventsSeen());
66 Assertions.assertArrayEquals(new String[] {"ppo=other"}, indexer.getPredLabels());
67 Assertions.assertArrayEquals(new String[] {"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
68 Assertions.assertArrayEquals(new int[] {5}, indexer.getPredCounts());
6969 }
7070
7171 @Test
72 public void testIndexWithNewline() throws IOException {
72 void testIndexWithNewline() throws IOException {
7373
7474 String[] sentence = "He belongs to Apache \n Software Foundation .".split(" ");
7575
7676 NameContextGenerator CG = new DefaultNameContextGenerator(
77 (AdaptiveFeatureGenerator[]) null);
77 (AdaptiveFeatureGenerator[]) null);
7878
7979 NameSample nameSample = new NameSample(sentence,
80 new Span[] { new Span(3, 7) }, false);
80 new Span[] {new Span(3, 7)}, false);
8181
8282 ObjectStream<Event> eventStream = new NameFinderEventStream(
83 ObjectStreamUtils.createObjectStream(nameSample), "org", CG, null);
83 ObjectStreamUtils.createObjectStream(nameSample), "org", CG, null);
8484
8585 DataIndexer indexer = new TwoPassDataIndexer();
8686 indexer.init(new TrainingParameters(Collections.emptyMap()), null);
8787 indexer.index(eventStream);
88 Assert.assertEquals(5, indexer.getContexts().length);
88 Assertions.assertEquals(5, indexer.getContexts().length);
8989
9090 }
9191 }
2121 import java.util.HashMap;
2222 import java.util.List;
2323
24 import org.junit.Assert;
25 import org.junit.Before;
26 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeEach;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.ml.AbstractTrainer;
2929 import opennlp.tools.ml.model.AbstractDataIndexer;
4242
4343 private DataIndexer testDataIndexer;
4444
45 @Before
46 public void initIndexer() {
45 @BeforeEach
46 void initIndexer() {
4747 TrainingParameters trainingParameters = new TrainingParameters();
4848 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
49 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);;
49 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
50 ;
5051 testDataIndexer = new TwoPassDataIndexer();
5152 testDataIndexer.init(trainingParameters, new HashMap<>());
5253 }
5354
5455 @Test
55 public void testNaiveBayes1() throws IOException {
56 void testNaiveBayes1() throws IOException {
5657
5758 testDataIndexer.index(createTrainingStream());
5859 NaiveBayesModel model =
6869 }
6970
7071 @Test
71 public void testNaiveBayes2() throws IOException {
72 void testNaiveBayes2() throws IOException {
7273
7374 testDataIndexer.index(createTrainingStream());
7475 NaiveBayesModel model =
8485 }
8586
8687 @Test
87 public void testNaiveBayes3() throws IOException {
88 void testNaiveBayes3() throws IOException {
8889
8990 testDataIndexer.index(createTrainingStream());
9091 NaiveBayesModel model =
100101 }
101102
102103 @Test
103 public void testNaiveBayes4() throws IOException {
104 void testNaiveBayes4() throws IOException {
104105
105106 testDataIndexer.index(createTrainingStream());
106107 NaiveBayesModel model =
117118 private void testModel(MaxentModel model, Event event, double higher_probability) {
118119 double[] outcomes = model.eval(event.getContext());
119120 String outcome = model.getBestOutcome(outcomes);
120 Assert.assertEquals(2, outcomes.length);
121 Assert.assertEquals(event.getOutcome(), outcome);
121 Assertions.assertEquals(2, outcomes.length);
122 Assertions.assertEquals(event.getOutcome(), outcome);
122123 if (event.getOutcome().equals(model.getOutcome(0))) {
123 Assert.assertEquals(higher_probability, outcomes[0], 0.0001);
124 Assertions.assertEquals(higher_probability, outcomes[0], 0.0001);
124125 }
125126 if (!event.getOutcome().equals(model.getOutcome(0))) {
126 Assert.assertEquals(1.0 - higher_probability, outcomes[0], 0.0001);
127 Assertions.assertEquals(1.0 - higher_probability, outcomes[0], 0.0001);
127128 }
128129 if (event.getOutcome().equals(model.getOutcome(1))) {
129 Assert.assertEquals(higher_probability, outcomes[1], 0.0001);
130 Assertions.assertEquals(higher_probability, outcomes[1], 0.0001);
130131 }
131132 if (!event.getOutcome().equals(model.getOutcome(1))) {
132 Assert.assertEquals(1.0 - higher_probability, outcomes[1], 0.0001);
133 Assertions.assertEquals(1.0 - higher_probability, outcomes[1], 0.0001);
133134 }
134135 }
135136
136 public static ObjectStream<Event> createTrainingStream() throws IOException {
137 public static ObjectStream<Event> createTrainingStream() {
137138 List<Event> trainingEvents = new ArrayList<>();
138139
139140 String label1 = "politics";
2121 import java.nio.file.Path;
2222 import java.util.HashMap;
2323
24 import org.junit.Assert;
25 import org.junit.Before;
26 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.BeforeEach;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.ml.AbstractTrainer;
2929 import opennlp.tools.ml.model.AbstractDataIndexer;
3939
4040 private DataIndexer testDataIndexer;
4141
42 @Before
43 public void initIndexer() {
42 @BeforeEach
43 void initIndexer() {
4444 TrainingParameters trainingParameters = new TrainingParameters();
4545 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
46 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);;
46 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
47 ;
4748 testDataIndexer = new TwoPassDataIndexer();
4849 testDataIndexer.init(trainingParameters, new HashMap<>());
4950 }
5051
5152 @Test
52 public void testBinaryModelPersistence() throws Exception {
53 void testBinaryModelPersistence() throws Exception {
5354 testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
5455 NaiveBayesModel model = (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
5556 Path tempFile = Files.createTempFile("bnb-", ".bin");
6061 NaiveBayesModelReader reader = new BinaryNaiveBayesModelReader(file);
6162 reader.checkModelType();
6263 AbstractModel abstractModel = reader.constructModel();
63 Assert.assertNotNull(abstractModel);
64 }
65 finally {
64 Assertions.assertNotNull(abstractModel);
65 } finally {
6666 file.delete();
6767 }
6868 }
6969
7070 @Test
71 public void testTextModelPersistence() throws Exception {
71 void testTextModelPersistence() throws Exception {
7272 testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
7373 NaiveBayesModel model = (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
7474 Path tempFile = Files.createTempFile("ptnb-", ".txt");
7979 NaiveBayesModelReader reader = new PlainTextNaiveBayesModelReader(file);
8080 reader.checkModelType();
8181 AbstractModel abstractModel = reader.constructModel();
82 Assert.assertNotNull(abstractModel);
83 }
84 finally {
82 Assertions.assertNotNull(abstractModel);
83 } finally {
8584 file.delete();
8685 }
8786 }
1919 import java.io.IOException;
2020 import java.util.HashMap;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.ml.AbstractTrainer;
2727 import opennlp.tools.ml.EventTrainer;
4040
4141 private DataIndexer testDataIndexer;
4242
43 @Before
44 public void initIndexer() {
43 @BeforeEach
44 void initIndexer() {
4545 TrainingParameters trainingParameters = new TrainingParameters();
4646 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
4747 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
5050 }
5151
5252 @Test
53 public void testNaiveBayesOnPrepAttachData() throws IOException {
53 void testNaiveBayesOnPrepAttachData() throws IOException {
5454 testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());
5555 MaxentModel model = new NaiveBayesTrainer().trainModel(testDataIndexer);
56 Assert.assertTrue(model instanceof NaiveBayesModel);
56 Assertions.assertTrue(model instanceof NaiveBayesModel);
5757 PrepAttachDataUtil.testModel(model, 0.7897994553107205);
5858 }
5959
6060 @Test
61 public void testNaiveBayesOnPrepAttachDataUsingTrainUtil() throws IOException {
61 void testNaiveBayesOnPrepAttachDataUsingTrainUtil() throws IOException {
6262 TrainingParameters trainParams = new TrainingParameters();
6363 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);
6464 trainParams.put(AbstractTrainer.CUTOFF_PARAM, 1);
6565
6666 EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, null);
6767 MaxentModel model = trainer.train(PrepAttachDataUtil.createTrainingStream());
68 Assert.assertTrue(model instanceof NaiveBayesModel);
68 Assertions.assertTrue(model instanceof NaiveBayesModel);
6969 PrepAttachDataUtil.testModel(model, 0.7897994553107205);
7070 }
7171
7272 @Test
73 public void testNaiveBayesOnPrepAttachDataUsingTrainUtilWithCutoff5() throws IOException {
73 void testNaiveBayesOnPrepAttachDataUsingTrainUtilWithCutoff5() throws IOException {
7474 TrainingParameters trainParams = new TrainingParameters();
7575 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);
7676 trainParams.put(AbstractTrainer.CUTOFF_PARAM, 5);
7777
7878 EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, null);
7979 MaxentModel model = trainer.train(PrepAttachDataUtil.createTrainingStream());
80 Assert.assertTrue(model instanceof NaiveBayesModel);
80 Assertions.assertTrue(model instanceof NaiveBayesModel);
8181 PrepAttachDataUtil.testModel(model, 0.7945035899975241);
8282 }
8383 }
2626 import java.nio.file.Path;
2727 import java.util.HashMap;
2828
29 import org.junit.Assert;
30 import org.junit.Before;
31 import org.junit.Test;
29 import org.junit.jupiter.api.Assertions;
30 import org.junit.jupiter.api.BeforeEach;
31 import org.junit.jupiter.api.Test;
3232
3333 import opennlp.tools.ml.AbstractTrainer;
3434 import opennlp.tools.ml.model.AbstractDataIndexer;
4444
4545 private DataIndexer testDataIndexer;
4646
47 @Before
48 public void initIndexer() {
47 @BeforeEach
48 void initIndexer() {
4949 TrainingParameters trainingParameters = new TrainingParameters();
5050 trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
51 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);;
51 trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
52 ;
5253 testDataIndexer = new TwoPassDataIndexer();
5354 testDataIndexer.init(trainingParameters, new HashMap<>());
5455 }
5556
5657 @Test
57 public void testNaiveBayes1() throws IOException {
58 void testNaiveBayes1() throws IOException {
5859
5960 testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
6061 NaiveBayesModel model1 =
7172 }
7273
7374 @Test
74 public void testNaiveBayes2() throws IOException {
75 void testNaiveBayes2() throws IOException {
7576
7677 testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
7778 NaiveBayesModel model1 =
8889 }
8990
9091 @Test
91 public void testNaiveBayes3() throws IOException {
92 void testNaiveBayes3() throws IOException {
9293
9394 testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
9495 NaiveBayesModel model1 =
105106 }
106107
107108 @Test
108 public void testNaiveBayes4() throws IOException {
109 void testNaiveBayes4() throws IOException {
109110
110111 testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
111112 NaiveBayesModel model1 =
123124
124125
125126 @Test
126 public void testPlainTextModel() throws IOException {
127 void testPlainTextModel() throws IOException {
127128 testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
128129 NaiveBayesModel model1 =
129130 (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
139140 new PlainTextNaiveBayesModelReader(new BufferedReader(new StringReader(sw1.toString())));
140141 reader.checkModelType();
141142
142 NaiveBayesModel model2 = (NaiveBayesModel)reader.constructModel();
143 NaiveBayesModel model2 = (NaiveBayesModel) reader.constructModel();
143144
144145 StringWriter sw2 = new StringWriter();
145146 modelWriter = new PlainTextNaiveBayesModelWriter(model2, new BufferedWriter(sw2));
146147 modelWriter.persist();
147148
148149 System.out.println(sw1.toString());
149 Assert.assertEquals(sw1.toString(), sw2.toString());
150 Assertions.assertEquals(sw1.toString(), sw2.toString());
150151
151152 }
152153
158159 modelWriter.persist();
159160 NaiveBayesModelReader reader = new BinaryNaiveBayesModelReader(file);
160161 reader.checkModelType();
161 return (NaiveBayesModel)reader.constructModel();
162 }
163 finally {
162 return (NaiveBayesModel) reader.constructModel();
163 } finally {
164164 file.delete();
165165 }
166166 }
169169 String[] labels1 = extractLabels(model1);
170170 String[] labels2 = extractLabels(model2);
171171
172 Assert.assertArrayEquals(labels1, labels2);
172 Assertions.assertArrayEquals(labels1, labels2);
173173
174174 double[] outcomes1 = model1.eval(event.getContext());
175175 double[] outcomes2 = model2.eval(event.getContext());
176176
177 Assert.assertArrayEquals(outcomes1, outcomes2, 0.000000000001);
177 Assertions.assertArrayEquals(outcomes1, outcomes2, 0.000000000001);
178178
179179 }
180180
2424 import java.util.HashMap;
2525 import java.util.Map;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.ml.AbstractTrainer;
3131 import opennlp.tools.ml.EventTrainer;
4343 public class PerceptronPrepAttachTest {
4444
4545 @Test
46 public void testPerceptronOnPrepAttachData() throws IOException {
46 void testPerceptronOnPrepAttachData() throws IOException {
4747 TwoPassDataIndexer indexer = new TwoPassDataIndexer();
4848 TrainingParameters indexingParameters = new TrainingParameters();
4949 indexingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
5555 }
5656
5757 @Test
58 public void testPerceptronOnPrepAttachDataWithSkippedAveraging() throws IOException {
58 void testPerceptronOnPrepAttachDataWithSkippedAveraging() throws IOException {
5959
6060 TrainingParameters trainParams = new TrainingParameters();
6161 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
6868 }
6969
7070 @Test
71 public void testPerceptronOnPrepAttachDataWithTolerance() throws IOException {
71 void testPerceptronOnPrepAttachDataWithTolerance() throws IOException {
7272
7373 TrainingParameters trainParams = new TrainingParameters();
7474 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
8282 }
8383
8484 @Test
85 public void testPerceptronOnPrepAttachDataWithStepSizeDecrease() throws IOException {
85 void testPerceptronOnPrepAttachDataWithStepSizeDecrease() throws IOException {
8686
8787 TrainingParameters trainParams = new TrainingParameters();
8888 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
9696 }
9797
9898 @Test
99 public void testModelSerialization() throws IOException {
99 void testModelSerialization() throws IOException {
100100
101101 TrainingParameters trainParams = new TrainingParameters();
102102 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
121121 }
122122
123123 @Test
124 public void testModelEquals() throws IOException {
124 void testModelEquals() throws IOException {
125125 TrainingParameters trainParams = new TrainingParameters();
126126 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
127127 trainParams.put(AbstractTrainer.CUTOFF_PARAM, 1);
131131 AbstractModel modelA = (AbstractModel) trainer.train(PrepAttachDataUtil.createTrainingStream());
132132 AbstractModel modelB = (AbstractModel) trainer.train(PrepAttachDataUtil.createTrainingStream());
133133
134 Assert.assertEquals(modelA, modelB);
135 Assert.assertEquals(modelA.hashCode(), modelB.hashCode());
134 Assertions.assertEquals(modelA, modelB);
135 Assertions.assertEquals(modelA.hashCode(), modelB.hashCode());
136136 }
137
137
138138 @Test
139 public void verifyReportMap() throws IOException {
139 void verifyReportMap() throws IOException {
140140 TrainingParameters trainParams = new TrainingParameters();
141141 trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
142142 trainParams.put(AbstractTrainer.CUTOFF_PARAM, 1);
143143 // Since we are verifying the report map, we don't need to have more than 1 iteration
144144 trainParams.put(AbstractTrainer.ITERATIONS_PARAM, 1);
145145 trainParams.put("UseSkippedAveraging", true);
146
147 Map<String,String> reportMap = new HashMap<>();
146
147 Map<String, String> reportMap = new HashMap<>();
148148 EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, reportMap);
149149 trainer.train(PrepAttachDataUtil.createTrainingStream());
150 Assert.assertTrue("Report Map does not contain the training event hash",
151 reportMap.containsKey("Training-Eventhash"));
150 Assertions.assertTrue(
151 reportMap.containsKey("Training-Eventhash"), "Report Map does not contain the training event hash");
152152 }
153153 }
2020 import java.util.Arrays;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.util.Span;
2727
5050 private static final String OTHER = BilouCodec.OTHER;
5151
5252 @Test
53 public void testEncodeNoNames() {
53 void testEncodeNoNames() {
5454 NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true);
5555 String[] expected = new String[] {OTHER, OTHER, OTHER, OTHER};
5656 String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
57 Assert.assertArrayEquals("Only 'Other' is expected.", expected, acutal);
58 }
59
60 @Test
61 public void testEncodeSingleUnitTokenSpan() {
57 Assertions.assertArrayEquals(expected, acutal, "Only 'Other' is expected.");
58 }
59
60 @Test
61 void testEncodeSingleUnitTokenSpan() {
6262 String[] sentence = "I called Julie again.".split(" ");
63 Span[] singleSpan = new Span[] { new Span(2,3, A_TYPE)};
63 Span[] singleSpan = new Span[] {new Span(2, 3, A_TYPE)};
6464 NameSample nameSample = new NameSample(sentence, singleSpan, true);
6565 String[] expected = new String[] {OTHER, OTHER, A_UNIT, OTHER};
6666 String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
67 Assert.assertArrayEquals("'Julie' should be 'unit' only, the rest should be 'other'.", expected, acutal);
68 }
69
70 @Test
71 public void testEncodeDoubleTokenSpan() {
67 Assertions.assertArrayEquals(expected, acutal,
68 "'Julie' should be 'unit' only, the rest should be 'other'.");
69 }
70
71 @Test
72 void testEncodeDoubleTokenSpan() {
7273 String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
73 Span[] singleSpan = new Span[] { new Span(2,4, A_TYPE)};
74 Span[] singleSpan = new Span[] {new Span(2, 4, A_TYPE)};
7475 NameSample nameSample = new NameSample(sentence, singleSpan, true);
7576 String[] expected = new String[] {OTHER, OTHER, A_START, A_LAST, OTHER};
7677 String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
77 Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is 'last' " +
78 "and the rest should be 'other'.", expected, acutal);
79 }
80
81 @Test
82 public void testEncodeTripleTokenSpan() {
78 Assertions.assertArrayEquals(expected, acutal, "'Stefanie' should be 'start' only, 'Schmidt' is 'last' " +
79 "and the rest should be 'other'.");
80 }
81
82 @Test
83 void testEncodeTripleTokenSpan() {
8384 String[] sentence = "Secretary - General Anders Fogh Rasmussen is from Denmark.".split(" ");
84 Span[] singleSpan = new Span[] { new Span(3,6, A_TYPE)};
85 Span[] singleSpan = new Span[] {new Span(3, 6, A_TYPE)};
8586 NameSample nameSample = new NameSample(sentence, singleSpan, true);
8687 String[] expected = new String[] {OTHER, OTHER, OTHER, A_START, A_CONTINUE,
8788 A_LAST, OTHER, OTHER, OTHER};
8889 String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
89 Assert.assertArrayEquals("'Anders' should be 'start' only, 'Fogh' is 'inside', " +
90 "'Rasmussen' is 'last' and the rest should be 'other'.", expected, acutal);
91 }
92
93 @Test
94 public void testEncodeAdjacentUnitSpans() {
90 Assertions.assertArrayEquals(expected, acutal, "'Anders' should be 'start' only, 'Fogh' is 'inside', " +
91 "'Rasmussen' is 'last' and the rest should be 'other'.");
92 }
93
94 @Test
95 void testEncodeAdjacentUnitSpans() {
9596 String[] sentence = "word PersonA PersonB word".split(" ");
96 Span[] singleSpan = new Span[] { new Span(1,2, A_TYPE), new Span(2, 3, A_TYPE)};
97 Span[] singleSpan = new Span[] {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
9798 NameSample nameSample = new NameSample(sentence, singleSpan, true);
9899 String[] expected = new String[] {OTHER, A_UNIT, A_UNIT, OTHER};
99100 String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
100 Assert.assertArrayEquals("Both PersonA and PersonB are 'unit' tags", expected, acutal);
101 }
102
103 @Test
104 public void testCreateSequenceValidator() {
105 Assert.assertTrue(codec.createSequenceValidator() instanceof BilouNameFinderSequenceValidator);
106 }
107
108 @Test
109 public void testDecodeEmpty() {
101 Assertions.assertArrayEquals(expected, acutal, "Both PersonA and PersonB are 'unit' tags");
102 }
103
104 @Test
105 void testCreateSequenceValidator() {
106 Assertions.assertTrue(codec.createSequenceValidator() instanceof BilouNameFinderSequenceValidator);
107 }
108
109 @Test
110 void testDecodeEmpty() {
110111 Span[] expected = new Span[] {};
111112 Span[] actual = codec.decode(new ArrayList<String>());
112 Assert.assertArrayEquals(expected, actual);
113 Assertions.assertArrayEquals(expected, actual);
113114 }
114115
115116 /**
116117 * Unit, Other
117118 */
118119 @Test
119 public void testDecodeSingletonFirst() {
120 void testDecodeSingletonFirst() {
120121
121122 List<String> encoded = Arrays.asList(A_UNIT, OTHER);
122 Span[] expected = new Span[] {new Span(0,1, A_TYPE)};
123 Span[] actual = codec.decode(encoded);
124 Assert.assertArrayEquals(expected, actual);
123 Span[] expected = new Span[] {new Span(0, 1, A_TYPE)};
124 Span[] actual = codec.decode(encoded);
125 Assertions.assertArrayEquals(expected, actual);
125126 }
126127
127128 /**
128129 * Unit, Unit, Other
129130 */
130131 @Test
131 public void testDecodeAdjacentSingletonFirst() {
132 void testDecodeAdjacentSingletonFirst() {
132133 List<String> encoded = Arrays.asList(A_UNIT, A_UNIT, OTHER);
133134 Span[] expected = new Span[] {new Span(0, 1, A_TYPE), new Span(1, 2, A_TYPE)};
134135 Span[] actual = codec.decode(encoded);
135 Assert.assertArrayEquals(expected, actual);
136 Assertions.assertArrayEquals(expected, actual);
136137 }
137138
138139 /**
139140 * Start, Last, Other
140141 */
141142 @Test
142 public void testDecodePairFirst() {
143 void testDecodePairFirst() {
143144 List<String> encoded = Arrays.asList(A_START, A_LAST, OTHER);
144145 Span[] expected = new Span[] {new Span(0, 2, A_TYPE)};
145146 Span[] actual = codec.decode(encoded);
146 Assert.assertArrayEquals(expected, actual);
147 Assertions.assertArrayEquals(expected, actual);
147148 }
148149
149150 /**
150151 * Start, Continue, Last, Other
151152 */
152153 @Test
153 public void testDecodeTripletFirst() {
154 void testDecodeTripletFirst() {
154155 List<String> encoded = Arrays.asList(A_START, A_CONTINUE, A_LAST, OTHER);
155156 Span[] expected = new Span[] {new Span(0, 3, A_TYPE)};
156157 Span[] actual = codec.decode(encoded);
157 Assert.assertArrayEquals(expected, actual);
158 Assertions.assertArrayEquals(expected, actual);
158159 }
159160
160161 /**
161162 * Start, Continue, Continue, Last, Other
162163 */
163164 @Test
164 public void testDecodeTripletContinuationFirst() {
165 void testDecodeTripletContinuationFirst() {
165166 List<String> encoded = Arrays.asList(A_START, A_CONTINUE, A_CONTINUE,
166167 A_LAST, OTHER);
167168 Span[] expected = new Span[] {new Span(0, 4, A_TYPE)};
168169 Span[] actual = codec.decode(encoded);
169 Assert.assertArrayEquals(expected, actual);
170 Assertions.assertArrayEquals(expected, actual);
170171 }
171172
172173 /**
173174 * Start, Last, Unit, Other
174175 */
175176 @Test
176 public void testDecodeAdjacentPairSingleton() {
177 void testDecodeAdjacentPairSingleton() {
177178 List<String> encoded = Arrays.asList(A_START, A_LAST, A_UNIT, OTHER);
178179 Span[] expected = new Span[] {new Span(0, 2, A_TYPE),
179180 new Span(2, 3, A_TYPE)};
180181 Span[] actual = codec.decode(encoded);
181 Assert.assertArrayEquals(expected, actual);
182 Assertions.assertArrayEquals(expected, actual);
182183 }
183184
184185 /**
185186 * Other, Unit, Other
186187 */
187188 @Test
188 public void testDecodeOtherFirst() {
189 void testDecodeOtherFirst() {
189190 List<String> encoded = Arrays.asList(OTHER, A_UNIT, OTHER);
190191 Span[] expected = new Span[] {new Span(1, 2, A_TYPE)};
191192 Span[] actual = codec.decode(encoded);
192 Assert.assertArrayEquals(expected, actual);
193 Assertions.assertArrayEquals(expected, actual);
193194 }
194195
195196 /**
196197 * Other, A-Start, A-Continue, A-Last, Other, B-Start, B-Last, Other, C-Unit, Other
197198 */
198199 @Test
199 public void testDecodeMultiClass() {
200 void testDecodeMultiClass() {
200201 List<String> encoded = Arrays.asList(OTHER, A_START, A_CONTINUE, A_LAST, OTHER,
201202 B_START, B_LAST, OTHER, C_UNIT, OTHER);
202203 Span[] expected = new Span[] {new Span(1, 4, A_TYPE),
203 new Span(5, 7, B_TYPE), new Span(8,9, C_TYPE)};
204 Span[] actual = codec.decode(encoded);
205 Assert.assertArrayEquals(expected, actual);
206 }
207
208
209 @Test
210 public void testCompatibilityEmpty() {
211 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
204 new Span(5, 7, B_TYPE), new Span(8, 9, C_TYPE)};
205 Span[] actual = codec.decode(encoded);
206 Assertions.assertArrayEquals(expected, actual);
207 }
208
209
210 @Test
211 void testCompatibilityEmpty() {
212 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {}));
212213 }
213214
214215 /**
221222 * A-Start, A-Last, B-Start => Fail
222223 */
223224 @Test
224 public void testCompatibilitySinglesStart() {
225 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START}));
226 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START}));
227 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START}));
225 void testCompatibilitySinglesStart() {
226 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_START}));
227 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START}));
228 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START}));
228229 }
229230
230231 /**
233234 * A-Start, A-Last, B-Continue => Fail
234235 */
235236 @Test
236 public void testCompatibilitySinglesContinue() {
237 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE}));
238 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE}));
239 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE}));
237 void testCompatibilitySinglesContinue() {
238 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE}));
239 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE}));
240 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE}));
240241 }
241242
242243 /**
245246 * A-Start, A-Last, B-Last => Fail
246247 */
247248 @Test
248 public void testCompatibilitySinglesLast() {
249 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST}));
250 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST}));
251 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST}));
249 void testCompatibilitySinglesLast() {
250 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST}));
251 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST}));
252 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST}));
252253 }
253254
254255 /**
257258 * A-Start, A-Last, Other => Pass
258259 */
259260 @Test
260 public void testCompatibilitySinglesOther() {
261 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
262 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER}));
263 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER}));
261 void testCompatibilitySinglesOther() {
262 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
263 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER}));
264 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER}));
264265 }
265266
266267 /**
269270 * A-Start, A-Last, B-Unit => Pass
270271 */
271272 @Test
272 public void testCompatibilitySinglesUnit() {
273 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {B_UNIT}));
274 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_UNIT}));
275 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_UNIT}));
273 void testCompatibilitySinglesUnit() {
274 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {B_UNIT}));
275 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_UNIT}));
276 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_UNIT}));
276277 }
277278
278279 /**
279280 * Doubles and doubles in combination with other valid type (unit/start+last)
280 *
281 * <p>
281282 * B-Start, B-Continue => Fail
282283 * A-Unit, B-Start, B-Continue => Fail
283284 * A-Start, A-Last, B-Start, B-Continue => Fail
284285 */
285286 @Test
286 public void testCompatibilityStartContinue() {
287 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_CONTINUE}));
288 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_CONTINUE}));
289 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_CONTINUE}));
287 void testCompatibilityStartContinue() {
288 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_CONTINUE}));
289 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_CONTINUE}));
290 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_CONTINUE}));
290291 }
291292
292293 /**
295296 * A-Start, A-Last, B-Start, B-Last => Pass
296297 */
297298 @Test
298 public void testCompatibilityStartLast() {
299 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {B_START, B_LAST}));
300 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_LAST}));
301 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_LAST}));
299 void testCompatibilityStartLast() {
300 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {B_START, B_LAST}));
301 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_LAST}));
302 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_LAST}));
302303 }
303304
304305 /**
307308 * A-Start, A-Last, B-Start, Other => Fail
308309 */
309310 @Test
310 public void testCompatibilityStartOther() {
311 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, OTHER}));
312 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, OTHER}));
313 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, OTHER}));
311 void testCompatibilityStartOther() {
312 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, OTHER}));
313 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, OTHER}));
314 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, OTHER}));
314315 }
315316
316317 /**
319320 * A-Start, A-Last, B-Start, B-Unit => Fail
320321 */
321322 @Test
322 public void testCompatibilityStartUnit() {
323 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_UNIT}));
324 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_UNIT}));
325 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_UNIT}));
323 void testCompatibilityStartUnit() {
324 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_UNIT}));
325 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_UNIT}));
326 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_UNIT}));
326327 }
327328
328329 /**
331332 * A-Start, A-Last, B-Continue, B-Last => Fail
332333 */
333334 @Test
334 public void testCompatibilityContinueLast() {
335 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_LAST}));
336 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_LAST}));
337 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_LAST}));
335 void testCompatibilityContinueLast() {
336 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_LAST}));
337 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_LAST}));
338 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_LAST}));
338339 }
339340
340341 /**
343344 * A-Start, A-Last, B-Continue, Other => Fail
344345 */
345346 @Test
346 public void testCompatibilityContinueOther() {
347 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, OTHER}));
348 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, OTHER}));
349 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, OTHER}));
347 void testCompatibilityContinueOther() {
348 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, OTHER}));
349 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, OTHER}));
350 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, OTHER}));
350351 }
351352
352353 /**
355356 * A-Start, A-Last, B-Continue, B-Unit => Fail
356357 */
357358 @Test
358 public void testCompatibilityContinueUnit() {
359 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_UNIT}));
360 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_UNIT}));
361 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_UNIT}));
359 void testCompatibilityContinueUnit() {
360 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_UNIT}));
361 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_UNIT}));
362 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_UNIT}));
362363 }
363364
364365 /**
367368 * A-Start, A-Last, B-Last, Other => Fail
368369 */
369370 @Test
370 public void testCompatibilityLastOther() {
371 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, OTHER}));
372 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, OTHER}));
373 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, OTHER}));
371 void testCompatibilityLastOther() {
372 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, OTHER}));
373 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, OTHER}));
374 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, OTHER}));
374375 }
375376
376377 /**
379380 * A-Start, A-Last, B-Last, B-Unit => Fail
380381 */
381382 @Test
382 public void testCompatibilityLastUnit() {
383 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, B_UNIT}));
384 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, B_UNIT}));
385 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, B_UNIT}));
383 void testCompatibilityLastUnit() {
384 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, B_UNIT}));
385 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, B_UNIT}));
386 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, B_UNIT}));
386387 }
387388
388389 /**
391392 * A-Start, A-Last, Other, B-Unit => Pass
392393 */
393394 @Test
394 public void testCompatibilityOtherUnit() {
395 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {OTHER, B_UNIT}));
396 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER, B_UNIT}));
397 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER, B_UNIT}));
395 void testCompatibilityOtherUnit() {
396 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {OTHER, B_UNIT}));
397 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER, B_UNIT}));
398 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER, B_UNIT}));
398399 }
399400
400401 /**
401402 * Triples and triples in combination with other valid type (unit/start+last)
402 *
403 * <p>
403404 * B-Start, B-Continue, B-Last => Pass
404405 * A-Unit, B-Start, B-Continue, B-Last => Pass
405406 * A-Start, A-Last, B-Start, B-Continue, B-Last => Pass
406407 */
407408 @Test
408 public void testCompatibilityStartContinueLast() {
409 Assert.assertTrue(codec.areOutcomesCompatible(
409 void testCompatibilityStartContinueLast() {
410 Assertions.assertTrue(codec.areOutcomesCompatible(
410411 new String[] {B_START, B_CONTINUE, B_LAST}));
411 Assert.assertTrue(codec.areOutcomesCompatible(
412 Assertions.assertTrue(codec.areOutcomesCompatible(
412413 new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST}));
413 Assert.assertTrue(codec.areOutcomesCompatible(
414 Assertions.assertTrue(codec.areOutcomesCompatible(
414415 new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST}));
415416 }
416417
420421 * A-Start, A-Last, B-Start, B-Continue, Other => Fail
421422 */
422423 @Test
423 public void testCompatibilityStartContinueOther() {
424 Assert.assertFalse(codec.areOutcomesCompatible(
424 void testCompatibilityStartContinueOther() {
425 Assertions.assertFalse(codec.areOutcomesCompatible(
425426 new String[] {B_START, B_CONTINUE, OTHER}));
426 Assert.assertFalse(codec.areOutcomesCompatible(
427 Assertions.assertFalse(codec.areOutcomesCompatible(
427428 new String[] {A_UNIT, B_START, B_CONTINUE, OTHER}));
428 Assert.assertFalse(codec.areOutcomesCompatible(
429 Assertions.assertFalse(codec.areOutcomesCompatible(
429430 new String[] {A_START, A_LAST, B_START, B_CONTINUE, OTHER}));
430431 }
431432
435436 * A-Start, A-Last, B-Start, B-Continue, B-Unit => Fail
436437 */
437438 @Test
438 public void testCompatibilityStartContinueUnit() {
439 Assert.assertFalse(codec.areOutcomesCompatible(
439 void testCompatibilityStartContinueUnit() {
440 Assertions.assertFalse(codec.areOutcomesCompatible(
440441 new String[] {B_START, B_CONTINUE, B_UNIT}));
441 Assert.assertFalse(codec.areOutcomesCompatible(
442 Assertions.assertFalse(codec.areOutcomesCompatible(
442443 new String[] {A_UNIT, B_START, B_CONTINUE, B_UNIT}));
443 Assert.assertFalse(codec.areOutcomesCompatible(
444 Assertions.assertFalse(codec.areOutcomesCompatible(
444445 new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_UNIT}));
445446 }
446447
450451 * A-Start, A-Last, B-Continue, B-Last, Other => Fail
451452 */
452453 @Test
453 public void testCompatibilityContinueLastOther() {
454 Assert.assertFalse(codec.areOutcomesCompatible(
454 void testCompatibilityContinueLastOther() {
455 Assertions.assertFalse(codec.areOutcomesCompatible(
455456 new String[] {B_CONTINUE, B_LAST, OTHER}));
456 Assert.assertFalse(codec.areOutcomesCompatible(
457 Assertions.assertFalse(codec.areOutcomesCompatible(
457458 new String[] {A_UNIT, B_CONTINUE, B_LAST, OTHER}));
458 Assert.assertFalse(codec.areOutcomesCompatible(
459 Assertions.assertFalse(codec.areOutcomesCompatible(
459460 new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, OTHER}));
460461 }
461462
465466 * A-Start, A-Last, B-Continue, B-Last, B_Unit => Fail
466467 */
467468 @Test
468 public void testCompatibilityContinueLastUnit() {
469 Assert.assertFalse(codec.areOutcomesCompatible(
469 void testCompatibilityContinueLastUnit() {
470 Assertions.assertFalse(codec.areOutcomesCompatible(
470471 new String[] {B_CONTINUE, B_LAST, B_UNIT}));
471 Assert.assertFalse(codec.areOutcomesCompatible(
472 Assertions.assertFalse(codec.areOutcomesCompatible(
472473 new String[] {A_UNIT, B_CONTINUE, B_LAST, B_UNIT}));
473 Assert.assertFalse(codec.areOutcomesCompatible(
474 Assertions.assertFalse(codec.areOutcomesCompatible(
474475 new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, B_UNIT}));
475476 }
476477
480481 * A-Start, A-Last, B-Continue, B-Last, B_Unit => Fail
481482 */
482483 @Test
483 public void testCompatibilityLastOtherUnit() {
484 Assert.assertFalse(codec.areOutcomesCompatible(
484 void testCompatibilityLastOtherUnit() {
485 Assertions.assertFalse(codec.areOutcomesCompatible(
485486 new String[] {B_LAST, OTHER, B_UNIT}));
486 Assert.assertFalse(codec.areOutcomesCompatible(
487 Assertions.assertFalse(codec.areOutcomesCompatible(
487488 new String[] {A_UNIT, B_LAST, OTHER, B_UNIT}));
488 Assert.assertFalse(codec.areOutcomesCompatible(
489 Assertions.assertFalse(codec.areOutcomesCompatible(
489490 new String[] {A_START, A_LAST, B_LAST, OTHER, B_UNIT}));
490491 }
491492
492493 /**
493494 * Quadruples and quadruple in combination of unit/start+last
494 *
495 * <p>
495496 * B-Start, B-Continue, B-Last, Other => Pass
496497 * A-Unit, B-Start, B-Continue, B-Last, Other => Pass
497498 * A-Start, A-Last, B-Start, B-Continue, B-Last, Other => Pass
498499 */
499500 @Test
500 public void testCompatibilityStartContinueLastOther() {
501 Assert.assertTrue(codec.areOutcomesCompatible(
501 void testCompatibilityStartContinueLastOther() {
502 Assertions.assertTrue(codec.areOutcomesCompatible(
502503 new String[] {B_START, B_CONTINUE, B_LAST, OTHER}));
503 Assert.assertTrue(codec.areOutcomesCompatible(
504 Assertions.assertTrue(codec.areOutcomesCompatible(
504505 new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, OTHER}));
505 Assert.assertTrue(codec.areOutcomesCompatible(
506 Assertions.assertTrue(codec.areOutcomesCompatible(
506507 new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, OTHER}));
507508 }
508509
512513 * A-Start, A-Last, B-Start, B-Continue, B-Last, B-Unit => Pass
513514 */
514515 @Test
515 public void testCompatibilityStartContinueLastUnit() {
516 Assert.assertTrue(codec.areOutcomesCompatible(
516 void testCompatibilityStartContinueLastUnit() {
517 Assertions.assertTrue(codec.areOutcomesCompatible(
517518 new String[] {B_START, B_CONTINUE, B_LAST, B_UNIT}));
518 Assert.assertTrue(codec.areOutcomesCompatible(
519 Assertions.assertTrue(codec.areOutcomesCompatible(
519520 new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, B_UNIT}));
520 Assert.assertTrue(codec.areOutcomesCompatible(
521 Assertions.assertTrue(codec.areOutcomesCompatible(
521522 new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, B_UNIT}));
522523 }
523524
528529 * A-Start, A-Last, B-Continue, B-Last, Other, B-Unit => Fail
529530 */
530531 @Test
531 public void testCompatibilityContinueLastOtherUnit() {
532 Assert.assertFalse(codec.areOutcomesCompatible(
532 void testCompatibilityContinueLastOtherUnit() {
533 Assertions.assertFalse(codec.areOutcomesCompatible(
533534 new String[] {B_CONTINUE, B_LAST, OTHER, B_UNIT}));
534 Assert.assertFalse(codec.areOutcomesCompatible(
535 Assertions.assertFalse(codec.areOutcomesCompatible(
535536 new String[] {A_UNIT, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
536 Assert.assertFalse(codec.areOutcomesCompatible(
537 Assertions.assertFalse(codec.areOutcomesCompatible(
537538 new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
538539 }
539540
540541 /**
541542 * Quintuple
542 *
543 * <p>
543544 * B-Start, B-Continue, B-Last, Other, B-Unit => Pass
544545 * A-Unit, B-Start, B-Continue, B-Last, Other, B-Unit => Pass
545546 * A-Staart, A-Last, B-Start, B-Continue, B-Last, Other, B-Unit => Pass
546547 */
547548 @Test
548 public void testCompatibilityUnitOther() {
549 Assert.assertTrue(codec.areOutcomesCompatible(
549 void testCompatibilityUnitOther() {
550 Assertions.assertTrue(codec.areOutcomesCompatible(
550551 new String[] {B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
551 Assert.assertTrue(codec.areOutcomesCompatible(
552 Assertions.assertTrue(codec.areOutcomesCompatible(
552553 new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
553 Assert.assertTrue(codec.areOutcomesCompatible(
554 Assertions.assertTrue(codec.areOutcomesCompatible(
554555 new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
555556 }
556557
558559 * Multiclass
559560 */
560561 @Test
561 public void testCompatibilityMultiClass() {
562 Assert.assertTrue(codec.areOutcomesCompatible(
562 void testCompatibilityMultiClass() {
563 Assertions.assertTrue(codec.areOutcomesCompatible(
563564 new String[] {B_UNIT, A_CONTINUE, A_LAST, A_UNIT,
564565 B_START, B_LAST, A_START, C_UNIT, OTHER}));
565566 }
568569 * Bad combinations
569570 */
570571 @Test
571 public void testCompatibilityBadTag() {
572 Assert.assertFalse(codec.areOutcomesCompatible(
572 void testCompatibilityBadTag() {
573 Assertions.assertFalse(codec.areOutcomesCompatible(
573574 new String[] {A_START, A_CONTINUE, OTHER, "BAD"}));
574575 }
575576
576577 @Test
577 public void testCompatibilityWrongClass() {
578 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, B_LAST, OTHER}));
579 }
580
578 void testCompatibilityWrongClass() {
579 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, B_LAST, OTHER}));
580 }
581581
582582
583583 }
1515 */
1616 package opennlp.tools.namefind;
1717
18 import org.junit.Assert;
19 import org.junit.Test;
18 import org.junit.jupiter.api.Assertions;
19 import org.junit.jupiter.api.Test;
2020
2121 /**
2222 * This is the test class for {@link BilouNameFinderSequenceValidator}..
3939 private static String OTHER = BilouCodec.OTHER;
4040
4141 @Test
42 public void testStartAsFirstLabel() {
42 void testStartAsFirstLabel() {
4343 String outcome = START_A;
4444 String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
45 String[] outcomesSequence = new String[] { };
46 Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
47 }
48
49 @Test
50 public void testContinueAsFirstLabel() {
45 String[] outcomesSequence = new String[] {};
46 Assertions.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
47 }
48
49 @Test
50 void testContinueAsFirstLabel() {
5151 String outcome = CONTINUE_A;
5252 String[] inputSequence = new String[] {"TypeA", "something", "something"};
53 String[] outcomesSequence = new String[] { };
54 Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
55 }
56
57 @Test
58 public void testLastAsFirstLabel() {
53 String[] outcomesSequence = new String[] {};
54 Assertions.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
55 }
56
57 @Test
58 void testLastAsFirstLabel() {
5959 String outcome = LAST_A;
6060 String[] inputSequence = new String[] {"TypeA", "something", "something"};
61 String[] outcomesSequence = new String[] { };
62 Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
63 }
64
65 @Test
66 public void testUnitAsFirstLabel() {
61 String[] outcomesSequence = new String[] {};
62 Assertions.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
63 }
64
65 @Test
66 void testUnitAsFirstLabel() {
6767 String outcome = UNIT_A;
6868 String[] inputSequence = new String[] {"TypeA", "something", "something"};
69 String[] outcomesSequence = new String[] { };
70 Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
71 }
72
73 @Test
74 public void testOtherAsFirstLabel() {
69 String[] outcomesSequence = new String[] {};
70 Assertions.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
71 }
72
73 @Test
74 void testOtherAsFirstLabel() {
7575 String outcome = OTHER;
7676 String[] inputSequence = new String[] {"something", "TypeA", "something"};
77 String[] outcomesSequence = new String[] { };
78 Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
77 String[] outcomesSequence = new String[] {};
78 Assertions.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
7979 }
8080
8181 /**
8282 * Start, Any Start => Invalid
8383 */
8484 @Test
85 public void testBeginFollowedByBegin() {
85 void testBeginFollowedByBegin() {
8686
8787 String[] outcomesSequence = new String[] {START_A};
8888
8989 // Same Types
9090 String outcome = START_A;
9191 String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
92 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
92 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
9393
9494 // Diff. Types
9595 outcome = START_B;
9696 inputSequence = new String[] {"TypeA", "TypeB", "something"};
97 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
97 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
9898 }
9999
100100 /**
102102 * Start, Continue, Diff. Type => Invalid
103103 */
104104 @Test
105 public void testBeginFollowedByContinue() {
105 void testBeginFollowedByContinue() {
106106
107107 String[] outcomesSequence = new String[] {START_A};
108108
109109 // Same Types
110110 String outcome = CONTINUE_A;
111111 String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "something"};
112 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
112 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
113113
114114 // Different Types
115115 outcome = CONTINUE_B;
116116 inputSequence = new String[] {"TypeA", "TypeB", "TypeB", "something"};
117 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
117 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
118118 }
119119
120120 /**
122122 * Start, Last, Diff. Type => Invalid
123123 */
124124 @Test
125 public void testStartFollowedByLast() {
125 void testStartFollowedByLast() {
126126
127127 String[] outcomesSequence = new String[] {START_A};
128128
129129 // Same Type
130130 String outcome = LAST_A;
131131 String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
132 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
132 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
133133
134134 // Diff. Types
135135 outcome = LAST_B;
136136 inputSequence = new String[] {"TypeA", "TypeB", "something"};
137 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
137 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
138138 }
139139
140140 /**
141141 * Start, Other => Invalid
142142 */
143143 @Test
144 public void testStartFollowedByOther() {
144 void testStartFollowedByOther() {
145145 String outcome = OTHER;
146146 String[] inputSequence = new String[] {"TypeA", "something", "something"};
147147 String[] outcomesSequence = new String[] {START_A};
148 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
149 }
150
151 /**
152 * Start, Unit => Invalid
153 */
154 @Test
155 public void testStartFollowedByUnit() {
148 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
149 }
150
151 /**
152 * Start, Unit => Invalid
153 */
154 @Test
155 void testStartFollowedByUnit() {
156156 String outcome = UNIT_A;
157157 String[] inputSequence = new String[] {"TypeA", "AnyType", "something"};
158158 String[] outcomesSequence = new String[] {START_A};
159 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
159 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
160160 }
161161
162162 /**
163163 * Continue, Any Begin => Invalid
164164 */
165165 @Test
166 public void testContinueFollowedByStart() {
166 void testContinueFollowedByStart() {
167167
168168 String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
169169
170170 // Same Types
171171 String outcome = START_A;
172172 String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "something"};
173 Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
173 Assertions.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
174174
175175 // Diff. Types
176176 outcome = START_B;
177177 inputSequence = new String[] {"TypeA", "TypeA", "TypeB", "something"};
178 Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
178 Assertions.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
179179
180180 }
181181
184184 * Continue, Continue, Diff. Type => Invalid
185185 */
186186 @Test
187 public void testContinueFollowedByContinue() {
187 void testContinueFollowedByContinue() {
188188
189189 String[] outcomesSequence = new String[] {START_A, CONTINUE_A, CONTINUE_A};
190190
191191 // Same Types
192192 String outcome = CONTINUE_A;
193193 String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeA", "something"};
194 Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
194 Assertions.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
195195
196196 // Different Types
197197 outcome = CONTINUE_B;
198198 inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeB", "something"};
199 Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
199 Assertions.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
200200 }
201201
202202 /**
204204 * Continue, Last, Diff. Type => Invalid
205205 */
206206 @Test
207 public void testContinueFollowedByLast() {
207 void testContinueFollowedByLast() {
208208
209209 String[] outcomesSequence = new String[] {OTHER, START_A, CONTINUE_A};
210210
211211 // Same Types
212212 String outcome = LAST_A;
213213 String[] inputSequence = new String[] {"something", "TypeA", "TypeA", "TypeA", "something"};
214 Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
214 Assertions.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
215215
216216 // Different Types
217217 outcome = LAST_B;
218218 inputSequence = new String[] {"something", "TypeA", "TypeA", "TypeB", "something"};
219 Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
219 Assertions.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
220220 }
221221
222222 /**
223223 * Continue, Other => Invalid
224224 */
225225 @Test
226 public void testContinueFollowedByOther() {
226 void testContinueFollowedByOther() {
227227 String outcome = OTHER;
228228 String[] inputSequence = new String[] {"TypeA", "TypeA", "something", "something"};
229229 String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
230 Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
230 Assertions.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
231231 }
232232
233233 /**
234234 * Continue, Unit => Invalid
235235 */
236236 @Test
237 public void testContinueFollowedByUnit() {
237 void testContinueFollowedByUnit() {
238238 String outcome = UNIT_A;
239239 String[] inputSequence = new String[] {"TypeA", "TypeA", "AnyType", "something"};
240240 String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
241 Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
241 Assertions.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
242242 }
243243
244244 /**
245245 * Last, Any Start => Valid
246246 */
247247 @Test
248 public void testLastFollowedByStart() {
248 void testLastFollowedByStart() {
249249
250250 String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
251251
252252 // Same Types
253253 String outcome = START_A;
254254 String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeA", "TypeA"};
255 Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
255 Assertions.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
256256
257257 // Diff. Types
258258 outcome = START_B;
259259 inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeB", "TypeB"};
260 Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
261 }
262
263 /**
264 * Last, Any Continue => Invalid
265 */
266 @Test
267 public void testLastFollowedByContinue() {
260 Assertions.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
261 }
262
263 /**
264 * Last, Any Continue => Invalid
265 */
266 @Test
267 void testLastFollowedByContinue() {
268268
269269 String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
270270
271271 String outcome = CONTINUE_A;
272272 String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeA", "something"};
273 Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
273 Assertions.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
274274
275275 // Diff. Types
276276 outcome = CONTINUE_B;
277277 inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeB", "something"};
278 Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
279 }
280
281 /**
282 * Last, Any Last => Invalid
283 */
284 @Test
285 public void testLastFollowedByLast() {
278 Assertions.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
279 }
280
281 /**
282 * Last, Any Last => Invalid
283 */
284 @Test
285 void testLastFollowedByLast() {
286286
287287 String[] outcomesSequence = new String[] {OTHER, OTHER, START_A, CONTINUE_A, LAST_A};
288288
290290 String outcome = LAST_A;
291291 String[] inputSequence = new String[] {"something", "something", "TypeA",
292292 "TypeA", "TypeA", "TypeA", "something"};
293 Assert.assertFalse(validator.validSequence(5, inputSequence, outcomesSequence, outcome));
293 Assertions.assertFalse(validator.validSequence(5, inputSequence, outcomesSequence, outcome));
294294
295295 // Diff. Types
296296 outcome = LAST_B;
297297 inputSequence = new String[] {"something", "something", "TypeA", "TypeA",
298298 "TypeA", "TypeB", "something"};
299 Assert.assertFalse(validator.validSequence(5, inputSequence, outcomesSequence, outcome));
299 Assertions.assertFalse(validator.validSequence(5, inputSequence, outcomesSequence, outcome));
300300 }
301301
302302 /**
303303 * Last, Other => Valid
304304 */
305305 @Test
306 public void testLastFollowedByOther() {
306 void testLastFollowedByOther() {
307307 String outcome = OTHER;
308308 String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "something", "something"};
309309 String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
310 Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
310 Assertions.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
311311 }
312312
313313 /**
314314 * Last, Unit => Valid
315315 */
316316 @Test
317 public void testLastFollowedByUnit() {
317 void testLastFollowedByUnit() {
318318 String outcome = UNIT_A;
319319 String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "AnyType", "something"};
320320 String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
321 Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
321 Assertions.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
322322 }
323323
324324 /**
325325 * Other, Any Start => Valid
326326 */
327327 @Test
328 public void testOtherFollowedByBegin() {
328 void testOtherFollowedByBegin() {
329329 String outcome = START_A;
330330 String[] inputSequence = new String[] {"something", "TypeA", "TypeA"};
331331 String[] outcomesSequence = new String[] {OTHER};
332 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
332 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
333333 }
334334
335335 /**
336336 * Other, Any Continue => Invalid
337337 */
338338 @Test
339 public void testOtherFollowedByContinue() {
339 void testOtherFollowedByContinue() {
340340 String outcome = CONTINUE_A;
341341 String[] inputSequence = new String[] {"something", "TypeA", "TypeA"};
342342 String[] outcomesSequence = new String[] {OTHER};
343 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
343 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
344344 }
345345
346346 /**
347347 * Other, Any Last => Invalid
348348 */
349349 @Test
350 public void testOtherFollowedByLast() {
350 void testOtherFollowedByLast() {
351351 String outcome = LAST_A;
352352 String[] inputSequence = new String[] {"something", "TypeA", "TypeA"};
353353 String[] outcomesSequence = new String[] {OTHER};
354 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
354 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
355355 }
356356
357357 /**
358358 * Other, Unit => Valid
359359 */
360360 @Test
361 public void testOtherFollowedByUnit() {
361 void testOtherFollowedByUnit() {
362362 String outcome = UNIT_A;
363363 String[] inputSequence = new String[] {"something", "AnyType", "something"};
364364 String[] outcomesSequence = new String[] {OTHER};
365 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
365 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
366366 }
367367
368368 /**
369369 * Other, Other => Valid
370370 */
371371 @Test
372 public void testOutsideFollowedByOutside() {
372 void testOutsideFollowedByOutside() {
373373 String outcome = OTHER;
374374 String[] inputSequence = new String[] {"something", "something", "something"};
375375 String[] outcomesSequence = new String[] {OTHER};
376 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
376 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
377377 }
378378
379379 /**
380380 * Unit, Any Start => Valid
381381 */
382382 @Test
383 public void testUnitFollowedByBegin() {
383 void testUnitFollowedByBegin() {
384384 String outcome = START_A;
385385 String[] inputSequence = new String[] {"AnyType", "TypeA", "something"};
386386 String[] outcomesSequence = new String[] {UNIT_A};
387 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
387 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
388388 }
389389
390390 /**
391391 * Unit, Any Continue => Invalid
392392 */
393393 @Test
394 public void testUnitFollowedByInside() {
394 void testUnitFollowedByInside() {
395395 String outcome = CONTINUE_A;
396396 String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
397397 String[] outcomesSequence = new String[] {UNIT_A};
398 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
398 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
399399 }
400400
401401 /**
402402 * Unit, Any Last => Invalid
403403 */
404404 @Test
405 public void testUnitFollowedByLast() {
405 void testUnitFollowedByLast() {
406406 String outcome = LAST_A;
407407 String[] inputSequence = new String[] {"AnyType", "TypeA", "something"};
408408 String[] outcomesSequence = new String[] {UNIT_A};
409 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
409 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
410410 }
411411
412412 /**
413413 * Unit, Other => Valid
414414 */
415415 @Test
416 public void testUnitFollowedByOutside() {
416 void testUnitFollowedByOutside() {
417417 String outcome = OTHER;
418418 String[] inputSequence = new String[] {"TypeA", "something", "something"};
419419 String[] outcomesSequence = new String[] {UNIT_A};
420 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
420 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
421421 }
422422
423423 /**
424424 * Unit, Unit => Valid
425425 */
426426 @Test
427 public void testUnitFollowedByUnit() {
427 void testUnitFollowedByUnit() {
428428 String outcome = UNIT_A;
429429 String[] inputSequence = new String[] {"AnyType", "AnyType", "something"};
430430 String[] outcomesSequence = new String[] {UNIT_A};
431 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
431 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
432432 }
433433
434434 }
2020 import java.util.Arrays;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.util.Span;
2727
4646 private static final String OTHER = BioCodec.OTHER;
4747
4848 @Test
49 public void testEncodeNoNames() {
49 void testEncodeNoNames() {
5050 NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true);
51 String[] expected = new String[] { OTHER, OTHER, OTHER, OTHER};
52 String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
53 Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
54 }
55
56 @Test
57 public void testEncodeSingleTokenSpan() {
51 String[] expected = new String[] {OTHER, OTHER, OTHER, OTHER};
52 String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
53 Assertions.assertArrayEquals(expected, actual, "Only 'Other' is expected.");
54 }
55
56 @Test
57 void testEncodeSingleTokenSpan() {
5858 String[] sentence = "I called Julie again.".split(" ");
59 Span[] spans = new Span[] { new Span(2,3, A_TYPE)};
59 Span[] spans = new Span[] {new Span(2, 3, A_TYPE)};
6060 NameSample nameSample = new NameSample(sentence, spans, true);
6161 String[] expected = new String[] {OTHER, OTHER, A_START, OTHER};
6262 String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
63 Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should be 'other'.", expected, actual);
64 }
65
66 @Test
67 public void testEncodeDoubleTokenSpan() {
63 Assertions.assertArrayEquals(expected, actual,
64 "'Julie' should be 'start' only, the rest should be 'other'.");
65 }
66
67 @Test
68 void testEncodeDoubleTokenSpan() {
6869 String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
69 Span[] span = new Span[] { new Span(2,4, A_TYPE)};
70 Span[] span = new Span[] {new Span(2, 4, A_TYPE)};
7071 NameSample nameSample = new NameSample(sentence, span, true);
7172 String[] expected = new String[] {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
7273 String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
73 Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is " +
74 "'continue' and the rest should be 'other'.", expected, actual);
75 }
76
77 @Test
78 public void testEncodeDoubleTokenSpanNoType() {
74 Assertions.assertArrayEquals(expected, actual, "'Stefanie' should be 'start' only, 'Schmidt' is " +
75 "'continue' and the rest should be 'other'.");
76 }
77
78 @Test
79 void testEncodeDoubleTokenSpanNoType() {
7980 final String DEFAULT_START = "default" + "-" + BioCodec.START;
8081 final String DEFAULT_CONTINUE = "default" + "-" + BioCodec.CONTINUE;
8182 String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
82 Span[] span = new Span[] { new Span(2,4, null)};
83 Span[] span = new Span[] {new Span(2, 4, null)};
8384 NameSample nameSample = new NameSample(sentence, span, true);
8485 String[] expected = new String[] {OTHER, OTHER, DEFAULT_START, DEFAULT_CONTINUE, OTHER};
8586 String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
86 Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is " +
87 "'continue' and the rest should be 'other'.", expected, actual);
88 }
89
90 @Test
91 public void testEncodeAdjacentSingleSpans() {
87 Assertions.assertArrayEquals(expected, actual, "'Stefanie' should be 'start' only, 'Schmidt' is " +
88 "'continue' and the rest should be 'other'.");
89 }
90
91 @Test
92 void testEncodeAdjacentSingleSpans() {
9293 String[] sentence = "something PersonA PersonB Something".split(" ");
93 Span[] span = new Span[] { new Span(1,2, A_TYPE), new Span(2, 3, A_TYPE) };
94 Span[] span = new Span[] {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
9495 NameSample nameSample = new NameSample(sentence, span, true);
9596 String[] expected = new String[] {OTHER, A_START, A_START, OTHER};
9697 String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
97 Assert.assertArrayEquals(expected, actual);
98 }
99
100 @Test
101 public void testEncodeAdjacentSpans() {
98 Assertions.assertArrayEquals(expected, actual);
99 }
100
101 @Test
102 void testEncodeAdjacentSpans() {
102103 String[] sentence = "something PersonA PersonA PersonB Something".split(" ");
103 Span[] span = new Span[] { new Span(1,3, A_TYPE), new Span(3, 4, A_TYPE) };
104 Span[] span = new Span[] {new Span(1, 3, A_TYPE), new Span(3, 4, A_TYPE)};
104105 NameSample nameSample = new NameSample(sentence, span, true);
105106 String[] expected = new String[] {OTHER, A_START, A_CONTINUE, A_START, OTHER};
106107 String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
107 Assert.assertArrayEquals(expected, actual);
108 }
109
110 @Test
111 public void testCreateSequenceValidator() {
112 Assert.assertTrue(codec.createSequenceValidator() instanceof NameFinderSequenceValidator);
113 }
114
115
116 @Test
117 public void testDecodeEmpty() {
108 Assertions.assertArrayEquals(expected, actual);
109 }
110
111 @Test
112 void testCreateSequenceValidator() {
113 Assertions.assertTrue(codec.createSequenceValidator() instanceof NameFinderSequenceValidator);
114 }
115
116
117 @Test
118 void testDecodeEmpty() {
118119 Span[] expected = new Span[] {};
119120 Span[] actual = codec.decode(new ArrayList<String>());
120 Assert.assertArrayEquals(expected, actual);
121 }
121 Assertions.assertArrayEquals(expected, actual);
122 }
123
122124 /**
123125 * Start, Other
124126 */
125127 @Test
126 public void testDecodeSingletonFirst() {
128 void testDecodeSingletonFirst() {
127129
128130 List<String> encoded = Arrays.asList(B_START, OTHER);
129131 Span[] expected = new Span[] {new Span(0, 1, B_TYPE)};
130132 Span[] actual = codec.decode(encoded);
131 Assert.assertArrayEquals(expected, actual);
133 Assertions.assertArrayEquals(expected, actual);
132134 }
133135
134136 /**
135137 * Start Start Other
136138 */
137139 @Test
138 public void testDecodeAdjacentSingletonFirst() {
140 void testDecodeAdjacentSingletonFirst() {
139141 List<String> encoded = Arrays.asList(B_START, B_START, OTHER);
140142 Span[] expected = new Span[] {new Span(0, 1, B_TYPE), new Span(1, 2, B_TYPE)};
141143 Span[] actual = codec.decode(encoded);
142 Assert.assertArrayEquals(expected, actual);
144 Assertions.assertArrayEquals(expected, actual);
143145 }
144146
145147 /**
146148 * Start Continue Other
147149 */
148150 @Test
149 public void testDecodePairFirst() {
151 void testDecodePairFirst() {
150152 List<String> encoded = Arrays.asList(B_START, B_CONTINUE, OTHER);
151153 Span[] expected = new Span[] {new Span(0, 2, B_TYPE)};
152154 Span[] actual = codec.decode(encoded);
153 Assert.assertArrayEquals(expected, actual);
155 Assertions.assertArrayEquals(expected, actual);
154156 }
155157
156158 /**
157159 * Start Continue Continue Other
158160 */
159161 @Test
160 public void testDecodeTripletFirst() {
162 void testDecodeTripletFirst() {
161163 List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_CONTINUE, OTHER);
162164 Span[] expected = new Span[] {new Span(0, 3, B_TYPE)};
163165 Span[] actual = codec.decode(encoded);
164 Assert.assertArrayEquals(expected, actual);
166 Assertions.assertArrayEquals(expected, actual);
165167 }
166168
167169 /**
168170 * Start Continue Start Other
169171 */
170172 @Test
171 public void testDecodeAdjacentPairSingleton() {
173 void testDecodeAdjacentPairSingleton() {
172174 List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_START, OTHER);
173175 Span[] expected = new Span[] {new Span(0, 2, B_TYPE), new Span(2, 3, B_TYPE)};
174176 Span[] actual = codec.decode(encoded);
175 Assert.assertArrayEquals(expected, actual);
177 Assertions.assertArrayEquals(expected, actual);
176178 }
177179
178180 /**
179181 * Other Start Other
180182 */
181183 @Test
182 public void testDecodeOtherFirst() {
184 void testDecodeOtherFirst() {
183185 List<String> encoded = Arrays.asList(OTHER, B_START, OTHER);
184186 Span[] expected = new Span[] {new Span(1, 2, B_TYPE)};
185187 Span[] actual = codec.decode(encoded);
186 Assert.assertArrayEquals(expected, actual);
188 Assertions.assertArrayEquals(expected, actual);
187189 }
188190
189191 /**
190192 * Other, A-Start, A-Continue, A-Continue, Other, B-Start, B-Continue, Other, C-Start, Other
191193 */
192194 @Test
193 public void testDecodeMultiClass() {
195 void testDecodeMultiClass() {
194196 List<String> encoded = Arrays.asList(OTHER, A_START, A_CONTINUE, A_CONTINUE,
195197 OTHER, B_START, B_CONTINUE, OTHER, C_START, OTHER);
196198 Span[] expected = new Span[] {new Span(1, 4, A_TYPE),
197199 new Span(5, 7, B_TYPE), new Span(8, 9, C_TYPE)};
198200 Span[] actual = codec.decode(encoded);
199 Assert.assertArrayEquals(expected, actual);
200 }
201
202 @Test
203 public void testCompatibilityEmpty() {
204 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
205 }
206
207 @Test
208 public void testCompatibilitySingleStart() {
209 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START}));
210 }
211
212 @Test
213 public void testCompatibilitySingleContinue() {
214 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE}));
215 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE}));
216 }
217
218 @Test
219 public void testCompatibilitySingleOther() {
220 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
221 }
222
223 @Test
224 public void testCompatibilityStartContinue() {
225 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE}));
226 }
227
228 @Test
229 public void testCompatibilityStartOther() {
230 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, OTHER}));
231 }
232
233 @Test
234 public void testCompatibilityContinueOther() {
235 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE, OTHER}));
236 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE, OTHER}));
237 }
238
239 @Test
240 public void testCompatibilityStartContinueOther() {
241 Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, OTHER}));
242 }
243
244
245 @Test
246 public void testCompatibilityMultiClass() {
247 Assert.assertTrue(codec.areOutcomesCompatible(
201 Assertions.assertArrayEquals(expected, actual);
202 }
203
204 @Test
205 void testCompatibilityEmpty() {
206 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {}));
207 }
208
209 @Test
210 void testCompatibilitySingleStart() {
211 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START}));
212 }
213
214 @Test
215 void testCompatibilitySingleContinue() {
216 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE}));
217 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE}));
218 }
219
220 @Test
221 void testCompatibilitySingleOther() {
222 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
223 }
224
225 @Test
226 void testCompatibilityStartContinue() {
227 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE}));
228 }
229
230 @Test
231 void testCompatibilityStartOther() {
232 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, OTHER}));
233 }
234
235 @Test
236 void testCompatibilityContinueOther() {
237 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE, OTHER}));
238 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE, OTHER}));
239 }
240
241 @Test
242 void testCompatibilityStartContinueOther() {
243 Assertions.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, OTHER}));
244 }
245
246
247 @Test
248 void testCompatibilityMultiClass() {
249 Assertions.assertTrue(codec.areOutcomesCompatible(
248250 new String[] {A_START, A_CONTINUE, B_START, OTHER}));
249251 }
250252
251253 @Test
252 public void testCompatibilityBadTag() {
253 Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, "BAD"}));
254 }
255
256 @Test
257 public void testCompatibilityRepeated() {
258 Assert.assertTrue(codec.areOutcomesCompatible(
254 void testCompatibilityBadTag() {
255 Assertions.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, "BAD"}));
256 }
257
258 @Test
259 void testCompatibilityRepeated() {
260 Assertions.assertTrue(codec.areOutcomesCompatible(
259261 new String[] {A_START, A_START, A_CONTINUE, A_CONTINUE, B_START, B_START, OTHER, OTHER}));
260262 }
261263
2222 import java.util.ArrayList;
2323 import java.util.List;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.cmdline.namefind.NameEvaluationErrorListener;
2929 import opennlp.tools.dictionary.Dictionary;
4141 public class DictionaryNameFinderEvaluatorTest {
4242
4343 @Test
44 public void testEvaluator() throws IOException, URISyntaxException {
44 void testEvaluator() throws IOException, URISyntaxException {
4545 DictionaryNameFinder nameFinder = new DictionaryNameFinder(
4646 createDictionary());
4747 TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(
5252 sample.close();
5353 FMeasure fmeasure = evaluator.getFMeasure();
5454
55 Assert.assertTrue(fmeasure.getFMeasure() == 1);
56 Assert.assertTrue(fmeasure.getRecallScore() == 1);
55 Assertions.assertTrue(fmeasure.getFMeasure() == 1);
56 Assertions.assertTrue(fmeasure.getRecallScore() == 1);
5757 }
5858
5959 /**
6161 *
6262 * @return
6363 * @throws IOException
64 * @throws URISyntaxException
6564 */
66 private static ObjectStream<NameSample> createSample() throws IOException,
67 URISyntaxException {
65 private static ObjectStream<NameSample> createSample() throws IOException {
6866
6967 InputStreamFactory in = new ResourceAsStreamFactory(
7068 DictionaryNameFinderEvaluatorTest.class,
1616
1717 package opennlp.tools.namefind;
1818
19 import org.junit.Assert;
20 import org.junit.Before;
21 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.BeforeEach;
21 import org.junit.jupiter.api.Test;
2222
2323 import opennlp.tools.dictionary.Dictionary;
2424 import opennlp.tools.tokenize.SimpleTokenizer;
2626 import opennlp.tools.util.StringList;
2727
2828 /**
29 *Tests for the {@link DictionaryNameFinder} class.
30 */
29 * Tests for the {@link DictionaryNameFinder} class.
30 */
3131 public class DictionaryNameFinderTest {
3232
3333 private Dictionary mDictionary = new Dictionary();
3535
3636 public DictionaryNameFinderTest() {
3737
38 StringList vanessa = new StringList(new String[]{"Vanessa"});
38 StringList vanessa = new StringList(new String[] {"Vanessa"});
3939 mDictionary.put(vanessa);
4040
4141 StringList vanessaWilliams = new StringList("Vanessa", "Williams");
4242 mDictionary.put(vanessaWilliams);
4343
44 StringList max = new StringList(new String[]{"Max"});
44 StringList max = new StringList(new String[] {"Max"});
4545 mDictionary.put(max);
4646
4747 StringList michaelJordan = new
4949 mDictionary.put(michaelJordan);
5050 }
5151
52 @Before
53 public void setUp() throws Exception {
52 @BeforeEach
53 void setUp() {
5454 mNameFinder = new DictionaryNameFinder(mDictionary);
5555 }
5656
5757 @Test
58 public void testSingleTokeNameAtSentenceStart() {
58 void testSingleTokeNameAtSentenceStart() {
5959 String sentence = "Max a b c d";
6060 SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
6161 String[] tokens = tokenizer.tokenize(sentence);
6262 Span[] names = mNameFinder.find(tokens);
63 Assert.assertTrue(names.length == 1);
64 Assert.assertTrue(names[0].getStart() == 0 && names[0].getEnd() == 1);
63 Assertions.assertTrue(names.length == 1);
64 Assertions.assertTrue(names[0].getStart() == 0 && names[0].getEnd() == 1);
6565 }
6666
6767 @Test
68 public void testSingleTokeNameInsideSentence() {
68 void testSingleTokeNameInsideSentence() {
6969 String sentence = "a b Max c d";
7070 SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
7171 String[] tokens = tokenizer.tokenize(sentence);
7272 Span[] names = mNameFinder.find(tokens);
73 Assert.assertTrue(names.length == 1);
74 Assert.assertTrue(names[0].getStart() == 2 && names[0].getEnd() == 3);
73 Assertions.assertTrue(names.length == 1);
74 Assertions.assertTrue(names[0].getStart() == 2 && names[0].getEnd() == 3);
7575 }
7676
7777 @Test
78 public void testSingleTokeNameAtSentenceEnd() {
78 void testSingleTokeNameAtSentenceEnd() {
7979 String sentence = "a b c Max";
8080
8181 SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
8282 String[] tokens = tokenizer.tokenize(sentence);
8383 Span[] names = mNameFinder.find(tokens);
84 Assert.assertTrue(names.length == 1);
85 Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
84 Assertions.assertTrue(names.length == 1);
85 Assertions.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
8686 }
8787
8888 @Test
89 public void testLastMatchingTokenNameIsChoosen() {
89 void testLastMatchingTokenNameIsChoosen() {
9090 String[] sentence = {"a", "b", "c", "Vanessa"};
9191 Span[] names = mNameFinder.find(sentence);
92 Assert.assertTrue(names.length == 1);
93 Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
92 Assertions.assertTrue(names.length == 1);
93 Assertions.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
9494 }
9595
9696 @Test
97 public void testLongerTokenNameIsPreferred() {
97 void testLongerTokenNameIsPreferred() {
9898 String[] sentence = {"a", "b", "c", "Vanessa", "Williams"};
9999 Span[] names = mNameFinder.find(sentence);
100 Assert.assertTrue(names.length == 1);
101 Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
100 Assertions.assertTrue(names.length == 1);
101 Assertions.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
102102 }
103103
104104 @Test
105 public void testCaseSensitivity() {
105 void testCaseSensitivity() {
106106 String[] sentence = {"a", "b", "c", "vanessa", "williams"};
107107 Span[] names = mNameFinder.find(sentence);
108 Assert.assertTrue(names.length == 1);
109 Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
108 Assertions.assertTrue(names.length == 1);
109 Assertions.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
110110 }
111111
112112 @Test
113 public void testCaseLongerEntry() {
113 void testCaseLongerEntry() {
114114 String[] sentence = {"a", "b", "michael", "jordan"};
115115 Span[] names = mNameFinder.find(sentence);
116 Assert.assertTrue(names.length == 1);
117 Assert.assertTrue(names[0].length() == 2);
116 Assertions.assertTrue(names.length == 1);
117 Assertions.assertTrue(names[0].length() == 2);
118118 }
119119 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.ml.model.Event;
2525 import opennlp.tools.util.ObjectStream;
3737 "."};
3838
3939 private static final NameContextGenerator CG = new DefaultNameContextGenerator(
40 (AdaptiveFeatureGenerator[]) null);
40 (AdaptiveFeatureGenerator[]) null);
4141
4242 /**
4343 * Tests the correctly generated outcomes for a test sentence.
4444 */
4545 @Test
46 public void testOutcomesForSingleTypeSentence() throws IOException {
46 void testOutcomesForSingleTypeSentence() throws IOException {
4747
4848 NameSample nameSample = new NameSample(SENTENCE,
49 new Span[]{new Span(0, 2, "person")}, false);
49 new Span[] {new Span(0, 2, "person")}, false);
5050
5151 try (ObjectStream<Event> eventStream = new NameFinderEventStream(
5252 ObjectStreamUtils.createObjectStream(nameSample))) {
5353
54 Assert.assertEquals("person-" + NameFinderME.START, eventStream.read().getOutcome());
55 Assert.assertEquals("person-" + NameFinderME.CONTINUE, eventStream.read().getOutcome());
54 Assertions.assertEquals("person-" + NameFinderME.START, eventStream.read().getOutcome());
55 Assertions.assertEquals("person-" + NameFinderME.CONTINUE, eventStream.read().getOutcome());
5656
5757 for (int i = 0; i < 10; i++) {
58 Assert.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
58 Assertions.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
5959 }
6060
61 Assert.assertNull(eventStream.read());
61 Assertions.assertNull(eventStream.read());
6262 }
6363 }
6464
6868 * declares its type, passing the type to event stream has no effect
6969 */
7070 @Test
71 public void testOutcomesTypeCantOverride() throws IOException {
71 void testOutcomesTypeCantOverride() throws IOException {
7272 String type = "XYZ";
7373
7474 NameSample nameSample = new NameSample(SENTENCE,
75 new Span[] { new Span(0, 2, "person") }, false);
75 new Span[] {new Span(0, 2, "person")}, false);
7676
7777 ObjectStream<Event> eventStream = new NameFinderEventStream(
78 ObjectStreamUtils.createObjectStream(nameSample), type, CG, null);
78 ObjectStreamUtils.createObjectStream(nameSample), type, CG, null);
7979
8080 String prefix = type + "-";
81 Assert.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
82 Assert.assertEquals(prefix + NameFinderME.CONTINUE,
83 eventStream.read().getOutcome());
81 Assertions.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
82 Assertions.assertEquals(prefix + NameFinderME.CONTINUE,
83 eventStream.read().getOutcome());
8484
8585 for (int i = 0; i < 10; i++) {
86 Assert.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
86 Assertions.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
8787 }
8888
89 Assert.assertNull(eventStream.read());
89 Assertions.assertNull(eventStream.read());
9090 eventStream.close();
9191 }
9292
9696 * user
9797 */
9898 @Test
99 public void testOutcomesWithType() throws IOException {
99 void testOutcomesWithType() throws IOException {
100100 String type = "XYZ";
101101
102102 NameSample nameSample = new NameSample(SENTENCE,
103 new Span[] { new Span(0, 2) }, false);
103 new Span[] {new Span(0, 2)}, false);
104104
105105 ObjectStream<Event> eventStream = new NameFinderEventStream(
106 ObjectStreamUtils.createObjectStream(nameSample), type, CG, null);
106 ObjectStreamUtils.createObjectStream(nameSample), type, CG, null);
107107
108108 String prefix = type + "-";
109 Assert.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
110 Assert.assertEquals(prefix + NameFinderME.CONTINUE,
111 eventStream.read().getOutcome());
109 Assertions.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
110 Assertions.assertEquals(prefix + NameFinderME.CONTINUE,
111 eventStream.read().getOutcome());
112112
113113 for (int i = 0; i < 10; i++) {
114 Assert.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
114 Assertions.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
115115 }
116116
117 Assert.assertNull(eventStream.read());
117 Assertions.assertNull(eventStream.read());
118118 eventStream.close();
119119 }
120120
124124 * "default".
125125 */
126126 @Test
127 public void testOutcomesTypeEmpty() throws IOException {
127 void testOutcomesTypeEmpty() throws IOException {
128128
129129 NameSample nameSample = new NameSample(SENTENCE,
130 new Span[] { new Span(0, 2) }, false);
130 new Span[] {new Span(0, 2)}, false);
131131
132132 ObjectStream<Event> eventStream = new NameFinderEventStream(
133 ObjectStreamUtils.createObjectStream(nameSample), null, CG, null);
133 ObjectStreamUtils.createObjectStream(nameSample), null, CG, null);
134134
135135 String prefix = "default-";
136 Assert.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
137 Assert.assertEquals(prefix + NameFinderME.CONTINUE,
138 eventStream.read().getOutcome());
136 Assertions.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
137 Assertions.assertEquals(prefix + NameFinderME.CONTINUE,
138 eventStream.read().getOutcome());
139139
140140 for (int i = 0; i < 10; i++) {
141 Assert.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
141 Assertions.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
142142 }
143143
144 Assert.assertNull(eventStream.read());
144 Assertions.assertNull(eventStream.read());
145145 eventStream.close();
146146 }
147147 }
2020 import java.nio.charset.StandardCharsets;
2121 import java.util.Collections;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.ml.model.SequenceClassificationModel;
2727 import opennlp.tools.util.MockInputStreamFactory;
5454 private static final String DEFAULT = "default";
5555
5656 @Test
57 public void testNameFinder() throws Exception {
57 void testNameFinder() throws Exception {
5858
5959 // train the name finder
6060 String encoding = "ISO-8859-1";
6262 ObjectStream<NameSample> sampleStream =
6363 new NameSampleDataStream(
6464 new PlainTextByLineStream(new MockInputStreamFactory(
65 new File("opennlp/tools/namefind/AnnotatedSentences.txt")), encoding));
65 new File("opennlp/tools/namefind/AnnotatedSentences.txt")), encoding));
6666
6767 TrainingParameters params = new TrainingParameters();
6868 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
8888
8989 Span[] names = nameFinder.find(sentence);
9090
91 Assert.assertEquals(1, names.length);
92 Assert.assertEquals(new Span(0, 1, DEFAULT), names[0]);
91 Assertions.assertEquals(1, names.length);
92 Assertions.assertEquals(new Span(0, 1, DEFAULT), names[0]);
9393
9494 sentence = new String[] {
9595 "Hi",
103103
104104 names = nameFinder.find(sentence);
105105
106 Assert.assertEquals(2, names.length);
107 Assert.assertEquals(new Span(1, 2, DEFAULT), names[0]);
108 Assert.assertEquals(new Span(4, 6, DEFAULT), names[1]);
106 Assertions.assertEquals(2, names.length);
107 Assertions.assertEquals(new Span(1, 2, DEFAULT), names[0]);
108 Assertions.assertEquals(new Span(4, 6, DEFAULT), names[1]);
109109 }
110110
111111 /**
113113 * nameType and try the model in a sample text.
114114 */
115115 @Test
116 public void testNameFinderWithTypes() throws Exception {
116 void testNameFinderWithTypes() throws Exception {
117117
118118 // train the name finder
119119 String encoding = "ISO-8859-1";
120120
121121 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
122122 new PlainTextByLineStream(new MockInputStreamFactory(
123 new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")), encoding));
124
125 TrainingParameters params = new TrainingParameters();
126 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
127 params.put(TrainingParameters.CUTOFF_PARAM, 1);
128
129 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
130 params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
131
132 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
133
134 // now test if it can detect the sample sentences
135
136 String[] sentence2 = new String[] { "Hi", "Mike", ",", "it's", "Stefanie",
137 "Schmidt", "." };
123 new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")), encoding));
124
125 TrainingParameters params = new TrainingParameters();
126 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
127 params.put(TrainingParameters.CUTOFF_PARAM, 1);
128
129 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
130 params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
131
132 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
133
134 // now test if it can detect the sample sentences
135
136 String[] sentence2 = new String[] {"Hi", "Mike", ",", "it's", "Stefanie",
137 "Schmidt", "."};
138138
139139 Span[] names2 = nameFinder.find(sentence2);
140140
141 Assert.assertEquals(2, names2.length);
142 Assert.assertEquals(new Span(1, 2, "person"), names2[0]);
143 Assert.assertEquals(new Span(4, 6, "person"), names2[1]);
144 Assert.assertEquals("person", names2[0].getType());
145 Assert.assertEquals("person", names2[1].getType());
146
147 String[] sentence = { "Alisa", "appreciated", "the", "hint", "and",
148 "enjoyed", "a", "delicious", "traditional", "meal." };
141 Assertions.assertEquals(2, names2.length);
142 Assertions.assertEquals(new Span(1, 2, "person"), names2[0]);
143 Assertions.assertEquals(new Span(4, 6, "person"), names2[1]);
144 Assertions.assertEquals("person", names2[0].getType());
145 Assertions.assertEquals("person", names2[1].getType());
146
147 String[] sentence = {"Alisa", "appreciated", "the", "hint", "and",
148 "enjoyed", "a", "delicious", "traditional", "meal."};
149149
150150 Span[] names = nameFinder.find(sentence);
151151
152 Assert.assertEquals(1, names.length);
153 Assert.assertEquals(new Span(0, 1, "person"), names[0]);
154 Assert.assertTrue(hasOtherAsOutcome(nameFinderModel));
152 Assertions.assertEquals(1, names.length);
153 Assertions.assertEquals(new Span(0, 1, "person"), names[0]);
154 Assertions.assertTrue(hasOtherAsOutcome(nameFinderModel));
155155 }
156156
157157 /**
159159 * This is related to the issue OPENNLP-9
160160 */
161161 @Test
162 public void testOnlyWithNames() throws Exception {
163
164 // train the name finder
165 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
166 new PlainTextByLineStream(new MockInputStreamFactory(
167 new File("opennlp/tools/namefind/OnlyWithNames.train")), StandardCharsets.UTF_8));
168
169 TrainingParameters params = new TrainingParameters();
170 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
171 params.put(TrainingParameters.CUTOFF_PARAM, 1);
172
173 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
174 params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
175
176 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
177
178 // now test if it can detect the sample sentences
179
180 String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " +
181 "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+");
182
183 Span[] names1 = nameFinder.find(sentence);
184
185 Assert.assertEquals(new Span(0, 2, DEFAULT), names1[0]);
186 Assert.assertEquals(new Span(2, 4, DEFAULT), names1[1]);
187 Assert.assertEquals(new Span(4, 6, DEFAULT), names1[2]);
188 Assert.assertFalse(hasOtherAsOutcome(nameFinderModel));
189 }
190
191 @Test
192 public void testOnlyWithNamesTypeOverride() throws Exception {
193
194 // train the name finder
195 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
196 new PlainTextByLineStream(new MockInputStreamFactory(
197 new File("opennlp/tools/namefind/OnlyWithNames.train")), StandardCharsets.UTF_8));
198
199 TrainingParameters params = new TrainingParameters();
200 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
201 params.put(TrainingParameters.CUTOFF_PARAM, 1);
202
203 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream,
162 void testOnlyWithNames() throws Exception {
163
164 // train the name finder
165 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
166 new PlainTextByLineStream(new MockInputStreamFactory(
167 new File("opennlp/tools/namefind/OnlyWithNames.train")), StandardCharsets.UTF_8));
168
169 TrainingParameters params = new TrainingParameters();
170 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
171 params.put(TrainingParameters.CUTOFF_PARAM, 1);
172
173 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
204174 params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
205175
206176 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
212182
213183 Span[] names1 = nameFinder.find(sentence);
214184
215 Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]);
216 Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]);
217 Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]);
218 Assert.assertFalse(hasOtherAsOutcome(nameFinderModel));
185 Assertions.assertEquals(new Span(0, 2, DEFAULT), names1[0]);
186 Assertions.assertEquals(new Span(2, 4, DEFAULT), names1[1]);
187 Assertions.assertEquals(new Span(4, 6, DEFAULT), names1[2]);
188 Assertions.assertFalse(hasOtherAsOutcome(nameFinderModel));
189 }
190
191 @Test
192 void testOnlyWithNamesTypeOverride() throws Exception {
193
194 // train the name finder
195 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
196 new PlainTextByLineStream(new MockInputStreamFactory(
197 new File("opennlp/tools/namefind/OnlyWithNames.train")), StandardCharsets.UTF_8));
198
199 TrainingParameters params = new TrainingParameters();
200 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
201 params.put(TrainingParameters.CUTOFF_PARAM, 1);
202
203 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream,
204 params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
205
206 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
207
208 // now test if it can detect the sample sentences
209
210 String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " +
211 "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+");
212
213 Span[] names1 = nameFinder.find(sentence);
214
215 Assertions.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]);
216 Assertions.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]);
217 Assertions.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]);
218 Assertions.assertFalse(hasOtherAsOutcome(nameFinderModel));
219219 }
220220
221221 /**
224224 * This is related to the issue OPENNLP-9
225225 */
226226 @Test
227 public void testOnlyWithNamesWithTypes() throws Exception {
228
229 // train the name finder
230 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
231 new PlainTextByLineStream(new MockInputStreamFactory(
232 new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), StandardCharsets.UTF_8));
227 void testOnlyWithNamesWithTypes() throws Exception {
228
229 // train the name finder
230 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
231 new PlainTextByLineStream(new MockInputStreamFactory(
232 new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), StandardCharsets.UTF_8));
233233
234234 TrainingParameters params = new TrainingParameters();
235235 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
247247
248248 Span[] names1 = nameFinder.find(sentence);
249249
250 Assert.assertEquals(new Span(0, 2, "person"), names1[0]);
251 Assert.assertEquals(new Span(2, 4, "person"), names1[1]);
252 Assert.assertEquals(new Span(4, 6, "person"), names1[2]);
253 Assert.assertEquals("person", names1[2].getType());
254 Assert.assertFalse(hasOtherAsOutcome(nameFinderModel));
250 Assertions.assertEquals(new Span(0, 2, "person"), names1[0]);
251 Assertions.assertEquals(new Span(2, 4, "person"), names1[1]);
252 Assertions.assertEquals(new Span(4, 6, "person"), names1[2]);
253 Assertions.assertEquals("person", names1[2].getType());
254 Assertions.assertFalse(hasOtherAsOutcome(nameFinderModel));
255255 }
256256
257257 /**
259259 * This is related to the issue OPENNLP-9
260260 */
261261 @Test
262 public void testOnlyWithEntitiesWithTypes() throws Exception {
263
264 // train the name finder
265 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
266 new PlainTextByLineStream(new MockInputStreamFactory(
267 new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), StandardCharsets.UTF_8));
262 void testOnlyWithEntitiesWithTypes() throws Exception {
263
264 // train the name finder
265 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
266 new PlainTextByLineStream(new MockInputStreamFactory(
267 new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), StandardCharsets.UTF_8));
268268
269269 TrainingParameters params = new TrainingParameters();
270270 params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
282282
283283 Span[] names1 = nameFinder.find(sentence);
284284
285 Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO
286 Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States
287 Assert.assertEquals("person", names1[2].getType());
288 Assert.assertFalse(hasOtherAsOutcome(nameFinderModel));
285 Assertions.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO
286 Assertions.assertEquals(new Span(1, 3, "location"), names1[1]); // United States
287 Assertions.assertEquals("person", names1[2].getType());
288 Assertions.assertFalse(hasOtherAsOutcome(nameFinderModel));
289289 }
290290
291291 private boolean hasOtherAsOutcome(TokenNameFinderModel nameFinderModel) {
300300 }
301301
302302 @Test
303 public void testDropOverlappingSpans() {
304 Span[] spans = new Span[] {new Span(1, 10), new Span(1,11), new Span(1,11), new Span(5, 15)};
303 void testDropOverlappingSpans() {
304 Span[] spans = new Span[] {new Span(1, 10), new Span(1, 11), new Span(1, 11), new Span(5, 15)};
305305 Span[] remainingSpan = NameFinderME.dropOverlappingSpans(spans);
306 Assert.assertEquals(new Span(1, 11), remainingSpan[0]);
306 Assertions.assertEquals(new Span(1, 11), remainingSpan[0]);
307307 }
308308
309309 /**
311311 * nameTypes and try the model in a sample text.
312312 */
313313 @Test
314 public void testNameFinderWithMultipleTypes() throws Exception {
315
316 // train the name finder
317 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
318 new PlainTextByLineStream(new MockInputStreamFactory(
319 new File("opennlp/tools/namefind/voa1.train")), StandardCharsets.UTF_8));
320
321 TrainingParameters params = new TrainingParameters();
322 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
323 params.put(TrainingParameters.CUTOFF_PARAM, 1);
324
325 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
326 params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
327
328 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
329
330 // now test if it can detect the sample sentences
331
332 String[] sentence = new String[] { "U", ".", "S", ".", "President",
314 void testNameFinderWithMultipleTypes() throws Exception {
315
316 // train the name finder
317 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
318 new PlainTextByLineStream(new MockInputStreamFactory(
319 new File("opennlp/tools/namefind/voa1.train")), StandardCharsets.UTF_8));
320
321 TrainingParameters params = new TrainingParameters();
322 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
323 params.put(TrainingParameters.CUTOFF_PARAM, 1);
324
325 TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
326 params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
327
328 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
329
330 // now test if it can detect the sample sentences
331
332 String[] sentence = new String[] {"U", ".", "S", ".", "President",
333333 "Barack", "Obama", "has", "arrived", "in", "South", "Korea", ",",
334334 "where", "he", "is", "expected", "to", "show", "solidarity", "with",
335335 "the", "country", "'", "s", "president", "in", "demanding", "North",
336336 "Korea", "move", "toward", "ending", "its", "nuclear", "weapons",
337 "programs", "." };
338
339 Span[] names1 = nameFinder.find(sentence);
340
341 Assert.assertEquals(new Span(0, 4, "location"), names1[0]);
342 Assert.assertEquals(new Span(5, 7, "person"), names1[1]);
343 Assert.assertEquals(new Span(10, 12, "location"), names1[2]);
344 Assert.assertEquals(new Span(28, 30, "location"), names1[3]);
345 Assert.assertEquals("location", names1[0].getType());
346 Assert.assertEquals("person", names1[1].getType());
347 Assert.assertEquals("location", names1[2].getType());
348 Assert.assertEquals("location", names1[3].getType());
349
350 sentence = new String[] { "Scott", "Snyder", "is", "the", "director", "of",
351 "the", "Center", "for", "U", ".", "S", ".", "Korea", "Policy", "." };
337 "programs", "."};
338
339 Span[] names1 = nameFinder.find(sentence);
340
341 Assertions.assertEquals(new Span(0, 4, "location"), names1[0]);
342 Assertions.assertEquals(new Span(5, 7, "person"), names1[1]);
343 Assertions.assertEquals(new Span(10, 12, "location"), names1[2]);
344 Assertions.assertEquals(new Span(28, 30, "location"), names1[3]);
345 Assertions.assertEquals("location", names1[0].getType());
346 Assertions.assertEquals("person", names1[1].getType());
347 Assertions.assertEquals("location", names1[2].getType());
348 Assertions.assertEquals("location", names1[3].getType());
349
350 sentence = new String[] {"Scott", "Snyder", "is", "the", "director", "of",
351 "the", "Center", "for", "U", ".", "S", ".", "Korea", "Policy", "."};
352352
353353 Span[] names2 = nameFinder.find(sentence);
354354
355 Assert.assertEquals(2, names2.length);
356 Assert.assertEquals(new Span(0, 2, "person"), names2[0]);
357 Assert.assertEquals(new Span(7, 15, "organization"), names2[1]);
358 Assert.assertEquals("person", names2[0].getType());
359 Assert.assertEquals("organization", names2[1].getType());
355 Assertions.assertEquals(2, names2.length);
356 Assertions.assertEquals(new Span(0, 2, "person"), names2[0]);
357 Assertions.assertEquals(new Span(7, 15, "organization"), names2[1]);
358 Assertions.assertEquals("person", names2[0].getType());
359 Assertions.assertEquals("organization", names2[1].getType());
360360 }
361361
362362 }
1515 */
1616 package opennlp.tools.namefind;
1717
18 import org.junit.Assert;
19 import org.junit.Test;
18 import org.junit.jupiter.api.Assertions;
19 import org.junit.jupiter.api.Test;
2020
2121 /**
2222 * This is the test class for {@link NameFinderSequenceValidator}..
3131 private static String OTHER = NameFinderME.OTHER;
3232
3333 @Test
34 public void testContinueCannotBeFirstOutcome() {
34 void testContinueCannotBeFirstOutcome() {
3535
3636 final String outcome = CONTINUE_A;
3737
3838 String[] inputSequence = new String[] {"PersonA", "is", "here"};
3939 String[] outcomesSequence = new String[] {};
40 Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
40 Assertions.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
4141
4242 }
4343
4444 @Test
45 public void testContinueAfterStartAndSameType() {
45 void testContinueAfterStartAndSameType() {
4646
4747 final String outcome = CONTINUE_A;
4848
4949 // previous start, same name type
5050 String[] inputSequence = new String[] {"Stefanie", "Schmidt", "is", "German"};
5151 String[] outcomesSequence = new String[] {START_A};
52 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
52 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
5353
5454 }
5555
5656 @Test
57 public void testContinueAfterStartAndNotSameType() {
57 void testContinueAfterStartAndNotSameType() {
5858
5959 final String outcome = CONTINUE_B;
6060
6161 // previous start, not same name type
6262 String[] inputSequence = new String[] {"PersonA", "LocationA", "something"};
6363 String[] outcomesSequence = new String[] {START_A};
64 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
64 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
6565 }
6666
6767 @Test
68 public void testContinueAfterContinueAndSameType() {
68 void testContinueAfterContinueAndSameType() {
6969
7070 final String outcome = CONTINUE_A;
7171
7272 // previous continue, same name type
7373 String[] inputSequence = new String[] {"FirstName", "MidleName", "LastName", "is", "a", "long", "name"};
7474 String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
75 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
75 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
7676 }
7777
7878 @Test
79 public void testContinueAfterContinueAndNotSameType() {
79 void testContinueAfterContinueAndNotSameType() {
8080
8181 final String outcome = CONTINUE_B;
8282
8383 // previous continue, not same name type
8484 String[] inputSequence = new String[] {"FirstName", "LastName", "LocationA", "something"};
8585 String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
86 Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
86 Assertions.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
8787 }
8888
8989 @Test
90 public void testContinueAfterOther() {
90 void testContinueAfterOther() {
9191
9292 final String outcome = CONTINUE_A;
9393
9494 // previous other
9595 String[] inputSequence = new String[] {"something", "is", "wrong", "here"};
9696 String[] outcomesSequence = new String[] {OTHER};
97 Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
97 Assertions.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
9898 }
9999
100100 @Test
101 public void testStartIsAlwaysAValidOutcome() {
101 void testStartIsAlwaysAValidOutcome() {
102102
103103 final String outcome = START_A;
104104
105105 // pos zero
106106 String[] inputSequence = new String[] {"PersonA", "is", "here"};
107107 String[] outcomesSequence = new String[] {};
108 Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
108 Assertions.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
109109
110110 // pos one, previous other
111111 inputSequence = new String[] {"it's", "PersonA", "again"};
112112 outcomesSequence = new String[] {OTHER};
113 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
113 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
114114
115115 // pos one, previous start
116116 inputSequence = new String[] {"PersonA", "PersonB", "something"};
117117 outcomesSequence = new String[] {START_A};
118 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
118 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
119119
120120 // pos two, previous other
121121 inputSequence = new String[] {"here", "is", "PersonA"};
122122 outcomesSequence = new String[] {OTHER, OTHER};
123 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
123 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
124124
125125 // pos two, previous start, same name type
126126 inputSequence = new String[] {"is", "PersonA", "PersoneB"};
127127 outcomesSequence = new String[] {OTHER, START_A};
128 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
128 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
129129
130130 // pos two, previous start, different name type
131131 inputSequence = new String[] {"something", "PersonA", "OrganizationA"};
132132 outcomesSequence = new String[] {OTHER, START_B};
133 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
133 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
134134
135135 // pos two, previous continue, same name type
136136 inputSequence = new String[] {"Stefanie", "Schmidt", "PersonB", "something"};
137137 outcomesSequence = new String[] {START_A, CONTINUE_A};
138 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
138 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
139139
140140 // pos two, previous continue, not same name type
141141 inputSequence = new String[] {"Stefanie", "Schmidt", "OrganizationA", "something"};
142142 outcomesSequence = new String[] {START_B, CONTINUE_B};
143 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
143 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
144144
145145 }
146146
147147 @Test
148 public void testOtherIsAlwaysAValidOutcome() {
148 void testOtherIsAlwaysAValidOutcome() {
149149
150150 final String outcome = OTHER;
151151
152152 // pos zero
153153 String[] inputSequence = new String[] {"it's", "a", "test"};
154154 String[] outcomesSequence = new String[] {};
155 Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
155 Assertions.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
156156
157157 // pos one, previous other
158158 inputSequence = new String[] {"it's", "a", "test"};
159159 outcomesSequence = new String[] {OTHER};
160 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
160 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
161161
162162 // pos one, previous start
163163 inputSequence = new String[] {"Mike", "is", "here"};
164164 outcomesSequence = new String[] {START_A};
165 Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
165 Assertions.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
166166
167167 // pos two, previous other
168168 inputSequence = new String[] {"it's", "a", "test"};
169169 outcomesSequence = new String[] {OTHER, OTHER};
170 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
170 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
171171
172172 // pos two, previous start
173173 inputSequence = new String[] {"is", "Mike", "here"};
174174 outcomesSequence = new String[] {OTHER, START_A};
175 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
175 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
176176
177177 // pos two, previous continue
178178 inputSequence = new String[] {"Stefanie", "Schmidt", "lives", "at", "home"};
179179 outcomesSequence = new String[] {START_A, CONTINUE_A};
180 Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
180 Assertions.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
181181 }
182182
183183 }
2424 import java.util.List;
2525 import java.util.Map;
2626
27 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2829
2930 import opennlp.tools.formats.ResourceAsStreamFactory;
3031 import opennlp.tools.util.InputStreamFactory;
3435 import opennlp.tools.util.PlainTextByLineStream;
3536 import opennlp.tools.util.Span;
3637
37 import static org.junit.Assert.assertEquals;
38 import static org.junit.Assert.assertFalse;
39 import static org.junit.Assert.assertNull;
40 import static org.junit.Assert.assertTrue;
41 import static org.junit.Assert.fail;
42
4338 /**
4439 * This is the test class for {@link NameSampleDataStream}.
4540 */
5348 /**
5449 * Create a string from an array section.
5550 *
56 * @param tokens the tokens
51 * @param tokens the tokens
5752 * @param nameSpan the section
5853 * @return the string
5954 */
7368 * @throws Exception
7469 */
7570 @Test
76 public void testWithoutNameTypes() throws Exception {
71 void testWithoutNameTypes() throws Exception {
7772 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
7873 "/opennlp/tools/namefind/AnnotatedSentences.txt");
7974
8277
8378 NameSample ns = ds.read();
8479
85 String[] expectedNames = { "Alan McKennedy", "Julie", "Marie Clara",
80 String[] expectedNames = {"Alan McKennedy", "Julie", "Marie Clara",
8681 "Stefanie Schmidt", "Mike", "Stefanie Schmidt", "George", "Luise",
8782 "George Bauer", "Alisa Fernandes", "Alisa", "Mike Sander",
8883 "Stefan Miller", "Stefan Miller", "Stefan Miller", "Elenor Meier",
8984 "Gina Schneider", "Bruno Schulz", "Michel Seile", "George Miller",
90 "Miller", "Peter Schubert", "Natalie" };
85 "Miller", "Peter Schubert", "Natalie"};
9186
9287 List<String> names = new ArrayList<>();
9388 List<Span> spans = new ArrayList<>();
10297
10398 ds.close();
10499
105 assertEquals(expectedNames.length, names.size());
106 assertEquals(createDefaultSpan(6,8), spans.get(0));
107 assertEquals(createDefaultSpan(3,4), spans.get(1));
108 assertEquals(createDefaultSpan(1,3), spans.get(2));
109 assertEquals(createDefaultSpan(4,6), spans.get(3));
110 assertEquals(createDefaultSpan(1,2), spans.get(4));
111 assertEquals(createDefaultSpan(4,6), spans.get(5));
112 assertEquals(createDefaultSpan(2,3), spans.get(6));
113 assertEquals(createDefaultSpan(16,17), spans.get(7));
114 assertEquals(createDefaultSpan(18,20), spans.get(8));
115 assertEquals(createDefaultSpan(0,2), spans.get(9));
116 assertEquals(createDefaultSpan(0,1), spans.get(10));
117 assertEquals(createDefaultSpan(3,5), spans.get(11));
118 assertEquals(createDefaultSpan(3,5), spans.get(12));
119 assertEquals(createDefaultSpan(10,12), spans.get(13));
120 assertEquals(createDefaultSpan(1,3), spans.get(14));
121 assertEquals(createDefaultSpan(6,8), spans.get(15));
122 assertEquals(createDefaultSpan(6,8), spans.get(16));
123 assertEquals(createDefaultSpan(8,10), spans.get(17));
124 assertEquals(createDefaultSpan(12,14), spans.get(18));
125 assertEquals(createDefaultSpan(1,3), spans.get(19));
126 assertEquals(createDefaultSpan(0,1), spans.get(20));
127 assertEquals(createDefaultSpan(2,4), spans.get(21));
128 assertEquals(createDefaultSpan(5,6), spans.get(22));
100 Assertions.assertEquals(expectedNames.length, names.size());
101 Assertions.assertEquals(createDefaultSpan(6, 8), spans.get(0));
102 Assertions.assertEquals(createDefaultSpan(3, 4), spans.get(1));
103 Assertions.assertEquals(createDefaultSpan(1, 3), spans.get(2));
104 Assertions.assertEquals(createDefaultSpan(4, 6), spans.get(3));
105 Assertions.assertEquals(createDefaultSpan(1, 2), spans.get(4));
106 Assertions.assertEquals(createDefaultSpan(4, 6), spans.get(5));
107 Assertions.assertEquals(createDefaultSpan(2, 3), spans.get(6));
108 Assertions.assertEquals(createDefaultSpan(16, 17), spans.get(7));
109 Assertions.assertEquals(createDefaultSpan(18, 20), spans.get(8));
110 Assertions.assertEquals(createDefaultSpan(0, 2), spans.get(9));
111 Assertions.assertEquals(createDefaultSpan(0, 1), spans.get(10));
112 Assertions.assertEquals(createDefaultSpan(3, 5), spans.get(11));
113 Assertions.assertEquals(createDefaultSpan(3, 5), spans.get(12));
114 Assertions.assertEquals(createDefaultSpan(10, 12), spans.get(13));
115 Assertions.assertEquals(createDefaultSpan(1, 3), spans.get(14));
116 Assertions.assertEquals(createDefaultSpan(6, 8), spans.get(15));
117 Assertions.assertEquals(createDefaultSpan(6, 8), spans.get(16));
118 Assertions.assertEquals(createDefaultSpan(8, 10), spans.get(17));
119 Assertions.assertEquals(createDefaultSpan(12, 14), spans.get(18));
120 Assertions.assertEquals(createDefaultSpan(1, 3), spans.get(19));
121 Assertions.assertEquals(createDefaultSpan(0, 1), spans.get(20));
122 Assertions.assertEquals(createDefaultSpan(2, 4), spans.get(21));
123 Assertions.assertEquals(createDefaultSpan(5, 6), spans.get(22));
129124 }
130125
131126 private Span createDefaultSpan(int s, int e) {
136131 * Checks that invalid spans cause an {@link ObjectStreamException} to be thrown.
137132 */
138133 @Test
139 public void testWithoutNameTypeAndInvalidData() {
134 void testWithoutNameTypeAndInvalidData() {
140135
141136 try (NameSampleDataStream sampleStream = new NameSampleDataStream(
142137 ObjectStreamUtils.createObjectStream("<START> <START> Name <END>"))) {
143138 sampleStream.read();
144 fail();
139 Assertions.fail();
145140 } catch (IOException expected) {
146141 // the read above is expected to throw an exception
147142 }
149144 try (NameSampleDataStream sampleStream = new NameSampleDataStream(
150145 ObjectStreamUtils.createObjectStream("<START> Name <END> <END>"))) {
151146 sampleStream.read();
152 fail();
147 Assertions.fail();
153148 } catch (IOException expected) {
154149 // the read above is expected to throw an exception
155150 }
158153 ObjectStreamUtils.createObjectStream(
159154 "<START> <START> Person <END> Street <END>"))) {
160155 sampleStream.read();
161 fail();
156 Assertions.fail();
162157 } catch (IOException expected) {
163158 // the read above is expected to throw an exception
164159 }
171166 * @throws Exception
172167 */
173168 @Test
174 public void testWithNameTypes() throws Exception {
169 void testWithNameTypes() throws Exception {
175170 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
176171 "/opennlp/tools/namefind/voa1.train");
177172
196191 }
197192 ds.close();
198193
199 String[] expectedPerson = { "Barack Obama", "Obama", "Obama",
194 String[] expectedPerson = {"Barack Obama", "Obama", "Obama",
200195 "Lee Myung - bak", "Obama", "Obama", "Scott Snyder", "Snyder", "Obama",
201 "Obama", "Obama", "Tim Peters", "Obama", "Peters" };
202
203 String[] expectedDate = { "Wednesday", "Thursday", "Wednesday" };
204
205 String[] expectedLocation = { "U . S .", "South Korea", "North Korea",
196 "Obama", "Obama", "Tim Peters", "Obama", "Peters"};
197
198 String[] expectedDate = {"Wednesday", "Thursday", "Wednesday"};
199
200 String[] expectedLocation = {"U . S .", "South Korea", "North Korea",
206201 "China", "South Korea", "North Korea", "North Korea", "U . S .",
207202 "South Korea", "United States", "Pyongyang", "North Korea",
208 "South Korea", "Afghanistan", "Seoul", "U . S .", "China" };
203 "South Korea", "Afghanistan", "Seoul", "U . S .", "China"};
209204
210205 String[] expectedOrganization = {"Center for U . S . Korea Policy"};
211206
212 assertEquals(expectedPerson.length, names.get(person).size());
213 assertEquals(expectedDate.length, names.get(date).size());
214 assertEquals(expectedLocation.length, names.get(location).size());
215 assertEquals(expectedOrganization.length, names.get(organization).size());
216
217 assertEquals(new Span(5,7, person), spans.get(person).get(0));
218 assertEquals(expectedPerson[0], names.get(person).get(0));
219 assertEquals(new Span(10,11, person), spans.get(person).get(1));
220 assertEquals(expectedPerson[1], names.get(person).get(1));
221 assertEquals(new Span(29,30, person), spans.get(person).get(2));
222 assertEquals(expectedPerson[2], names.get(person).get(2));
223 assertEquals(new Span(23,27, person), spans.get(person).get(3));
224 assertEquals(expectedPerson[3], names.get(person).get(3));
225 assertEquals(new Span(1,2, person), spans.get(person).get(4));
226 assertEquals(expectedPerson[4], names.get(person).get(4));
227 assertEquals(new Span(8,9, person), spans.get(person).get(5));
228 assertEquals(expectedPerson[5], names.get(person).get(5));
229 assertEquals(new Span(0,2, person), spans.get(person).get(6));
230 assertEquals(expectedPerson[6], names.get(person).get(6));
231 assertEquals(new Span(25,26, person), spans.get(person).get(7));
232 assertEquals(expectedPerson[7], names.get(person).get(7));
233 assertEquals(new Span(1,2, person), spans.get(person).get(8));
234 assertEquals(expectedPerson[8], names.get(person).get(8));
235 assertEquals(new Span(6,7, person), spans.get(person).get(9));
236 assertEquals(expectedPerson[9], names.get(person).get(9));
237 assertEquals(new Span(14,15, person), spans.get(person).get(10));
238 assertEquals(expectedPerson[10], names.get(person).get(10));
239 assertEquals(new Span(0,2, person), spans.get(person).get(11));
240 assertEquals(expectedPerson[11], names.get(person).get(11));
241 assertEquals(new Span(12,13, person), spans.get(person).get(12));
242 assertEquals(expectedPerson[12], names.get(person).get(12));
243 assertEquals(new Span(12,13, person), spans.get(person).get(13));
244 assertEquals(expectedPerson[13], names.get(person).get(13));
245
246 assertEquals(new Span(7,8, date), spans.get(date).get(0));
247 assertEquals(expectedDate[0], names.get(date).get(0));
248 assertEquals(new Span(27,28, date), spans.get(date).get(1));
249 assertEquals(expectedDate[1], names.get(date).get(1));
250 assertEquals(new Span(15,16, date), spans.get(date).get(2));
251 assertEquals(expectedDate[2], names.get(date).get(2));
252
253 assertEquals(new Span(0, 4, location), spans.get(location).get(0));
254 assertEquals(expectedLocation[0], names.get(location).get(0));
255 assertEquals(new Span(10,12, location), spans.get(location).get(1));
256 assertEquals(expectedLocation[1], names.get(location).get(1));
257 assertEquals(new Span(28,30, location), spans.get(location).get(2));
258 assertEquals(expectedLocation[2], names.get(location).get(2));
259 assertEquals(new Span(3,4, location), spans.get(location).get(3));
260 assertEquals(expectedLocation[3], names.get(location).get(3));
261 assertEquals(new Span(5,7, location), spans.get(location).get(4));
262 assertEquals(expectedLocation[4], names.get(location).get(4));
263 assertEquals(new Span(16,18, location), spans.get(location).get(5));
264 assertEquals(expectedLocation[5], names.get(location).get(5));
265 assertEquals(new Span(1,3, location), spans.get(location).get(6));
266 assertEquals(expectedLocation[6], names.get(location).get(6));
267 assertEquals(new Span(5,9, location), spans.get(location).get(7));
268 assertEquals(expectedLocation[7], names.get(location).get(7));
269 assertEquals(new Span(0,2, location), spans.get(location).get(8));
270 assertEquals(expectedLocation[8], names.get(location).get(8));
271 assertEquals(new Span(4,6, location), spans.get(location).get(9));
272 assertEquals(expectedLocation[9], names.get(location).get(9));
273 assertEquals(new Span(10,11, location), spans.get(location).get(10));
274 assertEquals(expectedLocation[10], names.get(location).get(10));
275 assertEquals(new Span(6,8, location), spans.get(location).get(11));
276 assertEquals(expectedLocation[11], names.get(location).get(11));
277 assertEquals(new Span(4,6, location), spans.get(location).get(12));
278 assertEquals(expectedLocation[12], names.get(location).get(12));
279 assertEquals(new Span(10,11, location), spans.get(location).get(13));
280 assertEquals(expectedLocation[13], names.get(location).get(13));
281 assertEquals(new Span(12,13, location), spans.get(location).get(14));
282 assertEquals(expectedLocation[14], names.get(location).get(14));
283 assertEquals(new Span(5,9, location), spans.get(location).get(15));
284 assertEquals(expectedLocation[15], names.get(location).get(15));
285 assertEquals(new Span(11,12, location), spans.get(location).get(16));
286 assertEquals(expectedLocation[16], names.get(location).get(16));
287
288 assertEquals(new Span(7,15, organization), spans.get(organization).get(0));
289 assertEquals(expectedOrganization[0], names.get(organization).get(0));
290
291 }
292
293 @Test
294 public void testWithNameTypeAndInvalidData() {
207 Assertions.assertEquals(expectedPerson.length, names.get(person).size());
208 Assertions.assertEquals(expectedDate.length, names.get(date).size());
209 Assertions.assertEquals(expectedLocation.length, names.get(location).size());
210 Assertions.assertEquals(expectedOrganization.length, names.get(organization).size());
211
212 Assertions.assertEquals(new Span(5, 7, person), spans.get(person).get(0));
213 Assertions.assertEquals(expectedPerson[0], names.get(person).get(0));
214 Assertions.assertEquals(new Span(10, 11, person), spans.get(person).get(1));
215 Assertions.assertEquals(expectedPerson[1], names.get(person).get(1));
216 Assertions.assertEquals(new Span(29, 30, person), spans.get(person).get(2));
217 Assertions.assertEquals(expectedPerson[2], names.get(person).get(2));
218 Assertions.assertEquals(new Span(23, 27, person), spans.get(person).get(3));
219 Assertions.assertEquals(expectedPerson[3], names.get(person).get(3));
220 Assertions.assertEquals(new Span(1, 2, person), spans.get(person).get(4));
221 Assertions.assertEquals(expectedPerson[4], names.get(person).get(4));
222 Assertions.assertEquals(new Span(8, 9, person), spans.get(person).get(5));
223 Assertions.assertEquals(expectedPerson[5], names.get(person).get(5));
224 Assertions.assertEquals(new Span(0, 2, person), spans.get(person).get(6));
225 Assertions.assertEquals(expectedPerson[6], names.get(person).get(6));
226 Assertions.assertEquals(new Span(25, 26, person), spans.get(person).get(7));
227 Assertions.assertEquals(expectedPerson[7], names.get(person).get(7));
228 Assertions.assertEquals(new Span(1, 2, person), spans.get(person).get(8));
229 Assertions.assertEquals(expectedPerson[8], names.get(person).get(8));
230 Assertions.assertEquals(new Span(6, 7, person), spans.get(person).get(9));
231 Assertions.assertEquals(expectedPerson[9], names.get(person).get(9));
232 Assertions.assertEquals(new Span(14, 15, person), spans.get(person).get(10));
233 Assertions.assertEquals(expectedPerson[10], names.get(person).get(10));
234 Assertions.assertEquals(new Span(0, 2, person), spans.get(person).get(11));
235 Assertions.assertEquals(expectedPerson[11], names.get(person).get(11));
236 Assertions.assertEquals(new Span(12, 13, person), spans.get(person).get(12));
237 Assertions.assertEquals(expectedPerson[12], names.get(person).get(12));
238 Assertions.assertEquals(new Span(12, 13, person), spans.get(person).get(13));
239 Assertions.assertEquals(expectedPerson[13], names.get(person).get(13));
240
241 Assertions.assertEquals(new Span(7, 8, date), spans.get(date).get(0));
242 Assertions.assertEquals(expectedDate[0], names.get(date).get(0));
243 Assertions.assertEquals(new Span(27, 28, date), spans.get(date).get(1));
244 Assertions.assertEquals(expectedDate[1], names.get(date).get(1));
245 Assertions.assertEquals(new Span(15, 16, date), spans.get(date).get(2));
246 Assertions.assertEquals(expectedDate[2], names.get(date).get(2));
247
248 Assertions.assertEquals(new Span(0, 4, location), spans.get(location).get(0));
249 Assertions.assertEquals(expectedLocation[0], names.get(location).get(0));
250 Assertions.assertEquals(new Span(10, 12, location), spans.get(location).get(1));
251 Assertions.assertEquals(expectedLocation[1], names.get(location).get(1));
252 Assertions.assertEquals(new Span(28, 30, location), spans.get(location).get(2));
253 Assertions.assertEquals(expectedLocation[2], names.get(location).get(2));
254 Assertions.assertEquals(new Span(3, 4, location), spans.get(location).get(3));
255 Assertions.assertEquals(expectedLocation[3], names.get(location).get(3));
256 Assertions.assertEquals(new Span(5, 7, location), spans.get(location).get(4));
257 Assertions.assertEquals(expectedLocation[4], names.get(location).get(4));
258 Assertions.assertEquals(new Span(16, 18, location), spans.get(location).get(5));
259 Assertions.assertEquals(expectedLocation[5], names.get(location).get(5));
260 Assertions.assertEquals(new Span(1, 3, location), spans.get(location).get(6));
261 Assertions.assertEquals(expectedLocation[6], names.get(location).get(6));
262 Assertions.assertEquals(new Span(5, 9, location), spans.get(location).get(7));
263 Assertions.assertEquals(expectedLocation[7], names.get(location).get(7));
264 Assertions.assertEquals(new Span(0, 2, location), spans.get(location).get(8));
265 Assertions.assertEquals(expectedLocation[8], names.get(location).get(8));
266 Assertions.assertEquals(new Span(4, 6, location), spans.get(location).get(9));
267 Assertions.assertEquals(expectedLocation[9], names.get(location).get(9));
268 Assertions.assertEquals(new Span(10, 11, location), spans.get(location).get(10));
269 Assertions.assertEquals(expectedLocation[10], names.get(location).get(10));
270 Assertions.assertEquals(new Span(6, 8, location), spans.get(location).get(11));
271 Assertions.assertEquals(expectedLocation[11], names.get(location).get(11));
272 Assertions.assertEquals(new Span(4, 6, location), spans.get(location).get(12));
273 Assertions.assertEquals(expectedLocation[12], names.get(location).get(12));
274 Assertions.assertEquals(new Span(10, 11, location), spans.get(location).get(13));
275 Assertions.assertEquals(expectedLocation[13], names.get(location).get(13));
276 Assertions.assertEquals(new Span(12, 13, location), spans.get(location).get(14));
277 Assertions.assertEquals(expectedLocation[14], names.get(location).get(14));
278 Assertions.assertEquals(new Span(5, 9, location), spans.get(location).get(15));
279 Assertions.assertEquals(expectedLocation[15], names.get(location).get(15));
280 Assertions.assertEquals(new Span(11, 12, location), spans.get(location).get(16));
281 Assertions.assertEquals(expectedLocation[16], names.get(location).get(16));
282
283 Assertions.assertEquals(new Span(7, 15, organization), spans.get(organization).get(0));
284 Assertions.assertEquals(expectedOrganization[0], names.get(organization).get(0));
285
286 }
287
288 @Test
289 void testWithNameTypeAndInvalidData() {
295290
296291 try (NameSampleDataStream sampleStream = new NameSampleDataStream(
297292 ObjectStreamUtils.createObjectStream("<START:> Name <END>"))) {
298293 sampleStream.read();
299 fail();
294 Assertions.fail();
300295 } catch (IOException expected) {
301296 // the read above is expected to throw an exception
302297 }
305300 ObjectStreamUtils.createObjectStream(
306301 "<START:street> <START:person> Name <END> <END>"))) {
307302 sampleStream.read();
308 fail();
309 } catch (IOException expected) {
310 // the read above is expected to throw an exception
311 }
312 }
313
314 @Test
315 public void testClearAdaptiveData() throws IOException {
303 Assertions.fail();
304 } catch (IOException expected) {
305 // the read above is expected to throw an exception
306 }
307 }
308
309 @Test
310 void testClearAdaptiveData() throws IOException {
316311 String trainingData = "a\n" +
317312 "b\n" +
318313 "c\n" +
324319
325320 ObjectStream<NameSample> trainingStream = new NameSampleDataStream(untokenizedLineStream);
326321
327 assertFalse(trainingStream.read().isClearAdaptiveDataSet());
328 assertFalse(trainingStream.read().isClearAdaptiveDataSet());
329 assertFalse(trainingStream.read().isClearAdaptiveDataSet());
330 assertTrue(trainingStream.read().isClearAdaptiveDataSet());
331 assertNull(trainingStream.read());
322 Assertions.assertFalse(trainingStream.read().isClearAdaptiveDataSet());
323 Assertions.assertFalse(trainingStream.read().isClearAdaptiveDataSet());
324 Assertions.assertFalse(trainingStream.read().isClearAdaptiveDataSet());
325 Assertions.assertTrue(trainingStream.read().isClearAdaptiveDataSet());
326 Assertions.assertNull(trainingStream.read());
332327
333328 trainingStream.close();
334329 }
335330
336331 @Test
337 public void testHtmlNameSampleParsing() throws IOException {
332 void testHtmlNameSampleParsing() throws IOException {
338333 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
339334 "/opennlp/tools/namefind/html1.train");
340335
343338
344339 NameSample ns = ds.read();
345340
346 assertEquals(1, ns.getSentence().length);
347 assertEquals("<html>", ns.getSentence()[0]);
348
349 ns = ds.read();
350 assertEquals(1, ns.getSentence().length);
351 assertEquals("<head/>", ns.getSentence()[0]);
352
353 ns = ds.read();
354 assertEquals(1, ns.getSentence().length);
355 assertEquals("<body>", ns.getSentence()[0]);
356
357 ns = ds.read();
358 assertEquals(1, ns.getSentence().length);
359 assertEquals("<ul>", ns.getSentence()[0]);
341 Assertions.assertEquals(1, ns.getSentence().length);
342 Assertions.assertEquals("<html>", ns.getSentence()[0]);
343
344 ns = ds.read();
345 Assertions.assertEquals(1, ns.getSentence().length);
346 Assertions.assertEquals("<head/>", ns.getSentence()[0]);
347
348 ns = ds.read();
349 Assertions.assertEquals(1, ns.getSentence().length);
350 Assertions.assertEquals("<body>", ns.getSentence()[0]);
351
352 ns = ds.read();
353 Assertions.assertEquals(1, ns.getSentence().length);
354 Assertions.assertEquals("<ul>", ns.getSentence()[0]);
360355
361356 // <li> <START:organization> Advanced Integrated Pest Management <END> </li>
362357 ns = ds.read();
363 assertEquals(6, ns.getSentence().length);
364 assertEquals("<li>", ns.getSentence()[0]);
365 assertEquals("Advanced", ns.getSentence()[1]);
366 assertEquals("Integrated", ns.getSentence()[2]);
367 assertEquals("Pest", ns.getSentence()[3]);
368 assertEquals("Management", ns.getSentence()[4]);
369 assertEquals("</li>", ns.getSentence()[5]);
370 assertEquals(new Span(1, 5, organization), ns.getNames()[0]);
358 Assertions.assertEquals(6, ns.getSentence().length);
359 Assertions.assertEquals("<li>", ns.getSentence()[0]);
360 Assertions.assertEquals("Advanced", ns.getSentence()[1]);
361 Assertions.assertEquals("Integrated", ns.getSentence()[2]);
362 Assertions.assertEquals("Pest", ns.getSentence()[3]);
363 Assertions.assertEquals("Management", ns.getSentence()[4]);
364 Assertions.assertEquals("</li>", ns.getSentence()[5]);
365 Assertions.assertEquals(new Span(1, 5, organization), ns.getNames()[0]);
371366
372367 // <li> <START:organization> Bay Cities Produce Co., Inc. <END> </li>
373368 ns = ds.read();
374 assertEquals(7, ns.getSentence().length);
375 assertEquals("<li>", ns.getSentence()[0]);
376 assertEquals("Bay", ns.getSentence()[1]);
377 assertEquals("Cities", ns.getSentence()[2]);
378 assertEquals("Produce", ns.getSentence()[3]);
379 assertEquals("Co.,", ns.getSentence()[4]);
380 assertEquals("Inc.", ns.getSentence()[5]);
381 assertEquals("</li>", ns.getSentence()[6]);
382 assertEquals(new Span(1, 6, organization), ns.getNames()[0]);
383
384 ns = ds.read();
385 assertEquals(1, ns.getSentence().length);
386 assertEquals("</ul>", ns.getSentence()[0]);
387
388 ns = ds.read();
389 assertEquals(1, ns.getSentence().length);
390 assertEquals("</body>", ns.getSentence()[0]);
391
392 ns = ds.read();
393 assertEquals(1, ns.getSentence().length);
394 assertEquals("</html>", ns.getSentence()[0]);
395
396 assertNull(ds.read());
369 Assertions.assertEquals(7, ns.getSentence().length);
370 Assertions.assertEquals("<li>", ns.getSentence()[0]);
371 Assertions.assertEquals("Bay", ns.getSentence()[1]);
372 Assertions.assertEquals("Cities", ns.getSentence()[2]);
373 Assertions.assertEquals("Produce", ns.getSentence()[3]);
374 Assertions.assertEquals("Co.,", ns.getSentence()[4]);
375 Assertions.assertEquals("Inc.", ns.getSentence()[5]);
376 Assertions.assertEquals("</li>", ns.getSentence()[6]);
377 Assertions.assertEquals(new Span(1, 6, organization), ns.getNames()[0]);
378
379 ns = ds.read();
380 Assertions.assertEquals(1, ns.getSentence().length);
381 Assertions.assertEquals("</ul>", ns.getSentence()[0]);
382
383 ns = ds.read();
384 Assertions.assertEquals(1, ns.getSentence().length);
385 Assertions.assertEquals("</body>", ns.getSentence()[0]);
386
387 ns = ds.read();
388 Assertions.assertEquals(1, ns.getSentence().length);
389 Assertions.assertEquals("</html>", ns.getSentence()[0]);
390
391 Assertions.assertNull(ds.read());
397392
398393 ds.close();
399394 }
2424 import java.io.ObjectOutput;
2525 import java.io.ObjectOutputStream;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.util.Span;
3131
3232 /**
3333 * This is the test class for {@link NameSample}.
3434 */
35
3536 public class NameSampleTest {
3637
3738 /**
5253 NameSample nameSample;
5354 if (useTypes) {
5455 nameSample = new NameSample(sentence, names, false);
55 }
56 else {
56 } else {
5757 Span[] namesWithoutType = new Span[names.length];
5858 for (int i = 0; i < names.length; i++) {
5959 namesWithoutType[i] = new Span(names[i].getStart(),
6767 }
6868
6969 @Test
70 public void testNameSampleSerDe() throws IOException {
70 void testNameSampleSerDe() throws IOException {
7171 NameSample nameSample = createGoldSample();
7272 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
7373 ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream);
8585 // do nothing
8686 }
8787
88 Assert.assertNotNull(deSerializedNameSample);
89 Assert.assertArrayEquals(nameSample.getSentence(), deSerializedNameSample.getSentence());
90 Assert.assertArrayEquals(nameSample.getNames(), deSerializedNameSample.getNames());
91 Assert.assertArrayEquals(nameSample.getAdditionalContext(),
88 Assertions.assertNotNull(deSerializedNameSample);
89 Assertions.assertArrayEquals(nameSample.getSentence(), deSerializedNameSample.getSentence());
90 Assertions.assertArrayEquals(nameSample.getNames(), deSerializedNameSample.getNames());
91 Assertions.assertArrayEquals(nameSample.getAdditionalContext(),
9292 deSerializedNameSample.getAdditionalContext());
9393 }
9494
9696 * Test serialization of sequential spans.
9797 */
9898 @Test
99 public void testSequentialSpans() {
99 void testSequentialSpans() {
100100
101101 String[] sentence = {"A", "Place", "a", "time", "A", "Person", "."};
102102
105105
106106 NameSample nameSample = new NameSample(sentence, names, false);
107107
108 Assert.assertEquals(
108 Assertions.assertEquals(
109109 "<START:Place> A Place <END> <START:Time> a time <END> <START:Person> A Person <END> .",
110110 nameSample.toString());
111111 }
114114 * Test serialization of unsorted sequential spans.
115115 */
116116 @Test
117 public void testUnsortedSequentialSpans() {
117 void testUnsortedSequentialSpans() {
118118
119119 String[] sentence = {"A", "Place", "a", "time", "A", "Person", "."};
120120
123123
124124 NameSample nameSample = new NameSample(sentence, names, false);
125125
126 Assert.assertEquals(
126 Assertions.assertEquals(
127127 "<START:Place> A Place <END> <START:Time> a time <END> <START:Person> A Person <END> .",
128128 nameSample.toString());
129129 }
131131 /**
132132 * Test that it fails if name spans are overlapping
133133 */
134 @Test(expected = RuntimeException.class)
135 public void testOverlappingNameSpans() throws Exception {
136
137 String[] sentence = {"A", "Place", "a", "time", "A", "Person", "."};
138
139 Span[] names = {new Span(0, 2, "Place"), new Span(3, 5, "Person"),
140 new Span(2, 4, "Time")};
141
142 new NameSample(sentence, names, false);
134 @Test
135 void testOverlappingNameSpans() {
136
137 Assertions.assertThrows(RuntimeException.class, () -> {
138
139 String[] sentence = {"A", "Place", "a", "time", "A", "Person", "."};
140
141 Span[] names = {new Span(0, 2, "Place"), new Span(3, 5, "Person"),
142 new Span(2, 4, "Time")};
143
144 new NameSample(sentence, names, false);
145 });
146
147
143148 }
144149
145150 /**
147152 * string representation and validate it.
148153 */
149154 @Test
150 public void testNoTypesToString() {
155 void testNoTypesToString() {
151156 String nameSampleStr = createSimpleNameSample(false).toString();
152157
153 Assert.assertEquals("<START> U . S . <END> President <START> Barack Obama <END>" +
158 Assertions.assertEquals("<START> U . S . <END> President <START> Barack Obama <END>" +
154159 " is considering " +
155160 "sending additional American forces to <START> Afghanistan <END> .", nameSampleStr);
156161 }
160165 * string representation and validate it.
161166 */
162167 @Test
163 public void testWithTypesToString() throws Exception {
168 void testWithTypesToString() throws Exception {
164169 String nameSampleStr = createSimpleNameSample(true).toString();
165 Assert.assertEquals("<START:Location> U . S . <END> President <START:Person>" +
170 Assertions.assertEquals("<START:Location> U . S . <END> President <START:Person>" +
166171 " Barack Obama <END> " +
167 "is considering sending additional American forces to <START:Location> Afghanistan <END> .",
172 "is considering sending additional American forces to <START:Location> Afghanistan <END> .",
168173 nameSampleStr);
169174
170175 NameSample parsedSample = NameSample.parse("<START:Location> U . S . <END> " +
171 "President <START:Person> Barack Obama <END> is considering sending " +
172 "additional American forces to <START:Location> Afghanistan <END> .",
176 "President <START:Person> Barack Obama <END> is considering sending " +
177 "additional American forces to <START:Location> Afghanistan <END> .",
173178 false);
174179
175 Assert.assertEquals(createSimpleNameSample(true), parsedSample);
180 Assertions.assertEquals(createSimpleNameSample(true), parsedSample);
176181 }
177182
178183 /**
180185 * correctly.
181186 */
182187 @Test
183 public void testNameAtEnd() {
188 void testNameAtEnd() {
184189
185190 String[] sentence = new String[] {
186191 "My",
189194 "Anna"
190195 };
191196
192 NameSample sample = new NameSample(sentence, new Span[]{new Span(3, 4)}, false);
193
194 Assert.assertEquals("My name is <START> Anna <END>", sample.toString());
197 NameSample sample = new NameSample(sentence, new Span[] {new Span(3, 4)}, false);
198
199 Assertions.assertEquals("My name is <START> Anna <END>", sample.toString());
195200 }
196201
197202 /**
200205 * @throws Exception
201206 */
202207 @Test
203 public void testParseWithAdditionalSpace() throws Exception {
208 void testParseWithAdditionalSpace() throws Exception {
204209 String line = "<START> M . K . <END> <START> Schwitters <END> ? <START> Heartfield <END> ?";
205210
206211 NameSample test = NameSample.parse(line, false);
207212
208 Assert.assertEquals(8, test.getSentence().length);
213 Assertions.assertEquals(8, test.getSentence().length);
209214 }
210215
211216 /**
212217 * Checks if it accepts name type with some special characters
213218 */
214219 @Test
215 public void testTypeWithSpecialChars() throws Exception {
220 void testTypeWithSpecialChars() throws Exception {
216221 NameSample parsedSample = NameSample
217222 .parse(
218223 "<START:type-1> U . S . <END> "
220225 + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .",
221226 false);
222227
223 Assert.assertEquals(3, parsedSample.getNames().length);
224 Assert.assertEquals("type-1", parsedSample.getNames()[0].getType());
225 Assert.assertEquals("type_2", parsedSample.getNames()[1].getType());
226 Assert.assertEquals("type_3-/;.,&%$", parsedSample.getNames()[2].getType());
228 Assertions.assertEquals(3, parsedSample.getNames().length);
229 Assertions.assertEquals("type-1", parsedSample.getNames()[0].getType());
230 Assertions.assertEquals("type_2", parsedSample.getNames()[1].getType());
231 Assertions.assertEquals("type_3-/;.,&%$", parsedSample.getNames()[2].getType());
227232 }
228233
229234 /**
230235 * Test if it fails to parse empty type
231236 */
232 @Test(expected = IOException.class)
233 public void testMissingType() throws Exception {
234 NameSample.parse("<START:> token <END>", false);
237 @Test
238 void testMissingType() {
239 Assertions.assertThrows(IOException.class, () -> {
240 NameSample.parse("<START:> token <END>", false);
241 });
235242 }
236243
237244 /**
238245 * Test if it fails to parse type with space
239 * @throws Exception
240 */
241 @Test(expected = IOException.class)
242 public void testTypeWithSpace() throws Exception {
243 NameSample.parse("<START:abc a> token <END>", false);
246 *
247 */
248 @Test
249 void testTypeWithSpace() {
250 Assertions.assertThrows(IOException.class, () -> {
251 NameSample.parse("<START:abc a> token <END>", false);
252 });
244253 }
245254
246255 /**
247256 * Test if it fails to parse type with new line
248 * @throws Exception
249 */
250 @Test(expected = IOException.class)
251 public void testTypeWithNewLine() throws Exception {
252 NameSample.parse("<START:abc\na> token <END>", false);
257 *
258 */
259 @Test
260 void testTypeWithNewLine() {
261 Assertions.assertThrows(IOException.class, () -> {
262 NameSample.parse("<START:abc\na> token <END>", false);
263 });
253264 }
254265
255266 /**
256267 * Test if it fails to parse type with :
257 * @throws Exception
258 */
259 @Test(expected = IOException.class)
260 public void testTypeWithInvalidChar1() throws Exception {
261 NameSample.parse("<START:abc:a> token <END>", false);
268 *
269 */
270 @Test
271 void testTypeWithInvalidChar1() {
272 Assertions.assertThrows(IOException.class, () -> {
273 NameSample.parse("<START:abc:a> token <END>", false);
274 });
262275 }
263276
264277 /**
265278 * Test if it fails to parse type with >
266 * @throws Exception
267 */
268 @Test(expected = IOException.class)
269 public void testTypeWithInvalidChar2() throws Exception {
270 NameSample.parse("<START:abc>a> token <END>", false);
279 *
280 */
281 @Test
282 void testTypeWithInvalidChar2() {
283 Assertions.assertThrows(IOException.class, () -> {
284 NameSample.parse("<START:abc>a> token <END>", false);
285 });
271286 }
272287
273288 /**
274289 * Test if it fails to parse nested names
275 * @throws Exception
276 */
277 @Test(expected = IOException.class)
278 public void testNestedNameSpans() throws Exception {
279 NameSample.parse("<START:Person> <START:Location> Kennedy <END> City <END>", false);
280 }
281
282 @Test
283 public void testEquals() {
284 Assert.assertFalse(createGoldSample() == createGoldSample());
285 Assert.assertTrue(createGoldSample().equals(createGoldSample()));
286 Assert.assertFalse(createGoldSample().equals(createPredSample()));
287 Assert.assertFalse(createPredSample().equals(new Object()));
290 *
291 */
292 @Test
293 void testNestedNameSpans() {
294 Assertions.assertThrows(IOException.class, () -> {
295 NameSample.parse("<START:Person> <START:Location> Kennedy <END> City <END>", false);
296 });
297 }
298
299 @Test
300 void testEquals() {
301 Assertions.assertFalse(createGoldSample() == createGoldSample());
302 Assertions.assertTrue(createGoldSample().equals(createGoldSample()));
303 Assertions.assertFalse(createGoldSample().equals(createPredSample()));
304 Assertions.assertFalse(createPredSample().equals(new Object()));
288305 }
289306
290307 public static NameSample createGoldSample() {
2424 import java.util.Map;
2525 import java.util.stream.Collectors;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.util.InputStreamFactory;
3131 import opennlp.tools.util.ObjectStream;
4646 private static final String organization = "organization";
4747
4848 @Test
49 public void testNoFilter() throws IOException {
49 void testNoFilter() throws IOException {
5050
5151 final String[] types = new String[] {};
5252
5454
5555 NameSample ns = filter.read();
5656
57 Assert.assertEquals(0, ns.getNames().length);
57 Assertions.assertEquals(0, ns.getNames().length);
5858
5959 }
6060
6161 @Test
62 public void testSingleFilter() throws IOException {
62 void testSingleFilter() throws IOException {
6363
6464 final String[] types = new String[] {organization};
6565
6767
6868 NameSample ns = filter.read();
6969
70 Assert.assertEquals(1, ns.getNames().length);
71 Assert.assertEquals(organization, ns.getNames()[0].getType());
70 Assertions.assertEquals(1, ns.getNames().length);
71 Assertions.assertEquals(organization, ns.getNames()[0].getType());
7272
7373 }
7474
7575 @Test
76 public void testMultiFilter() throws IOException {
76 void testMultiFilter() throws IOException {
7777
7878 final String[] types = new String[] {person, organization};
7979
8383
8484 Map<String, List<Span>> collect = Arrays.stream(ns.getNames())
8585 .collect(Collectors.groupingBy(Span::getType));
86 Assert.assertEquals(2, collect.size());
87 Assert.assertEquals(2, collect.get(person).size());
88 Assert.assertEquals(1, collect.get(organization).size());
86 Assertions.assertEquals(2, collect.size());
87 Assertions.assertEquals(2, collect.get(person).size());
88 Assertions.assertEquals(1, collect.get(organization).size());
8989
9090 }
9191
1919 import java.util.Arrays;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.tokenize.WhitespaceTokenizer;
2727 import opennlp.tools.util.Span;
3636 " DMS 45N 123W AKA" +
3737 " +45.1234, -123.12 AKA 45.1234N 123.12W AKA 45 30 N 50 30 W";
3838
39 @Before
40 public void setUp() {
39 @BeforeEach
40 void setUp() {
4141 regexNameFinder = RegexNameFinderFactory.getDefaultRegexNameFinders(
4242 RegexNameFinderFactory.DEFAULT_REGEX_NAME_FINDER.DEGREES_MIN_SEC_LAT_LON,
4343 RegexNameFinderFactory.DEFAULT_REGEX_NAME_FINDER.EMAIL,
4747 }
4848
4949 @Test
50 public void testEmail() throws Exception {
50 void testEmail() {
5151 String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);
5252 Span[] find = regexNameFinder.find(tokens);
5353 List<Span> spanList = Arrays.asList(find);
54 Assert.assertTrue(spanList.contains(new Span(3, 4, "EMAIL")));
54 Assertions.assertTrue(spanList.contains(new Span(3, 4, "EMAIL")));
5555 Span emailSpan = new Span(3, 4, "EMAIL");
56 Assert.assertEquals("opennlp@gmail.com", tokens[emailSpan.getStart()]);
56 Assertions.assertEquals("opennlp@gmail.com", tokens[emailSpan.getStart()]);
5757 }
5858
5959 @Test
60 public void testPhoneNumber() throws Exception {
60 void testPhoneNumber() {
6161 String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);
6262 Span[] find = regexNameFinder.find(tokens);
6363 List<Span> spanList = Arrays.asList(find);
6464 Span phoneSpan = new Span(9, 10, "PHONE_NUM");
65 Assert.assertTrue(spanList.contains(phoneSpan));
66 Assert.assertEquals("123-234-5678", tokens[phoneSpan.getStart()]);
65 Assertions.assertTrue(spanList.contains(phoneSpan));
66 Assertions.assertEquals("123-234-5678", tokens[phoneSpan.getStart()]);
6767 }
6868
6969 @Test
70 public void testURL() throws Exception {
70 void testURL() {
7171 String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);
7272 Span[] find = regexNameFinder.find(tokens);
7373 List<Span> spanList = Arrays.asList(find);
7474 Span urlSpan = new Span(13, 14, "URL");
75 Assert.assertTrue(spanList.contains(urlSpan));
76 Assert.assertEquals("https://www.google.com", tokens[urlSpan.getStart()]);
75 Assertions.assertTrue(spanList.contains(urlSpan));
76 Assertions.assertEquals("https://www.google.com", tokens[urlSpan.getStart()]);
7777 }
7878
7979 @Test
80 public void testLatLong() throws Exception {
80 void testLatLong() {
8181 String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);
8282 Span[] find = regexNameFinder.find(tokens);
8383 List<Span> spanList = Arrays.asList(find);
8484 Span latLongSpan1 = new Span(22, 24, "DEGREES_MIN_SEC_LAT_LON");
8585 Span latLongSpan2 = new Span(35, 41, "DEGREES_MIN_SEC_LAT_LON");
86 Assert.assertTrue(spanList.contains(latLongSpan1));
87 Assert.assertTrue(spanList.contains(latLongSpan2));
88 Assert.assertEquals("528", tokens[latLongSpan1.getStart()]);
89 Assert.assertEquals("45", tokens[latLongSpan2.getStart()]);
86 Assertions.assertTrue(spanList.contains(latLongSpan1));
87 Assertions.assertTrue(spanList.contains(latLongSpan2));
88 Assertions.assertEquals("528", tokens[latLongSpan1.getStart()]);
89 Assertions.assertEquals("45", tokens[latLongSpan2.getStart()]);
9090 }
9191
9292 @Test
93 public void testMgrs() throws Exception {
93 void testMgrs() {
9494 String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);
9595 Span[] find = regexNameFinder.find(tokens);
9696 List<Span> spanList = Arrays.asList(find);
9797 Span mgrsSpan1 = new Span(18, 19, "MGRS");
9898 Span mgrsSpan2 = new Span(20, 24, "MGRS");
99 Assert.assertTrue(spanList.contains(mgrsSpan1));
100 Assert.assertTrue(spanList.contains(mgrsSpan2));
101 Assert.assertEquals("11SKU528111".toLowerCase(), tokens[mgrsSpan1.getStart()]);
102 Assert.assertEquals("11S", tokens[mgrsSpan2.getStart()]);
99 Assertions.assertTrue(spanList.contains(mgrsSpan1));
100 Assertions.assertTrue(spanList.contains(mgrsSpan2));
101 Assertions.assertEquals("11SKU528111".toLowerCase(), tokens[mgrsSpan1.getStart()]);
102 Assertions.assertEquals("11S", tokens[mgrsSpan2.getStart()]);
103103 }
104104 }
105105
2020 import java.util.Map;
2121 import java.util.regex.Pattern;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.util.Span;
2727
3131 public class RegexNameFinderTest {
3232
3333 @Test
34 public void testFindSingleTokenPattern() {
34 void testFindSingleTokenPattern() {
3535
3636 Pattern testPattern = Pattern.compile("test");
37 String[] sentence = new String[]{"a", "test", "b", "c"};
37 String[] sentence = new String[] {"a", "test", "b", "c"};
3838
3939
40 Pattern[] patterns = new Pattern[]{testPattern};
40 Pattern[] patterns = new Pattern[] {testPattern};
4141 Map<String, Pattern[]> regexMap = new HashMap<>();
4242 String type = "testtype";
4343
4444 regexMap.put(type, patterns);
4545
4646 RegexNameFinder finder =
47 new RegexNameFinder(regexMap);
47 new RegexNameFinder(regexMap);
4848
4949 Span[] result = finder.find(sentence);
5050
51 Assert.assertTrue(result.length == 1);
51 Assertions.assertTrue(result.length == 1);
5252
53 Assert.assertTrue(result[0].getStart() == 1);
54 Assert.assertTrue(result[0].getEnd() == 2);
53 Assertions.assertTrue(result[0].getStart() == 1);
54 Assertions.assertTrue(result[0].getEnd() == 2);
5555 }
5656
5757 @Test
58 public void testFindTokenizdPattern() {
58 void testFindTokenizdPattern() {
5959 Pattern testPattern = Pattern.compile("[0-9]+ year");
6060
61 String[] sentence = new String[]{"a", "80", "year", "b", "c"};
61 String[] sentence = new String[] {"a", "80", "year", "b", "c"};
6262
63 Pattern[] patterns = new Pattern[]{testPattern};
63 Pattern[] patterns = new Pattern[] {testPattern};
6464 Map<String, Pattern[]> regexMap = new HashMap<>();
6565 String type = "match";
6666
6767 regexMap.put(type, patterns);
6868
6969 RegexNameFinder finder =
70 new RegexNameFinder(regexMap);
70 new RegexNameFinder(regexMap);
7171
7272 Span[] result = finder.find(sentence);
7373
74 Assert.assertTrue(result.length == 1);
74 Assertions.assertTrue(result.length == 1);
7575
76 Assert.assertTrue(result[0].getStart() == 1);
77 Assert.assertTrue(result[0].getEnd() == 3);
78 Assert.assertTrue(result[0].getType().equals("match"));
76 Assertions.assertTrue(result[0].getStart() == 1);
77 Assertions.assertTrue(result[0].getEnd() == 3);
78 Assertions.assertTrue(result[0].getType().equals("match"));
7979 }
8080
8181 @Test
82 public void testFindMatchingPatternWithoutMatchingTokenBounds() {
82 void testFindMatchingPatternWithoutMatchingTokenBounds() {
8383 Pattern testPattern = Pattern.compile("[0-8] year"); // does match "0 year"
8484
85 String[] sentence = new String[]{"a", "80", "year", "c"};
86 Pattern[] patterns = new Pattern[]{testPattern};
85 String[] sentence = new String[] {"a", "80", "year", "c"};
86 Pattern[] patterns = new Pattern[] {testPattern};
8787 Map<String, Pattern[]> regexMap = new HashMap<>();
8888 String type = "testtype";
8989
9393
9494 Span[] result = finder.find(sentence);
9595
96 Assert.assertTrue(result.length == 0);
96 Assertions.assertTrue(result.length == 0);
9797 }
9898 }
2121 import java.util.Collections;
2222 import java.util.Map;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.cmdline.namefind.NameEvaluationErrorListener;
2828 import opennlp.tools.formats.ResourceAsStreamFactory;
3838 private final String TYPE = null;
3939
4040 @Test
41 /*
42 * Test that reproduces jira OPENNLP-463
43 */
44 public void testWithNullResources() throws Exception {
41 /*
42 * Test that reproduces jira OPENNLP-463
43 */
44 void testWithNullResources() throws Exception {
4545
4646 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
4747 "/opennlp/tools/namefind/AnnotatedSentences.txt");
5757 ModelType.MAXENT.toString());
5858
5959 TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
60 TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor)null);
60 TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor) null);
6161
6262 cv.evaluate(sampleStream, 2);
6363
64 Assert.assertNotNull(cv.getFMeasure());
64 Assertions.assertNotNull(cv.getFMeasure());
6565 }
6666
6767 @Test
68 /*
69 * Test that tries to reproduce jira OPENNLP-466
70 */
71 public void testWithNameEvaluationErrorListener() throws Exception {
68 /*
69 * Test that tries to reproduce jira OPENNLP-466
70 */
71 void testWithNameEvaluationErrorListener() throws Exception {
7272
7373 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
7474 "/opennlp/tools/namefind/AnnotatedSentences.txt");
9292
9393 cv.evaluate(sampleStream, 2);
9494
95 Assert.assertTrue(out.size() > 0);
96 Assert.assertNotNull(cv.getFMeasure());
95 Assertions.assertTrue(out.size() > 0);
96 Assertions.assertNotNull(cv.getFMeasure());
9797 }
98
99 @Test(expected = InsufficientTrainingDataException.class)
100 public void testWithInsufficientData() throws Exception {
10198
102 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
103 "/opennlp/tools/namefind/AnnotatedSentencesInsufficient.txt");
99 @Test
100 void testWithInsufficientData() {
104101
105 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
106 new PlainTextByLineStream(in, StandardCharsets.ISO_8859_1));
102 Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
107103
108 TrainingParameters mlParams = new TrainingParameters();
109 mlParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
110 mlParams.put(TrainingParameters.CUTOFF_PARAM, 1);
104 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
105 "/opennlp/tools/namefind/AnnotatedSentencesInsufficient.txt");
111106
112 mlParams.put(TrainingParameters.ALGORITHM_PARAM,
113 ModelType.MAXENT.toString());
107 ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
108 new PlainTextByLineStream(in, StandardCharsets.ISO_8859_1));
114109
115 TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
116 TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor)null);
110 TrainingParameters mlParams = new TrainingParameters();
111 mlParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
112 mlParams.put(TrainingParameters.CUTOFF_PARAM, 1);
117113
118 cv.evaluate(sampleStream, 2);
114 mlParams.put(TrainingParameters.ALGORITHM_PARAM,
115 ModelType.MAXENT.toString());
116
117 TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
118 TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor) null);
119
120 cv.evaluate(sampleStream, 2);
121
122 });
123
119124
120125 }
121
126
122127 }
1919 import java.io.ByteArrayOutputStream;
2020 import java.io.OutputStream;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.cmdline.namefind.NameEvaluationErrorListener;
2626 import opennlp.tools.util.Span;
3535 public class TokenNameFinderEvaluatorTest {
3636
3737
38 /** Return a dummy name finder that always return something expected */
38 /**
39 * Return a dummy name finder that always return something expected
40 */
3941 public TokenNameFinder mockTokenNameFinder(Span[] ret) {
4042 TokenNameFinder mockInstance = mock(TokenNameFinder.class);
4143 when(mockInstance.find(any(String[].class))).thenReturn(ret);
4345 }
4446
4547 @Test
46 public void testPositive() {
48 void testPositive() {
4749 OutputStream stream = new ByteArrayOutputStream();
4850 TokenNameFinderEvaluationMonitor listener = new NameEvaluationErrorListener(stream);
4951
5355
5456 eval.evaluateSample(createSimpleNameSampleA());
5557
56 Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
58 Assertions.assertEquals(1.0, eval.getFMeasure().getFMeasure());
5759
58 Assert.assertEquals(0, stream.toString().length());
60 Assertions.assertEquals(0, stream.toString().length());
5961 }
6062
6163 @Test
62 public void testNegative() {
64 void testNegative() {
6365 OutputStream stream = new ByteArrayOutputStream();
6466 TokenNameFinderEvaluationMonitor listener = new NameEvaluationErrorListener(stream);
6567
6971
7072 eval.evaluateSample(createSimpleNameSampleA());
7173
72 Assert.assertEquals(0.8, eval.getFMeasure().getFMeasure(), 0.0);
74 Assertions.assertEquals(0.8, eval.getFMeasure().getFMeasure());
7375
74 Assert.assertNotSame(0, stream.toString().length());
76 Assertions.assertNotSame(0, stream.toString().length());
7577 }
76
7778
7879
7980 private static String[] sentence = {"U", ".", "S", ".", "President", "Barack", "Obama", "is",
8283
8384 private static NameSample createSimpleNameSampleA() {
8485
85 Span[] names = { new Span(0, 4, "Location"), new Span(5, 7, "Person"),
86 new Span(14, 15, "Location") };
86 Span[] names = {new Span(0, 4, "Location"), new Span(5, 7, "Person"),
87 new Span(14, 15, "Location")};
8788
8889 NameSample nameSample;
8990 nameSample = new NameSample(sentence, names, false);
9394
9495 private static NameSample createSimpleNameSampleB() {
9596
96 Span[] names = { new Span(0, 4, "Location"), new Span(14, 15, "Location") };
97 Span[] names = {new Span(0, 4, "Location"), new Span(14, 15, "Location")};
9798
9899 NameSample nameSample;
99100 nameSample = new NameSample(sentence, names, false);
2828 import java.util.Map;
2929 import java.util.stream.Collectors;
3030
31 import org.junit.Assert;
32 import org.junit.Test;
31 import org.junit.jupiter.api.Assertions;
32 import org.junit.jupiter.api.Test;
3333
3434 import opennlp.tools.cmdline.TerminateToolException;
3535 import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
4545 public class TokenNameFinderModelTest {
4646
4747 @Test
48 public void testNERWithPOSModel() throws IOException {
48 void testNERWithPOSModel() throws IOException {
4949
5050 // create a resources folder
5151 Path resourcesFolder = Files.createTempDirectory("resources").toAbsolutePath();
5252
5353 // save a POS model there
5454 POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.MAXENT);
55 File posModelFile = new File(resourcesFolder.toFile(),"pos-model.bin");
55 File posModelFile = new File(resourcesFolder.toFile(), "pos-model.bin");
5656
5757 posModel.serialize(posModelFile);
5858
59 Assert.assertTrue(posModelFile.exists());
59 Assertions.assertTrue(posModelFile.exists());
6060
6161 // load feature generator xml bytes
6262 InputStream fgInputStream = this.getClass().getResourceAsStream("ner-pos-features.xml");
7373 try {
7474 resources = TokenNameFinderTrainerTool.loadResources(resourcesFolder.toFile(),
7575 featureGenerator.toAbsolutePath().toFile());
76 }
77 catch (IOException e) {
76 } catch (IOException e) {
7877 throw new TerminateToolException(-1, e.getMessage(), e);
79 }
80 finally {
78 } finally {
8179 Files.delete(featureGenerator);
8280 }
8381
103101
104102 modelOut.close();
105103
106 Assert.assertTrue(model.exists());
107 }
108 finally {
104 Assertions.assertTrue(model.exists());
105 } finally {
109106 model.delete();
110107 FileUtil.deleteDirectory(resourcesFolder.toFile());
111108 }
1616
1717 package opennlp.tools.ngram;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for {@link NGramCharModel}
2525 public class NGramCharModelTest {
2626
2727 @Test
28 public void testZeroGetCount() throws Exception {
28 void testZeroGetCount() {
2929 NGramCharModel ngramModel = new NGramCharModel();
3030 int count = ngramModel.getCount("");
31 Assert.assertEquals(0, count);
32 Assert.assertEquals(0, ngramModel.size());
31 Assertions.assertEquals(0, count);
32 Assertions.assertEquals(0, ngramModel.size());
3333 }
3434
3535 @Test
36 public void testZeroGetCount2() throws Exception {
36 void testZeroGetCount2() {
3737 NGramCharModel ngramModel = new NGramCharModel();
3838 ngramModel.add("the");
3939 int count = ngramModel.getCount("fox");
40 Assert.assertEquals(0, count);
41 Assert.assertEquals(1, ngramModel.size());
40 Assertions.assertEquals(0, count);
41 Assertions.assertEquals(1, ngramModel.size());
4242 }
4343
4444 @Test
45 public void testAdd() throws Exception {
45 void testAdd() {
4646 NGramCharModel ngramModel = new NGramCharModel();
4747 ngramModel.add("fox");
4848 int count = ngramModel.getCount("the");
49 Assert.assertEquals(0, count);
50 Assert.assertEquals(1, ngramModel.size());
49 Assertions.assertEquals(0, count);
50 Assertions.assertEquals(1, ngramModel.size());
5151 }
5252
5353 @Test
54 public void testAdd1() throws Exception {
54 void testAdd1() {
5555 NGramCharModel ngramModel = new NGramCharModel();
5656 ngramModel.add("the");
5757 int count = ngramModel.getCount("the");
58 Assert.assertEquals(1, count);
59 Assert.assertEquals(1, ngramModel.size());
58 Assertions.assertEquals(1, count);
59 Assertions.assertEquals(1, ngramModel.size());
6060 }
6161
6262 @Test
63 public void testAdd2() throws Exception {
63 void testAdd2() {
6464 NGramCharModel ngramModel = new NGramCharModel();
6565 ngramModel.add("the", 1, 3);
6666 int count = ngramModel.getCount("th");
67 Assert.assertEquals(1, count);
68 Assert.assertEquals(6, ngramModel.size());
67 Assertions.assertEquals(1, count);
68 Assertions.assertEquals(6, ngramModel.size());
6969 }
7070
7171 @Test
72 public void testRemove() throws Exception {
72 void testRemove() {
7373 NGramCharModel ngramModel = new NGramCharModel();
7474 String ngram = "the";
7575 ngramModel.add(ngram);
7676 ngramModel.remove(ngram);
77 Assert.assertEquals(0, ngramModel.size());
77 Assertions.assertEquals(0, ngramModel.size());
7878 }
7979
8080 @Test
81 public void testContains() throws Exception {
81 void testContains() {
8282 NGramCharModel ngramModel = new NGramCharModel();
8383 String token = "the";
8484 ngramModel.add(token);
85 Assert.assertFalse(ngramModel.contains("fox"));
85 Assertions.assertFalse(ngramModel.contains("fox"));
8686 }
8787
8888 @Test
89 public void testContains2() throws Exception {
89 void testContains2() {
9090 NGramCharModel ngramModel = new NGramCharModel();
9191 String token = "the";
9292 ngramModel.add(token, 1, 3);
93 Assert.assertTrue(ngramModel.contains("the"));
93 Assertions.assertTrue(ngramModel.contains("the"));
9494 }
9595
9696
9797 @Test
98 public void testCutoff1() throws Exception {
98 void testCutoff1() {
9999 NGramCharModel ngramModel = new NGramCharModel();
100100 String token = "the";
101101 ngramModel.add(token, 1, 3);
102102 ngramModel.cutoff(2, 4);
103 Assert.assertEquals(0, ngramModel.size());
103 Assertions.assertEquals(0, ngramModel.size());
104104 }
105105 }
2020 import java.util.Arrays;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 public class NGramGeneratorTest {
2727
2828 @Test
29 public void generateListTest1() {
29 void generateListTest1() {
3030
3131 final List<String> input = Arrays.asList("This", "is", "a", "sentence");
3232 final int window = 1;
3434
3535 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
3636
37 Assert.assertEquals(4, ngrams.size());
38 Assert.assertEquals("This", ngrams.get(0));
39 Assert.assertEquals("is", ngrams.get(1));
40 Assert.assertEquals("a", ngrams.get(2));
41 Assert.assertEquals("sentence", ngrams.get(3));
37 Assertions.assertEquals(4, ngrams.size());
38 Assertions.assertEquals("This", ngrams.get(0));
39 Assertions.assertEquals("is", ngrams.get(1));
40 Assertions.assertEquals("a", ngrams.get(2));
41 Assertions.assertEquals("sentence", ngrams.get(3));
4242
4343 }
4444
4545 @Test
46 public void generateListTest2() {
46 void generateListTest2() {
4747
4848 final List<String> input = Arrays.asList("This", "is", "a", "sentence");
4949 final int window = 2;
5151
5252 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
5353
54 Assert.assertEquals(3, ngrams.size());
55 Assert.assertEquals("This-is", ngrams.get(0));
56 Assert.assertEquals("is-a", ngrams.get(1));
57 Assert.assertEquals("a-sentence", ngrams.get(2));
54 Assertions.assertEquals(3, ngrams.size());
55 Assertions.assertEquals("This-is", ngrams.get(0));
56 Assertions.assertEquals("is-a", ngrams.get(1));
57 Assertions.assertEquals("a-sentence", ngrams.get(2));
5858
5959 }
6060
6161 @Test
62 public void generateListTest3() {
62 void generateListTest3() {
6363
6464 final List<String> input = Arrays.asList("This", "is", "a", "sentence");
6565 final int window = 3;
6767
6868 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
6969
70 Assert.assertEquals(2, ngrams.size());
71 Assert.assertEquals("This-is-a", ngrams.get(0));
72 Assert.assertEquals("is-a-sentence", ngrams.get(1));
70 Assertions.assertEquals(2, ngrams.size());
71 Assertions.assertEquals("This-is-a", ngrams.get(0));
72 Assertions.assertEquals("is-a-sentence", ngrams.get(1));
7373
7474 }
7575
7676 @Test
77 public void generateListTest4() {
77 void generateListTest4() {
7878
7979 final List<String> input = Arrays.asList("This", "is", "a", "sentence");
8080 final int window = 4;
8282
8383 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
8484
85 Assert.assertEquals(1, ngrams.size());
86 Assert.assertEquals("This-is-a-sentence", ngrams.get(0));
85 Assertions.assertEquals(1, ngrams.size());
86 Assertions.assertEquals("This-is-a-sentence", ngrams.get(0));
8787
8888 }
8989
9090 @Test
91 public void generateCharTest1() {
91 void generateCharTest1() {
9292
9393 final char[] input = "Test".toCharArray();
9494 final int window = 1;
9696
9797 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
9898
99 Assert.assertEquals(4, ngrams.size());
100 Assert.assertEquals("T", ngrams.get(0));
101 Assert.assertEquals("e", ngrams.get(1));
102 Assert.assertEquals("s", ngrams.get(2));
103 Assert.assertEquals("t", ngrams.get(3));
99 Assertions.assertEquals(4, ngrams.size());
100 Assertions.assertEquals("T", ngrams.get(0));
101 Assertions.assertEquals("e", ngrams.get(1));
102 Assertions.assertEquals("s", ngrams.get(2));
103 Assertions.assertEquals("t", ngrams.get(3));
104104
105105 }
106106
107107 @Test
108 public void generateCharTest2() {
108 void generateCharTest2() {
109109
110110 final char[] input = "Test".toCharArray();
111111 final int window = 2;
113113
114114 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
115115
116 Assert.assertEquals(3, ngrams.size());
117 Assert.assertEquals("T-e", ngrams.get(0));
118 Assert.assertEquals("e-s", ngrams.get(1));
119 Assert.assertEquals("s-t", ngrams.get(2));
116 Assertions.assertEquals(3, ngrams.size());
117 Assertions.assertEquals("T-e", ngrams.get(0));
118 Assertions.assertEquals("e-s", ngrams.get(1));
119 Assertions.assertEquals("s-t", ngrams.get(2));
120120
121121 }
122122
123123 @Test
124 public void generateCharTest3() {
124 void generateCharTest3() {
125125
126126 final char[] input = "Test".toCharArray();
127127 final int window = 3;
129129
130130 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
131131
132 Assert.assertEquals(2, ngrams.size());
133 Assert.assertEquals("T-e-s", ngrams.get(0));
134 Assert.assertEquals("e-s-t", ngrams.get(1));
132 Assertions.assertEquals(2, ngrams.size());
133 Assertions.assertEquals("T-e-s", ngrams.get(0));
134 Assertions.assertEquals("e-s-t", ngrams.get(1));
135135
136136 }
137137
138138 @Test
139 public void generateCharTest4() {
139 void generateCharTest4() {
140140
141141 final char[] input = "Test".toCharArray();
142142 final int window = 4;
144144
145145 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
146146
147 Assert.assertEquals(1, ngrams.size());
148 Assert.assertEquals("T-e-s-t", ngrams.get(0));
147 Assertions.assertEquals(1, ngrams.size());
148 Assertions.assertEquals("T-e-s-t", ngrams.get(0));
149149
150150 }
151151
152152 @Test
153 public void generateCharTest() {
153 void generateCharTest() {
154154
155155 final char[] input = "Test again".toCharArray();
156156 final int window = 4;
158158
159159 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
160160
161 Assert.assertEquals(7, ngrams.size());
162 Assert.assertEquals(("T-e-s-t"), ngrams.get(0));
163 Assert.assertEquals(("e-s-t- "), ngrams.get(1));
164 Assert.assertEquals(("s-t- -a"), ngrams.get(2));
165 Assert.assertEquals(("t- -a-g"), ngrams.get(3));
166 Assert.assertEquals((" -a-g-a"), ngrams.get(4));
167 Assert.assertEquals(("a-g-a-i"), ngrams.get(5));
168 Assert.assertEquals(("g-a-i-n"), ngrams.get(6));
161 Assertions.assertEquals(7, ngrams.size());
162 Assertions.assertEquals(("T-e-s-t"), ngrams.get(0));
163 Assertions.assertEquals(("e-s-t- "), ngrams.get(1));
164 Assertions.assertEquals(("s-t- -a"), ngrams.get(2));
165 Assertions.assertEquals(("t- -a-g"), ngrams.get(3));
166 Assertions.assertEquals((" -a-g-a"), ngrams.get(4));
167 Assertions.assertEquals(("a-g-a-i"), ngrams.get(5));
168 Assertions.assertEquals(("g-a-i-n"), ngrams.get(6));
169169
170170 }
171171
172172 @Test
173 public void generateLargerWindowThanListTest() {
174
173 void generateLargerWindowThanListTest() {
174
175175 final List<String> input = Arrays.asList("One", "two");
176176 final int window = 3;
177177 final String separator = "-";
178
178
179179 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
180
181 Assert.assertTrue(ngrams.isEmpty());
182
180
181 Assertions.assertTrue(ngrams.isEmpty());
182
183183 }
184
184
185185 @Test
186 public void emptyTest() {
187
186 void emptyTest() {
187
188188 final List<String> input = new ArrayList<>();
189189 final int window = 2;
190190 final String separator = "-";
191
191
192192 final List<String> ngrams = NGramGenerator.generate(input, window, separator);
193193
194 Assert.assertTrue(ngrams.isEmpty());
195
194 Assertions.assertTrue(ngrams.isEmpty());
195
196196 }
197
197
198198 }
2222 import java.nio.charset.Charset;
2323 import java.nio.charset.StandardCharsets;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.dictionary.Dictionary;
2929 import opennlp.tools.util.InvalidFormatException;
3232 /**
3333 * Tests for {@link opennlp.tools.ngram.NGramModel}
3434 */
35
3536 public class NGramModelTest {
3637
3738 @Test
38 public void testZeroGetCount() throws Exception {
39 void testZeroGetCount() {
3940 NGramModel ngramModel = new NGramModel();
4041 int count = ngramModel.getCount(new StringList(""));
41 Assert.assertEquals(0, count);
42 Assert.assertEquals(0, ngramModel.size());
43 }
44
45 @Test
46 public void testZeroGetCount2() throws Exception {
42 Assertions.assertEquals(0, count);
43 Assertions.assertEquals(0, ngramModel.size());
44 }
45
46 @Test
47 void testZeroGetCount2() {
4748 NGramModel ngramModel = new NGramModel();
4849 ngramModel.add(new StringList("the", "bro", "wn"));
4950 int count = ngramModel.getCount(new StringList("fox"));
50 Assert.assertEquals(0, count);
51 Assert.assertEquals(1, ngramModel.size());
52 }
53
54 @Test
55 public void testAdd() throws Exception {
51 Assertions.assertEquals(0, count);
52 Assertions.assertEquals(1, ngramModel.size());
53 }
54
55 @Test
56 void testAdd() {
5657 NGramModel ngramModel = new NGramModel();
5758 ngramModel.add(new StringList("the", "bro", "wn"));
5859 int count = ngramModel.getCount(new StringList("the"));
59 Assert.assertEquals(0, count);
60 Assert.assertEquals(1, ngramModel.size());
61 }
62
63 @Test
64 public void testAdd1() throws Exception {
60 Assertions.assertEquals(0, count);
61 Assertions.assertEquals(1, ngramModel.size());
62 }
63
64 @Test
65 void testAdd1() {
6566 NGramModel ngramModel = new NGramModel();
6667 ngramModel.add(new StringList("the", "bro", "wn"));
6768 int count = ngramModel.getCount(new StringList("the", "bro", "wn"));
68 Assert.assertEquals(1, count);
69 Assert.assertEquals(1, ngramModel.size());
70 }
71
72 @Test
73 public void testAdd2() throws Exception {
69 Assertions.assertEquals(1, count);
70 Assertions.assertEquals(1, ngramModel.size());
71 }
72
73 @Test
74 void testAdd2() {
7475 NGramModel ngramModel = new NGramModel();
7576 ngramModel.add(new StringList("the", "bro", "wn"), 2, 3);
7677 int count = ngramModel.getCount(new StringList("the", "bro", "wn"));
77 Assert.assertEquals(1, count);
78 Assert.assertEquals(3, ngramModel.size());
79 }
80
81 @Test
82 public void testAdd3() throws Exception {
78 Assertions.assertEquals(1, count);
79 Assertions.assertEquals(3, ngramModel.size());
80 }
81
82 @Test
83 void testAdd3() {
8384 NGramModel ngramModel = new NGramModel();
8485 ngramModel.add(new StringList("the", "brown", "fox"), 2, 3);
8586 int count = ngramModel.getCount(new StringList("the", "brown", "fox"));
86 Assert.assertEquals(1, count);
87 Assertions.assertEquals(1, count);
8788 count = ngramModel.getCount(new StringList("the", "brown"));
88 Assert.assertEquals(1, count);
89 Assertions.assertEquals(1, count);
8990 count = ngramModel.getCount(new StringList("brown", "fox"));
90 Assert.assertEquals(1, count);
91 Assert.assertEquals(3, ngramModel.size());
92 }
93
94 @Test
95 public void testRemove() throws Exception {
91 Assertions.assertEquals(1, count);
92 Assertions.assertEquals(3, ngramModel.size());
93 }
94
95 @Test
96 void testRemove() {
9697 NGramModel ngramModel = new NGramModel();
9798 StringList tokens = new StringList("the", "bro", "wn");
9899 ngramModel.add(tokens);
99100 ngramModel.remove(tokens);
100 Assert.assertEquals(0, ngramModel.size());
101 }
102
103 @Test
104 public void testContains() throws Exception {
101 Assertions.assertEquals(0, ngramModel.size());
102 }
103
104 @Test
105 void testContains() {
105106 NGramModel ngramModel = new NGramModel();
106107 StringList tokens = new StringList("the", "bro", "wn");
107108 ngramModel.add(tokens);
108 Assert.assertFalse(ngramModel.contains(new StringList("the")));
109 }
110
111 @Test
112 public void testContains2() throws Exception {
113 NGramModel ngramModel = new NGramModel();
114 StringList tokens = new StringList("the", "bro", "wn");
115 ngramModel.add(tokens, 1, 3);
116 Assert.assertTrue(ngramModel.contains(new StringList("the")));
117 }
118
119 @Test
120 public void testNumberOfGrams() throws Exception {
121 NGramModel ngramModel = new NGramModel();
122 StringList tokens = new StringList("the", "bro", "wn");
123 ngramModel.add(tokens, 1, 3);
124 Assert.assertEquals(6, ngramModel.numberOfGrams());
125 }
126
127 @Test
128 public void testCutoff1() throws Exception {
109 Assertions.assertFalse(ngramModel.contains(new StringList("the")));
110 }
111
112 @Test
113 void testContains2() {
114 NGramModel ngramModel = new NGramModel();
115 StringList tokens = new StringList("the", "bro", "wn");
116 ngramModel.add(tokens, 1, 3);
117 Assertions.assertTrue(ngramModel.contains(new StringList("the")));
118 }
119
120 @Test
121 void testNumberOfGrams() {
122 NGramModel ngramModel = new NGramModel();
123 StringList tokens = new StringList("the", "bro", "wn");
124 ngramModel.add(tokens, 1, 3);
125 Assertions.assertEquals(6, ngramModel.numberOfGrams());
126 }
127
128 @Test
129 void testCutoff1() {
129130 NGramModel ngramModel = new NGramModel();
130131 StringList tokens = new StringList("the", "brown", "fox", "jumped");
131132 ngramModel.add(tokens, 1, 3);
132133 ngramModel.cutoff(2, 4);
133 Assert.assertEquals(0, ngramModel.size());
134 }
135
136 @Test
137 public void testCutoff2() throws Exception {
134 Assertions.assertEquals(0, ngramModel.size());
135 }
136
137 @Test
138 void testCutoff2() {
138139 NGramModel ngramModel = new NGramModel();
139140 StringList tokens = new StringList("the", "brown", "fox", "jumped");
140141 ngramModel.add(tokens, 1, 3);
141142 ngramModel.cutoff(1, 3);
142 Assert.assertEquals(9, ngramModel.size());
143 }
144
145 @Test
146 public void testToDictionary() throws Exception {
143 Assertions.assertEquals(9, ngramModel.size());
144 }
145
146 @Test
147 void testToDictionary() {
147148 NGramModel ngramModel = new NGramModel();
148149 StringList tokens = new StringList("the", "brown", "fox", "jumped");
149150 ngramModel.add(tokens, 1, 3);
150151 tokens = new StringList("the", "brown", "Fox", "jumped");
151152 ngramModel.add(tokens, 1, 3);
152153 Dictionary dictionary = ngramModel.toDictionary();
153 Assert.assertNotNull(dictionary);
154 Assert.assertEquals(9, dictionary.size());
155 Assert.assertEquals(1, dictionary.getMinTokenCount());
156 Assert.assertEquals(3, dictionary.getMaxTokenCount());
157 }
158
159 @Test
160 public void testToDictionary1() throws Exception {
154 Assertions.assertNotNull(dictionary);
155 Assertions.assertEquals(9, dictionary.size());
156 Assertions.assertEquals(1, dictionary.getMinTokenCount());
157 Assertions.assertEquals(3, dictionary.getMaxTokenCount());
158 }
159
160 @Test
161 void testToDictionary1() {
161162 NGramModel ngramModel = new NGramModel();
162163 StringList tokens = new StringList("the", "brown", "fox", "jumped");
163164 ngramModel.add(tokens, 1, 3);
164165 tokens = new StringList("the", "brown", "Fox", "jumped");
165166 ngramModel.add(tokens, 1, 3);
166167 Dictionary dictionary = ngramModel.toDictionary(true);
167 Assert.assertNotNull(dictionary);
168 Assert.assertEquals(14, dictionary.size());
169 Assert.assertEquals(1, dictionary.getMinTokenCount());
170 Assert.assertEquals(3, dictionary.getMaxTokenCount());
171 }
172
173 @Test(expected = InvalidFormatException.class)
174 public void testInvalidFormat() throws Exception {
175 InputStream stream = new ByteArrayInputStream("inputstring".getBytes(StandardCharsets.UTF_8));
176 NGramModel ngramModel = new NGramModel(stream);
177 stream.close();
178 ngramModel.toDictionary(true);
179 }
180
181 @Test
182 public void testFromFile() throws Exception {
168 Assertions.assertNotNull(dictionary);
169 Assertions.assertEquals(14, dictionary.size());
170 Assertions.assertEquals(1, dictionary.getMinTokenCount());
171 Assertions.assertEquals(3, dictionary.getMaxTokenCount());
172 }
173
174 @Test
175 void testInvalidFormat() {
176 Assertions.assertThrows(InvalidFormatException.class, () -> {
177 InputStream stream = new ByteArrayInputStream("inputstring".getBytes(StandardCharsets.UTF_8));
178 NGramModel ngramModel = new NGramModel(stream);
179 stream.close();
180 ngramModel.toDictionary(true);
181 });
182 }
183
184 @Test
185 void testFromFile() throws Exception {
183186 InputStream stream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml");
184187 NGramModel ngramModel = new NGramModel(stream);
185188 stream.close();
186189 Dictionary dictionary = ngramModel.toDictionary(true);
187 Assert.assertNotNull(dictionary);
188 Assert.assertEquals(14, dictionary.size());
189 Assert.assertEquals(3, dictionary.getMaxTokenCount());
190 Assert.assertEquals(1, dictionary.getMinTokenCount());
191 }
192
193 @Test
194 public void testSerialize() throws Exception {
195
190 Assertions.assertNotNull(dictionary);
191 Assertions.assertEquals(14, dictionary.size());
192 Assertions.assertEquals(3, dictionary.getMaxTokenCount());
193 Assertions.assertEquals(1, dictionary.getMinTokenCount());
194 }
195
196 @Test
197 void testSerialize() throws Exception {
198
196199 InputStream stream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml");
197
200
198201 NGramModel ngramModel1 = new NGramModel(stream);
199202 stream.close();
200
203
201204 Dictionary dictionary = ngramModel1.toDictionary(true);
202 Assert.assertNotNull(dictionary);
203 Assert.assertEquals(14, dictionary.size());
204 Assert.assertEquals(3, dictionary.getMaxTokenCount());
205 Assert.assertEquals(1, dictionary.getMinTokenCount());
206
205 Assertions.assertNotNull(dictionary);
206 Assertions.assertEquals(14, dictionary.size());
207 Assertions.assertEquals(3, dictionary.getMaxTokenCount());
208 Assertions.assertEquals(1, dictionary.getMinTokenCount());
209
207210 ByteArrayOutputStream baos = new ByteArrayOutputStream();
208211 ngramModel1.serialize(baos);
209
212
210213 final String serialized = new String(baos.toByteArray(), Charset.defaultCharset());
211214 InputStream inputStream = new ByteArrayInputStream(serialized.getBytes(StandardCharsets.UTF_8));
212
215
213216 NGramModel ngramModel2 = new NGramModel(inputStream);
214217 stream.close();
215
216 Assert.assertEquals(ngramModel2.numberOfGrams(), ngramModel2.numberOfGrams());
217 Assert.assertEquals(ngramModel2.size(), ngramModel2.size());
218
218
219 Assertions.assertEquals(ngramModel2.numberOfGrams(), ngramModel2.numberOfGrams());
220 Assertions.assertEquals(ngramModel2.size(), ngramModel2.size());
221
219222 dictionary = ngramModel2.toDictionary(true);
220
221 Assert.assertNotNull(dictionary);
222 Assert.assertEquals(14, dictionary.size());
223 Assert.assertEquals(3, dictionary.getMaxTokenCount());
224 Assert.assertEquals(1, dictionary.getMinTokenCount());
225
226 }
227
228 @Test(expected = InvalidFormatException.class)
229 public void testFromInvalidFileMissingCount() throws Exception {
230 InputStream stream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model-no-count.xml");
231 NGramModel ngramModel = new NGramModel(stream);
232 stream.close();
233 ngramModel.toDictionary(true);
234 }
235
236 @Test(expected = InvalidFormatException.class)
237 public void testFromInvalidFileNotANumber() throws Exception {
238 InputStream stream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model-not-a-number.xml");
239 NGramModel ngramModel = new NGramModel(stream);
240 stream.close();
241 ngramModel.toDictionary(true);
223
224 Assertions.assertNotNull(dictionary);
225 Assertions.assertEquals(14, dictionary.size());
226 Assertions.assertEquals(3, dictionary.getMaxTokenCount());
227 Assertions.assertEquals(1, dictionary.getMinTokenCount());
228
229 }
230
231 @Test
232 void testFromInvalidFileMissingCount() {
233 Assertions.assertThrows(InvalidFormatException.class, () -> {
234 InputStream stream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model-no-count.xml");
235 NGramModel ngramModel = new NGramModel(stream);
236 stream.close();
237 ngramModel.toDictionary(true);
238 });
239 }
240
241 @Test
242 void testFromInvalidFileNotANumber() {
243 Assertions.assertThrows(InvalidFormatException.class, () -> {
244 InputStream stream = getClass().getResourceAsStream(
245 "/opennlp/tools/ngram/ngram-model-not-a-number.xml");
246 NGramModel ngramModel = new NGramModel(stream);
247 stream.close();
248 ngramModel.toDictionary(true);
249 });
242250 }
243251
244252 }
1919 import java.util.Collection;
2020 import java.util.LinkedList;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.util.StringList;
2626
3030 public class NGramUtilsTest {
3131
3232 @Test
33 public void testBigramMLProbability() {
33 void testBigramMLProbability() {
3434 Collection<StringList> set = new LinkedList<>();
3535 set.add(new StringList("<s>", "I", "am", "Sam", "</s>"));
3636 set.add(new StringList("<s>", "Sam", "I", "am", "</s>"));
3737 set.add(new StringList("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"));
3838 set.add(new StringList(""));
3939 Double d = NGramUtils.calculateBigramMLProbability("<s>", "I", set);
40 Assert.assertEquals(Double.valueOf(0.6666666666666666d), d);
40 Assertions.assertEquals(Double.valueOf(0.6666666666666666d), d);
4141 d = NGramUtils.calculateBigramMLProbability("Sam", "</s>", set);
42 Assert.assertEquals(Double.valueOf(0.5d), d);
42 Assertions.assertEquals(Double.valueOf(0.5d), d);
4343 d = NGramUtils.calculateBigramMLProbability("<s>", "Sam", set);
44 Assert.assertEquals(Double.valueOf(0.3333333333333333d), d);
44 Assertions.assertEquals(Double.valueOf(0.3333333333333333d), d);
4545 }
4646
4747 @Test
48 public void testTrigramMLProbability() {
48 void testTrigramMLProbability() {
4949 Collection<StringList> set = new LinkedList<>();
5050 set.add(new StringList("<s>", "I", "am", "Sam", "</s>"));
5151 set.add(new StringList("<s>", "Sam", "I", "am", "</s>"));
5252 set.add(new StringList("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"));
5353 set.add(new StringList(""));
5454 Double d = NGramUtils.calculateTrigramMLProbability("I", "am", "Sam", set);
55 Assert.assertEquals(Double.valueOf(0.5), d);
55 Assertions.assertEquals(Double.valueOf(0.5), d);
5656 d = NGramUtils.calculateTrigramMLProbability("Sam", "I", "am", set);
57 Assert.assertEquals(Double.valueOf(1d), d);
57 Assertions.assertEquals(Double.valueOf(1d), d);
5858 }
5959
6060 @Test
61 public void testNgramMLProbability() {
61 void testNgramMLProbability() {
6262 Collection<StringList> set = new LinkedList<>();
6363 set.add(new StringList("<s>", "I", "am", "Sam", "</s>"));
6464 set.add(new StringList("<s>", "Sam", "I", "am", "</s>"));
6565 set.add(new StringList("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"));
6666 set.add(new StringList(""));
6767 Double d = NGramUtils.calculateNgramMLProbability(new StringList("I", "am", "Sam"), set);
68 Assert.assertEquals(Double.valueOf(0.5), d);
68 Assertions.assertEquals(Double.valueOf(0.5), d);
6969 d = NGramUtils.calculateNgramMLProbability(new StringList("Sam", "I", "am"), set);
70 Assert.assertEquals(Double.valueOf(1d), d);
70 Assertions.assertEquals(Double.valueOf(1d), d);
7171 }
7272
7373 @Test
74 public void testLinearInterpolation() throws Exception {
74 void testLinearInterpolation() {
7575 Collection<StringList> set = new LinkedList<>();
7676 set.add(new StringList("the", "green", "book", "STOP"));
7777 set.add(new StringList("my", "blue", "book", "STOP"));
8080 Double lambda = 1d / 3d;
8181 Double d = NGramUtils.calculateTrigramLinearInterpolationProbability("the", "green",
8282 "book", set, lambda, lambda, lambda);
83 Assert.assertNotNull(d);
84 Assert.assertEquals("wrong result", Double.valueOf(0.5714285714285714d), d);
83 Assertions.assertNotNull(d);
84 Assertions.assertEquals(Double.valueOf(0.5714285714285714d), d, "wrong result");
8585 }
8686
8787 @Test
88 public void testLinearInterpolation2() throws Exception {
88 void testLinearInterpolation2() {
8989 Collection<StringList> set = new LinkedList<>();
9090 set.add(new StringList("D", "N", "V", "STOP"));
9191 set.add(new StringList("D", "N", "V", "STOP"));
9292 Double lambda = 1d / 3d;
9393 Double d = NGramUtils.calculateTrigramLinearInterpolationProbability("N", "V",
9494 "STOP", set, lambda, lambda, lambda);
95 Assert.assertNotNull(d);
96 Assert.assertEquals("wrong result", Double.valueOf(0.75d), d);
95 Assertions.assertNotNull(d);
96 Assertions.assertEquals(Double.valueOf(0.75d), d, "wrong result");
9797 }
9898
9999 @Test
100 public void testGetNGrams() throws Exception {
100 void testGetNGrams() {
101101 Collection<StringList> nGrams = NGramUtils.getNGrams(new StringList("I",
102102 "saw", "brown", "fox"), 2);
103 Assert.assertEquals(3, nGrams.size());
103 Assertions.assertEquals(3, nGrams.size());
104104 nGrams = NGramUtils.getNGrams(new StringList("I", "saw", "brown", "fox"), 3);
105 Assert.assertEquals(2, nGrams.size());
105 Assertions.assertEquals(2, nGrams.size());
106106 }
107107 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2223
2324 import opennlp.tools.chunker.ChunkSample;
2425 import opennlp.tools.util.ObjectStream;
2526 import opennlp.tools.util.ObjectStreamUtils;
2627
27 import static org.junit.Assert.assertEquals;
28 import static org.junit.Assert.assertNull;
29
3028 public class ChunkSampleStreamTest {
3129
3230 @Test
33 public void testConvertParseToPosSample() throws IOException {
31 void testConvertParseToPosSample() throws IOException {
3432 try (ObjectStream<ChunkSample> chunkSampleStream = new ChunkSampleStream(new ParseSampleStream(
3533 ObjectStreamUtils.createObjectStream(ParseTest.PARSE_STRING)))) {
3634
3735 ChunkSample sample = chunkSampleStream.read();
3836
39 assertEquals("She", sample.getSentence()[0]);
40 assertEquals("PRP", sample.getTags()[0]);
41 assertEquals("S-NP", sample.getPreds()[0]);
42 assertEquals("was", sample.getSentence()[1]);
43 assertEquals("VBD", sample.getTags()[1]);
44 assertEquals("O", sample.getPreds()[1]);
45 assertEquals("just", sample.getSentence()[2]);
46 assertEquals("RB", sample.getTags()[2]);
47 assertEquals("S-ADVP", sample.getPreds()[2]);
48 assertEquals("another", sample.getSentence()[3]);
49 assertEquals("DT", sample.getTags()[3]);
50 assertEquals("S-NP", sample.getPreds()[3]);
51 assertEquals("freighter", sample.getSentence()[4]);
52 assertEquals("NN", sample.getTags()[4]);
53 assertEquals("C-NP", sample.getPreds()[4]);
54 assertEquals("from", sample.getSentence()[5]);
55 assertEquals("IN", sample.getTags()[5]);
56 assertEquals("O", sample.getPreds()[5]);
57 assertEquals("the", sample.getSentence()[6]);
58 assertEquals("DT", sample.getTags()[6]);
59 assertEquals("S-NP", sample.getPreds()[6]);
60 assertEquals("States", sample.getSentence()[7]);
61 assertEquals("NNPS", sample.getTags()[7]);
62 assertEquals("C-NP", sample.getPreds()[7]);
63 assertEquals(",", sample.getSentence()[8]);
64 assertEquals(",", sample.getTags()[8]);
65 assertEquals("O", sample.getPreds()[8]);
66 assertEquals("and", sample.getSentence()[9]);
67 assertEquals("CC", sample.getTags()[9]);
68 assertEquals("O", sample.getPreds()[9]);
69 assertEquals("she", sample.getSentence()[10]);
70 assertEquals("PRP", sample.getTags()[10]);
71 assertEquals("S-NP", sample.getPreds()[10]);
72 assertEquals("seemed", sample.getSentence()[11]);
73 assertEquals("VBD", sample.getTags()[11]);
74 assertEquals("O", sample.getPreds()[11]);
75 assertEquals("as", sample.getSentence()[12]);
76 assertEquals("RB", sample.getTags()[12]);
77 assertEquals("S-ADJP", sample.getPreds()[12]);
78 assertEquals("commonplace", sample.getSentence()[13]);
79 assertEquals("JJ", sample.getTags()[13]);
80 assertEquals("C-ADJP", sample.getPreds()[13]);
81 assertEquals("as", sample.getSentence()[14]);
82 assertEquals("IN", sample.getTags()[14]);
83 assertEquals("O", sample.getPreds()[14]);
84 assertEquals("her", sample.getSentence()[15]);
85 assertEquals("PRP$", sample.getTags()[15]);
86 assertEquals("S-NP", sample.getPreds()[15]);
87 assertEquals("name", sample.getSentence()[16]);
88 assertEquals("NN", sample.getTags()[16]);
89 assertEquals("C-NP", sample.getPreds()[16]);
90 assertEquals(".", sample.getSentence()[17]);
91 assertEquals(".", sample.getTags()[17]);
92 assertEquals("O", sample.getPreds()[17]);
37 Assertions.assertEquals("She", sample.getSentence()[0]);
38 Assertions.assertEquals("PRP", sample.getTags()[0]);
39 Assertions.assertEquals("S-NP", sample.getPreds()[0]);
40 Assertions.assertEquals("was", sample.getSentence()[1]);
41 Assertions.assertEquals("VBD", sample.getTags()[1]);
42 Assertions.assertEquals("O", sample.getPreds()[1]);
43 Assertions.assertEquals("just", sample.getSentence()[2]);
44 Assertions.assertEquals("RB", sample.getTags()[2]);
45 Assertions.assertEquals("S-ADVP", sample.getPreds()[2]);
46 Assertions.assertEquals("another", sample.getSentence()[3]);
47 Assertions.assertEquals("DT", sample.getTags()[3]);
48 Assertions.assertEquals("S-NP", sample.getPreds()[3]);
49 Assertions.assertEquals("freighter", sample.getSentence()[4]);
50 Assertions.assertEquals("NN", sample.getTags()[4]);
51 Assertions.assertEquals("C-NP", sample.getPreds()[4]);
52 Assertions.assertEquals("from", sample.getSentence()[5]);
53 Assertions.assertEquals("IN", sample.getTags()[5]);
54 Assertions.assertEquals("O", sample.getPreds()[5]);
55 Assertions.assertEquals("the", sample.getSentence()[6]);
56 Assertions.assertEquals("DT", sample.getTags()[6]);
57 Assertions.assertEquals("S-NP", sample.getPreds()[6]);
58 Assertions.assertEquals("States", sample.getSentence()[7]);
59 Assertions.assertEquals("NNPS", sample.getTags()[7]);
60 Assertions.assertEquals("C-NP", sample.getPreds()[7]);
61 Assertions.assertEquals(",", sample.getSentence()[8]);
62 Assertions.assertEquals(",", sample.getTags()[8]);
63 Assertions.assertEquals("O", sample.getPreds()[8]);
64 Assertions.assertEquals("and", sample.getSentence()[9]);
65 Assertions.assertEquals("CC", sample.getTags()[9]);
66 Assertions.assertEquals("O", sample.getPreds()[9]);
67 Assertions.assertEquals("she", sample.getSentence()[10]);
68 Assertions.assertEquals("PRP", sample.getTags()[10]);
69 Assertions.assertEquals("S-NP", sample.getPreds()[10]);
70 Assertions.assertEquals("seemed", sample.getSentence()[11]);
71 Assertions.assertEquals("VBD", sample.getTags()[11]);
72 Assertions.assertEquals("O", sample.getPreds()[11]);
73 Assertions.assertEquals("as", sample.getSentence()[12]);
74 Assertions.assertEquals("RB", sample.getTags()[12]);
75 Assertions.assertEquals("S-ADJP", sample.getPreds()[12]);
76 Assertions.assertEquals("commonplace", sample.getSentence()[13]);
77 Assertions.assertEquals("JJ", sample.getTags()[13]);
78 Assertions.assertEquals("C-ADJP", sample.getPreds()[13]);
79 Assertions.assertEquals("as", sample.getSentence()[14]);
80 Assertions.assertEquals("IN", sample.getTags()[14]);
81 Assertions.assertEquals("O", sample.getPreds()[14]);
82 Assertions.assertEquals("her", sample.getSentence()[15]);
83 Assertions.assertEquals("PRP$", sample.getTags()[15]);
84 Assertions.assertEquals("S-NP", sample.getPreds()[15]);
85 Assertions.assertEquals("name", sample.getSentence()[16]);
86 Assertions.assertEquals("NN", sample.getTags()[16]);
87 Assertions.assertEquals("C-NP", sample.getPreds()[16]);
88 Assertions.assertEquals(".", sample.getSentence()[17]);
89 Assertions.assertEquals(".", sample.getTags()[17]);
90 Assertions.assertEquals("O", sample.getPreds()[17]);
9391
94 assertNull(chunkSampleStream.read());
92 Assertions.assertNull(chunkSampleStream.read());
9593 }
9694 }
9795 }
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.formats.ResourceAsStreamFactory;
2626 import opennlp.tools.util.InputStreamFactory;
3838 }
3939
4040 @Test
41 public void testReadTestStream() throws IOException {
41 void testReadTestStream() throws IOException {
4242 ObjectStream<Parse> parseStream = createParseSampleStream();
43 Assert.assertNotNull(parseStream.read());
44 Assert.assertNotNull(parseStream.read());
45 Assert.assertNotNull(parseStream.read());
46 Assert.assertNotNull(parseStream.read());
47 Assert.assertNull(parseStream.read());
43 Assertions.assertNotNull(parseStream.read());
44 Assertions.assertNotNull(parseStream.read());
45 Assertions.assertNotNull(parseStream.read());
46 Assertions.assertNotNull(parseStream.read());
47 Assertions.assertNull(parseStream.read());
4848 }
4949 }
1616
1717 package opennlp.tools.parser;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link Parse} class.
3131 "(NN name) )))))(. .) ))";
3232
3333 @Test
34 public void testToHashCode() {
34 void testToHashCode() {
3535 Parse p1 = Parse.parseParse(PARSE_STRING);
3636 p1.hashCode();
3737 }
3838
3939 @Test
40 public void testToString() {
40 void testToString() {
4141 Parse p1 = Parse.parseParse(PARSE_STRING);
4242 p1.toString();
4343 }
4444
4545 @Test
46 public void testEquals() {
46 void testEquals() {
4747 Parse p1 = Parse.parseParse(PARSE_STRING);
48 Assert.assertTrue(p1.equals(p1));
48 Assertions.assertTrue(p1.equals(p1));
4949 }
5050
5151 @Test
52 public void testParseClone() {
52 void testParseClone() {
5353 Parse p1 = Parse.parseParse(PARSE_STRING);
5454 Parse p2 = (Parse) p1.clone();
55 Assert.assertTrue(p1.equals(p2));
56 Assert.assertTrue(p2.equals(p1));
55 Assertions.assertTrue(p1.equals(p2));
56 Assertions.assertTrue(p2.equals(p1));
5757 }
5858
5959 @Test
60 public void testGetText() {
60 void testGetText() {
6161 Parse p = Parse.parseParse(PARSE_STRING);
6262
6363 // TODO: Why does parse attaches a space to the end of the text ???
6464 String expectedText = "She was just another freighter from the States , " +
6565 "and she seemed as commonplace as her name . ";
6666
67 Assert.assertEquals(expectedText, p.getText());
67 Assertions.assertEquals(expectedText, p.getText());
6868 }
6969
7070 @Test
71 public void testShow() {
71 void testShow() {
7272 Parse p1 = Parse.parseParse(PARSE_STRING);
7373
7474 StringBuffer parseString = new StringBuffer();
7575 p1.show(parseString);
7676 Parse p2 = Parse.parseParse(parseString.toString());
77 Assert.assertEquals(p1, p2);
77 Assertions.assertEquals(p1, p2);
7878 }
7979
8080 @Test
81 public void testTokenReplacement() {
81 void testTokenReplacement() {
8282 Parse p1 = Parse.parseParse("(TOP (S-CLF (NP-SBJ (PRP It) )(VP (VBD was) " +
8383 " (NP-PRD (NP (DT the) (NN trial) )(PP (IN of) " +
8484 " (NP (NP (NN oleomargarine) (NN heir) )(NP (NNP Minot) " +
9494 p1.show(parseString);
9595
9696 Parse p2 = Parse.parseParse(parseString.toString());
97 Assert.assertEquals(p1, p2);
97 Assertions.assertEquals(p1, p2);
9898 }
9999
100100 @Test
101 public void testGetTagNodes() {
101 void testGetTagNodes() {
102102 Parse p = Parse.parseParse(PARSE_STRING);
103103
104104 Parse[] tags = p.getTagNodes();
105105
106106 for (Parse node : tags) {
107 Assert.assertTrue(node.isPosTag());
107 Assertions.assertTrue(node.isPosTag());
108108 }
109109
110 Assert.assertEquals("PRP", tags[0].getType());
111 Assert.assertEquals("VBD", tags[1].getType());
112 Assert.assertEquals("RB", tags[2].getType());
113 Assert.assertEquals("DT", tags[3].getType());
114 Assert.assertEquals("NN", tags[4].getType());
115 Assert.assertEquals("IN", tags[5].getType());
116 Assert.assertEquals("DT", tags[6].getType());
117 Assert.assertEquals("NNPS", tags[7].getType());
118 Assert.assertEquals(",", tags[8].getType());
119 Assert.assertEquals("CC", tags[9].getType());
120 Assert.assertEquals("PRP", tags[10].getType());
121 Assert.assertEquals("VBD", tags[11].getType());
122 Assert.assertEquals("RB", tags[12].getType());
123 Assert.assertEquals("JJ", tags[13].getType());
124 Assert.assertEquals("IN", tags[14].getType());
125 Assert.assertEquals("PRP$", tags[15].getType());
126 Assert.assertEquals("NN", tags[16].getType());
127 Assert.assertEquals(".", tags[17].getType());
110 Assertions.assertEquals("PRP", tags[0].getType());
111 Assertions.assertEquals("VBD", tags[1].getType());
112 Assertions.assertEquals("RB", tags[2].getType());
113 Assertions.assertEquals("DT", tags[3].getType());
114 Assertions.assertEquals("NN", tags[4].getType());
115 Assertions.assertEquals("IN", tags[5].getType());
116 Assertions.assertEquals("DT", tags[6].getType());
117 Assertions.assertEquals("NNPS", tags[7].getType());
118 Assertions.assertEquals(",", tags[8].getType());
119 Assertions.assertEquals("CC", tags[9].getType());
120 Assertions.assertEquals("PRP", tags[10].getType());
121 Assertions.assertEquals("VBD", tags[11].getType());
122 Assertions.assertEquals("RB", tags[12].getType());
123 Assertions.assertEquals("JJ", tags[13].getType());
124 Assertions.assertEquals("IN", tags[14].getType());
125 Assertions.assertEquals("PRP$", tags[15].getType());
126 Assertions.assertEquals("NN", tags[16].getType());
127 Assertions.assertEquals(".", tags[17].getType());
128128 }
129129 }
2323 import java.io.UnsupportedEncodingException;
2424 import java.nio.charset.StandardCharsets;
2525
26 import org.junit.Assert;
26 import org.junit.jupiter.api.Assertions;
2727
2828 import opennlp.tools.formats.ResourceAsStreamFactory;
2929 import opennlp.tools.parser.lang.en.HeadRules;
7070 samples = new ParseSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
7171 } catch (UnsupportedEncodingException e) {
7272 // Should never happen
73 Assert.fail(e.getMessage());
73 Assertions.fail(e.getMessage());
7474 }
7575 }
7676 };
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.postag.POSSample;
2525 import opennlp.tools.util.ObjectStream;
2828 public class PosSampleStreamTest {
2929
3030 @Test
31 public void testConvertParseToPosSample() throws IOException {
31 void testConvertParseToPosSample() throws IOException {
3232
3333 try (ObjectStream<POSSample> posSampleStream = new PosSampleStream(new ParseSampleStream(
3434 ObjectStreamUtils.createObjectStream(ParseTest.PARSE_STRING)))) {
3535
3636 POSSample sample = posSampleStream.read();
3737
38 Assert.assertEquals("PRP", sample.getTags()[0]);
39 Assert.assertEquals("She", sample.getSentence()[0]);
40 Assert.assertEquals("VBD", sample.getTags()[1]);
41 Assert.assertEquals("was", sample.getSentence()[1]);
42 Assert.assertEquals("RB", sample.getTags()[2]);
43 Assert.assertEquals("just", sample.getSentence()[2]);
44 Assert.assertEquals("DT", sample.getTags()[3]);
45 Assert.assertEquals("another", sample.getSentence()[3]);
46 Assert.assertEquals("NN", sample.getTags()[4]);
47 Assert.assertEquals("freighter", sample.getSentence()[4]);
48 Assert.assertEquals("IN", sample.getTags()[5]);
49 Assert.assertEquals("from", sample.getSentence()[5]);
50 Assert.assertEquals("DT", sample.getTags()[6]);
51 Assert.assertEquals("the", sample.getSentence()[6]);
52 Assert.assertEquals("NNPS", sample.getTags()[7]);
53 Assert.assertEquals("States", sample.getSentence()[7]);
54 Assert.assertEquals(",", sample.getTags()[8]);
55 Assert.assertEquals(",", sample.getSentence()[8]);
56 Assert.assertEquals("CC", sample.getTags()[9]);
57 Assert.assertEquals("and", sample.getSentence()[9]);
58 Assert.assertEquals("PRP", sample.getTags()[10]);
59 Assert.assertEquals("she", sample.getSentence()[10]);
60 Assert.assertEquals("VBD", sample.getTags()[11]);
61 Assert.assertEquals("seemed", sample.getSentence()[11]);
62 Assert.assertEquals("RB", sample.getTags()[12]);
63 Assert.assertEquals("as", sample.getSentence()[12]);
64 Assert.assertEquals("JJ", sample.getTags()[13]);
65 Assert.assertEquals("commonplace", sample.getSentence()[13]);
66 Assert.assertEquals("IN", sample.getTags()[14]);
67 Assert.assertEquals("as", sample.getSentence()[14]);
68 Assert.assertEquals("PRP$", sample.getTags()[15]);
69 Assert.assertEquals("her", sample.getSentence()[15]);
70 Assert.assertEquals("NN", sample.getTags()[16]);
71 Assert.assertEquals("name", sample.getSentence()[16]);
72 Assert.assertEquals(".", sample.getTags()[17]);
73 Assert.assertEquals(".", sample.getSentence()[17]);
38 Assertions.assertEquals("PRP", sample.getTags()[0]);
39 Assertions.assertEquals("She", sample.getSentence()[0]);
40 Assertions.assertEquals("VBD", sample.getTags()[1]);
41 Assertions.assertEquals("was", sample.getSentence()[1]);
42 Assertions.assertEquals("RB", sample.getTags()[2]);
43 Assertions.assertEquals("just", sample.getSentence()[2]);
44 Assertions.assertEquals("DT", sample.getTags()[3]);
45 Assertions.assertEquals("another", sample.getSentence()[3]);
46 Assertions.assertEquals("NN", sample.getTags()[4]);
47 Assertions.assertEquals("freighter", sample.getSentence()[4]);
48 Assertions.assertEquals("IN", sample.getTags()[5]);
49 Assertions.assertEquals("from", sample.getSentence()[5]);
50 Assertions.assertEquals("DT", sample.getTags()[6]);
51 Assertions.assertEquals("the", sample.getSentence()[6]);
52 Assertions.assertEquals("NNPS", sample.getTags()[7]);
53 Assertions.assertEquals("States", sample.getSentence()[7]);
54 Assertions.assertEquals(",", sample.getTags()[8]);
55 Assertions.assertEquals(",", sample.getSentence()[8]);
56 Assertions.assertEquals("CC", sample.getTags()[9]);
57 Assertions.assertEquals("and", sample.getSentence()[9]);
58 Assertions.assertEquals("PRP", sample.getTags()[10]);
59 Assertions.assertEquals("she", sample.getSentence()[10]);
60 Assertions.assertEquals("VBD", sample.getTags()[11]);
61 Assertions.assertEquals("seemed", sample.getSentence()[11]);
62 Assertions.assertEquals("RB", sample.getTags()[12]);
63 Assertions.assertEquals("as", sample.getSentence()[12]);
64 Assertions.assertEquals("JJ", sample.getTags()[13]);
65 Assertions.assertEquals("commonplace", sample.getSentence()[13]);
66 Assertions.assertEquals("IN", sample.getTags()[14]);
67 Assertions.assertEquals("as", sample.getSentence()[14]);
68 Assertions.assertEquals("PRP$", sample.getTags()[15]);
69 Assertions.assertEquals("her", sample.getSentence()[15]);
70 Assertions.assertEquals("NN", sample.getTags()[16]);
71 Assertions.assertEquals("name", sample.getSentence()[16]);
72 Assertions.assertEquals(".", sample.getTags()[17]);
73 Assertions.assertEquals(".", sample.getSentence()[17]);
7474
75 Assert.assertNull(posSampleStream.read());
75 Assertions.assertNull(posSampleStream.read());
7676 }
7777 }
7878 }
1919 import java.io.ByteArrayInputStream;
2020 import java.io.ByteArrayOutputStream;
2121
22 import org.junit.Test;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.parser.HeadRules;
2525 import opennlp.tools.parser.Parse;
3939 * runtime problems.
4040 */
4141 @Test
42 public void testChunkingParserTraining() throws Exception {
42 void testChunkingParserTraining() throws Exception {
4343
4444 ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData();
4545 HeadRules headRules = ParserTestUtil.createTestHeadRules();
6060 model.serialize(outArray);
6161 outArray.close();
6262
63 ParserModel outputModel = new ParserModel(new ByteArrayInputStream(outArray.toByteArray()));
63 ParserModel outputModel = new ParserModel(new ByteArrayInputStream(outArray.toByteArray()));
6464
6565 // TODO: compare both models
6666 }
2424 import java.io.OutputStreamWriter;
2525 import java.nio.charset.StandardCharsets;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 public class HeadRulesTest {
3131
3232 @Test
33 public void testSerialization() throws IOException {
33 void testSerialization() throws IOException {
3434 InputStream headRulesIn =
3535 HeadRulesTest.class.getResourceAsStream("/opennlp/tools/parser/en_head_rules");
3636
4343 HeadRules headRulesRecreated = new HeadRules(new InputStreamReader(
4444 new ByteArrayInputStream(out.toByteArray()), StandardCharsets.UTF_8));
4545
46 Assert.assertEquals(headRulesOrginal, headRulesRecreated);
46 Assertions.assertEquals(headRulesOrginal, headRulesRecreated);
4747 }
4848 }
1919 import java.io.ByteArrayInputStream;
2020 import java.io.ByteArrayOutputStream;
2121
22 import org.junit.Test;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.parser.HeadRules;
2525 import opennlp.tools.parser.Parse;
3838 * runtime problems.
3939 */
4040 @Test
41 public void testTreeInsertParserTraining() throws Exception {
41 void testTreeInsertParserTraining() throws Exception {
4242
4343 ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData();
4444 HeadRules headRules = ParserTestUtil.createTestHeadRules();
1616
1717 package opennlp.tools.postag;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
2323 import opennlp.tools.util.featuregen.TokenFeatureGenerator;
3333
3434 cg.getContext(0, tokens, tags, null);
3535
36 Assert.assertEquals(1, cg.getContext(0, tokens, tags, null).length);
37 Assert.assertEquals("w=a", cg.getContext(0, tokens, tags, null)[0]);
38 Assert.assertEquals("w=b", cg.getContext(1, tokens, tags, null)[0]);
39 Assert.assertEquals("w=c", cg.getContext(2, tokens, tags, null)[0]);
40 Assert.assertEquals("w=d", cg.getContext(3, tokens, tags, null)[0]);
41 Assert.assertEquals("w=e", cg.getContext(4, tokens, tags, null)[0]);
36 Assertions.assertEquals(1, cg.getContext(0, tokens, tags, null).length);
37 Assertions.assertEquals("w=a", cg.getContext(0, tokens, tags, null)[0]);
38 Assertions.assertEquals("w=b", cg.getContext(1, tokens, tags, null)[0]);
39 Assertions.assertEquals("w=c", cg.getContext(2, tokens, tags, null)[0]);
40 Assertions.assertEquals("w=d", cg.getContext(3, tokens, tags, null)[0]);
41 Assertions.assertEquals("w=e", cg.getContext(4, tokens, tags, null)[0]);
4242 }
4343
4444 @Test
45 public void testWithoutCache() {
45 void testWithoutCache() {
4646 testContextGeneration(0);
4747 }
4848
4949 @Test
50 public void testWithCache() {
50 void testWithCache() {
5151 testContextGeneration(3);
5252 }
5353
2727 import java.util.stream.Collectors;
2828 import java.util.stream.IntStream;
2929
30 import org.junit.Assert;
31 import org.junit.BeforeClass;
32 import org.junit.Test;
30 import org.junit.jupiter.api.Assertions;
31 import org.junit.jupiter.api.BeforeAll;
32 import org.junit.jupiter.api.Test;
3333
3434 import opennlp.tools.dictionary.Dictionary;
3535 import opennlp.tools.util.StringList;
3636
3737 /**
38 *
3938 * We encountered a concurrency issue in the pos tagger module in the class
4039 * DefaultPOSContextGenerator.
41
42 The issue is demonstrated in DefaultPOSContextGeneratorTest.java. The test "multithreading()"
43 consistently fails on our system with the current code if the number of threads
44 (NUMBER_OF_THREADS) is set to 10. If the number of threads is set to 1 (effectively disabling
45 multithreading), the test consistently passes.
46
47 We resolved the issue by removing a field in DefaultPOSContextGenerator.java.
48 *
40 * <p>
41 * The issue is demonstrated in DefaultPOSContextGeneratorTest.java. The test "multithreading()"
42 * consistently fails on our system with the current code if the number of threads
43 * (NUMBER_OF_THREADS) is set to 10. If the number of threads is set to 1 (effectively disabling
44 * multithreading), the test consistently passes.
45 * <p>
46 * We resolved the issue by removing a field in DefaultPOSContextGenerator.java.
4947 */
5048
5149
5755 private static DefaultPOSContextGenerator defaultPOSContextGenerator;
5856 private static String[] tags;
5957
60 @BeforeClass
61 public static void setUp() {
58 @BeforeAll
59 static void setUp() {
6260 final String matchingToken = "tokenC";
6361
6462 tokens = new Object[] {"tokenA", "tokenB", matchingToken, "tokenD"};
7472 }
7573
7674 @Test
77 public void noDictionaryMatch() {
75 void noDictionaryMatch() {
7876 int index = 1;
7977
8078 final String[] actual = defaultPOSContextGenerator.getContext(index, tokens, tags);
9896 "nn=tokenD"
9997 };
10098
101 Assert.assertArrayEquals("Calling with not matching index at: " + index +
99 Assertions.assertArrayEquals(expected, actual, "Calling with not matching index at: " + index +
102100 "\nexpected \n" + Arrays.toString(expected) + " but actually was \n"
103 + Arrays.toString(actual), expected, actual);
101 + Arrays.toString(actual));
104102 }
105103
106104 @Test
107 public void dictionaryMatch() {
105 void dictionaryMatch() {
108106 int indexWithDictionaryMatch = 2;
109107
110108 final String[] actual =
121119 "nn=*SE*"
122120 };
123121
124 Assert.assertArrayEquals("Calling with index matching dictionary entry at: "
122 Assertions.assertArrayEquals(expected, actual, "Calling with index matching dictionary entry at: "
125123 + indexWithDictionaryMatch + "\nexpected \n" + Arrays.toString(expected)
126 + " but actually was \n" + Arrays.toString(actual), expected, actual);
124 + " but actually was \n" + Arrays.toString(actual));
127125 }
128126
129127 @Test
130 public void multithreading() {
128 void multithreading() {
131129 Callable<Void> matching = () -> {
132130
133131 dictionaryMatch();
159157 try {
160158 future.get();
161159 } catch (InterruptedException e) {
162 Assert.fail("Interrupted because of: " + e.getCause().getMessage());
160 Assertions.fail("Interrupted because of: " + e.getCause().getMessage());
163161 } catch (ExecutionException ee) {
164 Assert.fail(ee.getCause().getMessage());
162 Assertions.fail(ee.getCause().getMessage());
165163 }
166164
167165 });
168166 } catch (final InterruptedException e) {
169 Assert.fail("Test interrupted");
167 Assertions.fail("Test interrupted");
170168 }
171169 }
172170 }
6767 @Override
6868 public Map<String, Object> createArtifactMap() {
6969 Map<String, Object> artifactMap = super.createArtifactMap();
70 if (this.dict != null)
70 if (this.dict != null) {
7171 artifactMap.put(DUMMY_POSDICT, this.dict);
72 }
7273 return artifactMap;
7374 }
7475
9596 static class DummyPOSSequenceValidator implements SequenceValidator<String> {
9697
9798 public boolean validSequence(int i, String[] inputSequence,
98 String[] outcomesSequence, String outcome) {
99 String[] outcomesSequence, String outcome) {
99100 return true;
100101 }
101102
2121 import java.io.IOException;
2222 import java.io.InputStream;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 /**
2828 * Tests for the {@link POSDictionary} class.
3838
3939 try {
4040 dict.serialize(out);
41 }
42 finally {
41 } finally {
4342 out.close();
4443 }
4544
5251 }
5352
5453 @Test
55 public void testSerialization() throws IOException {
54 void testSerialization() throws IOException {
5655 POSDictionary dictionary = new POSDictionary();
5756
5857 dictionary.put("a", "1", "2", "3");
5958 dictionary.put("b", "4", "5", "6");
6059 dictionary.put("c", "7", "8", "9");
61 dictionary.put("Always", "RB","NNP");
60 dictionary.put("Always", "RB", "NNP");
6261
63 Assert.assertTrue(dictionary.equals(serializeDeserializeDict(dictionary)));
62 Assertions.assertTrue(dictionary.equals(serializeDeserializeDict(dictionary)));
6463 }
6564
6665 @Test
67 public void testLoadingDictionaryWithoutCaseAttribute() throws IOException {
66 void testLoadingDictionaryWithoutCaseAttribute() throws IOException {
6867 POSDictionary dict = loadDictionary("TagDictionaryWithoutCaseAttribute.xml");
6968
70 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("McKinsey"));
71 Assert.assertNull(dict.getTags("Mckinsey"));
69 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("McKinsey"));
70 Assertions.assertNull(dict.getTags("Mckinsey"));
7271 }
7372
7473 @Test
75 public void testCaseSensitiveDictionary() throws IOException {
74 void testCaseSensitiveDictionary() throws IOException {
7675 POSDictionary dict = loadDictionary("TagDictionaryCaseSensitive.xml");
7776
78 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("McKinsey"));
79 Assert.assertNull(dict.getTags("Mckinsey"));
77 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("McKinsey"));
78 Assertions.assertNull(dict.getTags("Mckinsey"));
8079
8180 dict = serializeDeserializeDict(dict);
8281
83 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("McKinsey"));
84 Assert.assertNull(dict.getTags("Mckinsey"));
82 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("McKinsey"));
83 Assertions.assertNull(dict.getTags("Mckinsey"));
8584 }
8685
8786 @Test
88 public void testCaseInsensitiveDictionary() throws IOException {
87 void testCaseInsensitiveDictionary() throws IOException {
8988 POSDictionary dict = loadDictionary("TagDictionaryCaseInsensitive.xml");
9089
91 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("McKinsey"));
92 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("Mckinsey"));
93 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("MCKINSEY"));
94 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("mckinsey"));
90 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("McKinsey"));
91 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("Mckinsey"));
92 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("MCKINSEY"));
93 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("mckinsey"));
9594
9695 dict = serializeDeserializeDict(dict);
9796
98 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("McKinsey"));
99 Assert.assertArrayEquals(new String[]{"NNP"}, dict.getTags("Mckinsey"));
97 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("McKinsey"));
98 Assertions.assertArrayEquals(new String[] {"NNP"}, dict.getTags("Mckinsey"));
10099 }
101100
102101 @Test
103 public void testToString() throws IOException {
102 void testToString() throws IOException {
104103 POSDictionary dict = loadDictionary("TagDictionaryCaseInsensitive.xml");
105 Assert.assertEquals("POSDictionary{size=1, caseSensitive=false}", dict.toString());
104 Assertions.assertEquals("POSDictionary{size=1, caseSensitive=false}", dict.toString());
106105 dict = loadDictionary("TagDictionaryCaseSensitive.xml");
107 Assert.assertEquals("POSDictionary{size=1, caseSensitive=true}", dict.toString());
106 Assertions.assertEquals("POSDictionary{size=1, caseSensitive=true}", dict.toString());
108107 }
109108
110109 @Test
111 public void testEqualsAndHashCode() throws IOException {
110 void testEqualsAndHashCode() throws IOException {
112111 POSDictionary dictA = loadDictionary("TagDictionaryCaseInsensitive.xml");
113112 POSDictionary dictB = loadDictionary("TagDictionaryCaseInsensitive.xml");
114113
115 Assert.assertEquals(dictA, dictB);
116 Assert.assertEquals(dictA.hashCode(), dictB.hashCode());
114 Assertions.assertEquals(dictA, dictB);
115 Assertions.assertEquals(dictA.hashCode(), dictB.hashCode());
117116 }
118117 }
2121 import java.util.Arrays;
2222 import java.util.List;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.cmdline.postag.POSEvaluationErrorListener;
2828 import opennlp.tools.util.InvalidFormatException;
3131 public class POSEvaluatorTest {
3232
3333 @Test
34 public void testPositive() throws InvalidFormatException {
34 void testPositive() throws InvalidFormatException {
3535 OutputStream stream = new ByteArrayOutputStream();
3636 POSTaggerEvaluationMonitor listener = new POSEvaluationErrorListener(stream);
3737
3939 POSSampleTest.createGoldSample()), listener);
4040
4141 eval.evaluateSample(POSSampleTest.createGoldSample());
42 Assert.assertEquals(1.0, eval.getWordAccuracy(), 0.0);
43 Assert.assertEquals(0, stream.toString().length());
42 Assertions.assertEquals(1.0, eval.getWordAccuracy(), 0.0);
43 Assertions.assertEquals(0, stream.toString().length());
4444 }
4545
4646 @Test
47 public void testNegative() throws InvalidFormatException {
47 void testNegative() throws InvalidFormatException {
4848 OutputStream stream = new ByteArrayOutputStream();
4949 POSTaggerEvaluationMonitor listener = new POSEvaluationErrorListener(stream);
5050
5252 new DummyPOSTagger(POSSampleTest.createGoldSample()), listener);
5353
5454 eval.evaluateSample(POSSampleTest.createPredSample());
55 Assert.assertEquals(.7, eval.getWordAccuracy(), .1d);
56 Assert.assertNotSame(0, stream.toString().length());
55 Assertions.assertEquals(.7, eval.getWordAccuracy(), .1d);
56 Assertions.assertNotSame(0, stream.toString().length());
5757 }
5858
5959 class DummyPOSTagger implements POSTagger {
2020 import java.io.ByteArrayOutputStream;
2121 import java.io.IOException;
2222
23 import org.junit.Test;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.util.model.ModelType;
2626
2727 public class POSModelTest {
2828
2929 @Test
30 public void testPOSModelSerializationMaxent() throws IOException {
30 void testPOSModelSerializationMaxent() throws IOException {
3131 POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.MAXENT);
3232
3333 ByteArrayOutputStream out = new ByteArrayOutputStream();
3434
3535 try {
3636 posModel.serialize(out);
37 }
38 finally {
37 } finally {
3938 out.close();
4039 }
4140
4544 }
4645
4746 @Test
48 public void testPOSModelSerializationPerceptron() throws IOException {
47 void testPOSModelSerializationPerceptron() throws IOException {
4948 POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.PERCEPTRON);
5049
5150 ByteArrayOutputStream out = new ByteArrayOutputStream();
5251
5352 try {
5453 posModel.serialize(out);
55 }
56 finally {
54 } finally {
5755 out.close();
5856 }
5957
1616
1717 package opennlp.tools.postag;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 import opennlp.tools.ml.model.Event;
2323 import opennlp.tools.util.ObjectStream;
3333 * expected outcomes.
3434 */
3535 @Test
36 public void testOutcomesForSingleSentence() throws Exception {
36 void testOutcomesForSingleSentence() throws Exception {
3737 String sentence = "That_DT sounds_VBZ good_JJ ._.";
3838
3939 POSSample sample = POSSample.parse(sentence);
4040
4141 try (ObjectStream<Event> eventStream = new POSSampleEventStream(
4242 ObjectStreamUtils.createObjectStream(sample))) {
43 Assert.assertEquals("DT", eventStream.read().getOutcome());
44 Assert.assertEquals("VBZ", eventStream.read().getOutcome());
45 Assert.assertEquals("JJ", eventStream.read().getOutcome());
46 Assert.assertEquals(".", eventStream.read().getOutcome());
47 Assert.assertNull(eventStream.read());
43 Assertions.assertEquals("DT", eventStream.read().getOutcome());
44 Assertions.assertEquals("VBZ", eventStream.read().getOutcome());
45 Assertions.assertEquals("JJ", eventStream.read().getOutcome());
46 Assertions.assertEquals(".", eventStream.read().getOutcome());
47 Assertions.assertNull(eventStream.read());
4848 }
4949 }
5050 }
2525 import java.io.ObjectOutput;
2626 import java.io.ObjectOutputStream;
2727
28 import org.junit.Assert;
29 import org.junit.Test;
28 import org.junit.jupiter.api.Assertions;
29 import org.junit.jupiter.api.Test;
3030
3131 import opennlp.tools.util.InvalidFormatException;
3232
3636 public class POSSampleTest {
3737
3838 @Test
39 public void testEquals() throws InvalidFormatException {
40 Assert.assertFalse(createGoldSample() == createGoldSample());
41 Assert.assertTrue(createGoldSample().equals(createGoldSample()));
42 Assert.assertFalse(createPredSample().equals(createGoldSample()));
43 Assert.assertFalse(createPredSample().equals(new Object()));
39 void testEquals() throws InvalidFormatException {
40 Assertions.assertFalse(createGoldSample() == createGoldSample());
41 Assertions.assertTrue(createGoldSample().equals(createGoldSample()));
42 Assertions.assertFalse(createPredSample().equals(createGoldSample()));
43 Assertions.assertFalse(createPredSample().equals(new Object()));
4444 }
4545
4646 public static POSSample createGoldSample() throws InvalidFormatException {
5656 }
5757
5858 @Test
59 public void testPOSSampleSerDe() throws IOException {
59 void testPOSSampleSerDe() throws IOException {
6060 POSSample posSample = createGoldSample();
6161 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
6262 ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream);
7474 // do nothing
7575 }
7676
77 Assert.assertNotNull(deSerializedPOSSample);
78 Assert.assertArrayEquals(posSample.getAddictionalContext(),
77 Assertions.assertNotNull(deSerializedPOSSample);
78 Assertions.assertArrayEquals(posSample.getAddictionalContext(),
7979 deSerializedPOSSample.getAddictionalContext());
80 Assert.assertArrayEquals(posSample.getSentence(), deSerializedPOSSample.getSentence());
81 Assert.assertArrayEquals(posSample.getTags(), deSerializedPOSSample.getTags());
80 Assertions.assertArrayEquals(posSample.getSentence(), deSerializedPOSSample.getSentence());
81 Assertions.assertArrayEquals(posSample.getTags(), deSerializedPOSSample.getTags());
8282 }
8383
8484 /**
8585 * Tests if it can parse a valid token_tag sentence.
86 *
8786 */
8887 @Test
89 public void testParse() throws InvalidFormatException {
88 void testParse() throws InvalidFormatException {
9089 String sentence = "the_DT stories_NNS about_IN well-heeled_JJ " +
9190 "communities_NNS and_CC developers_NNS";
9291 POSSample sample = POSSample.parse(sentence);
93 Assert.assertEquals(sentence, sample.toString());
92 Assertions.assertEquals(sentence, sample.toString());
9493 }
9594
9695 /**
9796 * Tests if it can parse an empty {@link String}.
9897 */
9998 @Test
100 public void testParseEmptyString() throws InvalidFormatException {
99 void testParseEmptyString() throws InvalidFormatException {
101100 String sentence = "";
102101
103102 POSSample sample = POSSample.parse(sentence);
104103
105 Assert.assertEquals(sample.getSentence().length, 0);
106 Assert.assertEquals(sample.getTags().length, 0);
104 Assertions.assertEquals(sample.getSentence().length, 0);
105 Assertions.assertEquals(sample.getTags().length, 0);
107106 }
108107
109108 /**
110109 * Tests if it can parse an empty token.
111 *
112110 */
113111 @Test
114 public void testParseEmtpyToken() throws InvalidFormatException {
112 void testParseEmtpyToken() throws InvalidFormatException {
115113 String sentence = "the_DT _NNS";
116114 POSSample sample = POSSample.parse(sentence);
117 Assert.assertEquals(sample.getSentence()[1], "");
115 Assertions.assertEquals(sample.getSentence()[1], "");
118116 }
119117
120118 /**
121119 * Tests if it can parse an empty tag.
122 *
123120 */
124121 @Test
125 public void testParseEmtpyTag() throws InvalidFormatException {
122 void testParseEmtpyTag() throws InvalidFormatException {
126123 String sentence = "the_DT stories_";
127124 POSSample sample = POSSample.parse(sentence);
128 Assert.assertEquals(sample.getTags()[1], "");
125 Assertions.assertEquals(sample.getTags()[1], "");
129126 }
130127
131128 /**
133130 * in the sentence.
134131 */
135132 @Test
136 public void testParseWithError() {
133 void testParseWithError() {
137134 String sentence = "the_DT stories";
138135
139136 try {
142139 return;
143140 }
144141
145 Assert.fail();
142 Assertions.fail();
146143 }
147144 }
2121 import java.io.IOException;
2222 import java.nio.charset.StandardCharsets;
2323
24 import org.junit.Assert;
25 import org.junit.Test;
24 import org.junit.jupiter.api.Assertions;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.formats.ResourceAsStreamFactory;
2828 import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSContextGenerator;
5656 }
5757
5858 @Test
59 public void testPOSTaggerWithCustomFactory() throws IOException {
59 void testPOSTaggerWithCustomFactory() throws IOException {
6060 DummyPOSDictionary posDict = new DummyPOSDictionary(
6161 POSDictionary.create(POSDictionaryTest.class
6262 .getResourceAsStream("TagDictionaryCaseSensitive.xml")));
6464 POSModel posModel = trainPOSModel(new DummyPOSTaggerFactory(posDict));
6565
6666 POSTaggerFactory factory = posModel.getFactory();
67 Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
68 Assert.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
69 Assert.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
67 Assertions.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
68 Assertions.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
69 Assertions.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
7070
7171 ByteArrayOutputStream out = new ByteArrayOutputStream();
7272 posModel.serialize(out);
7575 POSModel fromSerialized = new POSModel(in);
7676
7777 factory = fromSerialized.getFactory();
78 Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
79 Assert.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
80 Assert.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
78 Assertions.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
79 Assertions.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
80 Assertions.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
8181 }
8282
8383 @Test
84 public void testPOSTaggerWithDefaultFactory() throws IOException {
84 void testPOSTaggerWithDefaultFactory() throws IOException {
8585 POSDictionary posDict = POSDictionary.create(POSDictionaryTest.class
86 .getResourceAsStream("TagDictionaryCaseSensitive.xml"));
86 .getResourceAsStream("TagDictionaryCaseSensitive.xml"));
8787 POSModel posModel = trainPOSModel(new POSTaggerFactory(null, null, posDict));
8888
8989 POSTaggerFactory factory = posModel.getFactory();
90 Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
91 Assert.assertTrue(factory.getPOSContextGenerator() != null);
92 Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
90 Assertions.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
91 Assertions.assertTrue(factory.getPOSContextGenerator() != null);
92 Assertions.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
9393
9494 ByteArrayOutputStream out = new ByteArrayOutputStream();
9595 posModel.serialize(out);
9898 POSModel fromSerialized = new POSModel(in);
9999
100100 factory = fromSerialized.getFactory();
101 Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
102 Assert.assertTrue(factory.getPOSContextGenerator() != null);
103 Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
101 Assertions.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
102 Assertions.assertTrue(factory.getPOSContextGenerator() != null);
103 Assertions.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
104104 }
105105
106 @Test(expected = InvalidFormatException.class)
107 public void testCreateWithInvalidName() throws InvalidFormatException {
108 BaseToolFactory.create("X", null);
106 @Test
107 void testCreateWithInvalidName() {
108 Assertions.assertThrows(InvalidFormatException.class, () -> {
109 BaseToolFactory.create("X", null);
110 });
109111 }
110112
111 @Test(expected = InvalidFormatException.class)
112 public void testCreateWithInvalidName2() throws InvalidFormatException {
113 POSTaggerFactory.create("X", null, null);
113 @Test
114 void testCreateWithInvalidName2() {
115 Assertions.assertThrows(InvalidFormatException.class, () -> {
116 POSTaggerFactory.create("X", null, null);
117 });
114118 }
115119
116 @Test(expected = InvalidFormatException.class)
117 public void testCreateWithHierarchy() throws InvalidFormatException {
118 BaseToolFactory.create(Object.class.getCanonicalName(), null);
120 @Test
121 void testCreateWithHierarchy() {
122 Assertions.assertThrows(InvalidFormatException.class, () -> {
123 BaseToolFactory.create(Object.class.getCanonicalName(), null);
124 });
119125 }
120126
121 @Test(expected = InvalidFormatException.class)
122 public void testCreateWithHierarchy2() throws InvalidFormatException {
123 POSTaggerFactory.create(this.getClass().getCanonicalName(), null, null);
127 @Test
128 void testCreateWithHierarchy2() {
129 Assertions.assertThrows(InvalidFormatException.class, () -> {
130 POSTaggerFactory.create(this.getClass().getCanonicalName(), null, null);
131 });
124132 }
125133 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 public class POSTaggerMEIT {
2525
2626 @Test
27 public void testPOSTagger() throws IOException {
27 void testPOSTagger() throws IOException {
2828
2929 POSTagger tagger = new POSTaggerME("en");
3030
3636 "injured",
3737 "."});
3838
39 Assert.assertEquals(6, tags.length);
40 Assert.assertEquals("DT", tags[0]);
41 Assert.assertEquals("NN", tags[1]);
42 Assert.assertEquals("VBD", tags[2]);
43 Assert.assertEquals("RB", tags[3]);
44 Assert.assertEquals("VBN", tags[4]);
45 Assert.assertEquals(".", tags[5]);
39 Assertions.assertEquals(6, tags.length);
40 Assertions.assertEquals("DT", tags[0]);
41 Assertions.assertEquals("NN", tags[1]);
42 Assertions.assertEquals("VBD", tags[2]);
43 Assertions.assertEquals("RB", tags[3]);
44 Assertions.assertEquals("VBN", tags[4]);
45 Assertions.assertEquals(".", tags[5]);
4646 }
47
47
4848 }
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.formats.ResourceAsStreamFactory;
2626 import opennlp.tools.util.InputStreamFactory;
3333 /**
3434 * Tests for the {@link POSTaggerME} class.
3535 */
36
3637 public class POSTaggerMETest {
3738
3839 private static ObjectStream<POSSample> createSampleStream() throws IOException {
5859 }
5960
6061 @Test
61 public void testPOSTagger() throws IOException {
62 void testPOSTagger() throws IOException {
6263 POSModel posModel = trainPOSModel(ModelType.MAXENT);
6364
6465 POSTagger tagger = new POSTaggerME(posModel);
7172 "injured",
7273 "."});
7374
74 Assert.assertEquals(6, tags.length);
75 Assert.assertEquals("DT", tags[0]);
76 Assert.assertEquals("NN", tags[1]);
77 Assert.assertEquals("VBD", tags[2]);
78 Assert.assertEquals("RB", tags[3]);
79 Assert.assertEquals("VBN", tags[4]);
80 Assert.assertEquals(".", tags[5]);
75 Assertions.assertEquals(6, tags.length);
76 Assertions.assertEquals("DT", tags[0]);
77 Assertions.assertEquals("NN", tags[1]);
78 Assertions.assertEquals("VBD", tags[2]);
79 Assertions.assertEquals("RB", tags[3]);
80 Assertions.assertEquals("VBN", tags[4]);
81 Assertions.assertEquals(".", tags[5]);
8182 }
8283
8384 @Test
84 public void testBuildNGramDictionary() throws IOException {
85 void testBuildNGramDictionary() throws IOException {
8586 ObjectStream<POSSample> samples = createSampleStream();
8687 POSTaggerME.buildNGramDictionary(samples, 0);
8788 }
88
89 @Test(expected = InsufficientTrainingDataException.class)
90 public void insufficientTestData() throws IOException {
9189
92 InputStreamFactory in = new ResourceAsStreamFactory(POSTaggerMETest.class,
93 "/opennlp/tools/postag/AnnotatedSentencesInsufficient.txt");
90 @Test
91 void insufficientTestData() {
9492
95 ObjectStream<POSSample> stream = new WordTagSampleStream(
96 new PlainTextByLineStream(in, StandardCharsets.UTF_8));
97
98 TrainingParameters params = new TrainingParameters();
99 params.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
100 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
101 params.put(TrainingParameters.CUTOFF_PARAM, 5);
93 Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
10294
103 POSTaggerME.train("eng", stream, params, new POSTaggerFactory());
95 InputStreamFactory in = new ResourceAsStreamFactory(POSTaggerMETest.class,
96 "/opennlp/tools/postag/AnnotatedSentencesInsufficient.txt");
97
98 ObjectStream<POSSample> stream = new WordTagSampleStream(
99 new PlainTextByLineStream(in, StandardCharsets.UTF_8));
100
101 TrainingParameters params = new TrainingParameters();
102 params.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
103 params.put(TrainingParameters.ITERATIONS_PARAM, 100);
104 params.put(TrainingParameters.CUTOFF_PARAM, 5);
105
106 POSTaggerME.train("eng", stream, params, new POSTaggerFactory());
107
108 });
109
104110
105111 }
106
112
107113 }
2020 import java.util.ArrayList;
2121 import java.util.Collection;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.util.CollectionObjectStream;
2727
3131 public class WordTagSampleStreamTest {
3232
3333 @Test
34 public void testParseSimpleSample() throws IOException {
34 void testParseSimpleSample() throws IOException {
3535
3636 Collection<String> sampleString = new ArrayList<>(1);
3737 sampleString.add("This_x1 is_x2 a_x3 test_x4 sentence_x5 ._x6");
3838
3939 try (WordTagSampleStream stream =
40 new WordTagSampleStream(new CollectionObjectStream<>(sampleString))) {
40 new WordTagSampleStream(new CollectionObjectStream<>(sampleString))) {
4141 POSSample sample = stream.read();
4242 String[] words = sample.getSentence();
4343
44 Assert.assertEquals("This", words[0]);
45 Assert.assertEquals("is", words[1]);
46 Assert.assertEquals("a", words[2]);
47 Assert.assertEquals("test", words[3]);
48 Assert.assertEquals("sentence", words[4]);
49 Assert.assertEquals(".", words[5]);
44 Assertions.assertEquals("This", words[0]);
45 Assertions.assertEquals("is", words[1]);
46 Assertions.assertEquals("a", words[2]);
47 Assertions.assertEquals("test", words[3]);
48 Assertions.assertEquals("sentence", words[4]);
49 Assertions.assertEquals(".", words[5]);
5050
5151 String[] tags = sample.getTags();
52 Assert.assertEquals("x1", tags[0]);
53 Assert.assertEquals("x2", tags[1]);
54 Assert.assertEquals("x3", tags[2]);
55 Assert.assertEquals("x4", tags[3]);
56 Assert.assertEquals("x5", tags[4]);
57 Assert.assertEquals("x6", tags[5]);
52 Assertions.assertEquals("x1", tags[0]);
53 Assertions.assertEquals("x2", tags[1]);
54 Assertions.assertEquals("x3", tags[2]);
55 Assertions.assertEquals("x4", tags[3]);
56 Assertions.assertEquals("x5", tags[4]);
57 Assertions.assertEquals("x6", tags[5]);
5858
59 Assert.assertNull(stream.read());
59 Assertions.assertNull(stream.read());
6060 stream.reset();
61 Assert.assertNotNull(stream.read());
61 Assertions.assertNotNull(stream.read());
6262 }
6363 }
6464 }
1818
1919 import java.util.List;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 /**
2525 * Tests for the {@link DefaultEndOfSentenceScanner} class.
2727 public class DefaultEndOfSentenceScannerTest {
2828
2929 @Test
30 public void testScanning() {
30 void testScanning() {
3131 EndOfSentenceScanner scanner = new DefaultEndOfSentenceScanner(
32 new char[]{'.', '!', '?'});
32 new char[] {'.', '!', '?'});
3333
3434 List<Integer> eosPositions =
3535 scanner.getPositions("... um die Wertmarken zu auswählen !?");
3636
37 Assert.assertEquals(0, eosPositions.get(0).intValue());
38 Assert.assertEquals(1, eosPositions.get(1).intValue());
39 Assert.assertEquals(2, eosPositions.get(2).intValue());
37 Assertions.assertEquals(0, eosPositions.get(0).intValue());
38 Assertions.assertEquals(1, eosPositions.get(1).intValue());
39 Assertions.assertEquals(2, eosPositions.get(2).intValue());
4040
41 Assert.assertEquals(35, eosPositions.get(3).intValue());
42 Assert.assertEquals(36, eosPositions.get(4).intValue());
41 Assertions.assertEquals(35, eosPositions.get(3).intValue());
42 Assertions.assertEquals(36, eosPositions.get(4).intValue());
4343 }
4444
4545 }
2020 import java.util.Collections;
2121 import java.util.HashSet;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.sentdetect.lang.Factory;
2727
2828 public class DefaultSDContextGeneratorTest {
2929
3030 @Test
31 public void testGetContext() throws Exception {
31 void testGetContext() {
3232 SDContextGenerator sdContextGenerator =
3333 new DefaultSDContextGenerator(Collections.<String>emptySet(), Factory.defaultEosCharacters);
3434
3535 String[] context = sdContextGenerator.getContext(
3636 "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
37 Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/v=/s=/n=Smith/ncap".split("/"), context);
37 Assertions.assertArrayEquals("sn/eos=./x=Mr/2/xcap/v=/s=/n=Smith/ncap".split("/"), context);
3838
3939 context = sdContextGenerator.getContext(
4040 "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
41 Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
41 Assertions.assertArrayEquals("sn/eos=./x=Inc/3/xcap/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
4242 }
4343
4444 @Test
45 public void testGetContextWithAbbreviations() throws Exception {
45 void testGetContextWithAbbreviations() {
4646 SDContextGenerator sdContextGenerator =
4747 new DefaultSDContextGenerator(new HashSet<>(Arrays.asList("Mr./Inc.".split("/"))),
4848 Factory.defaultEosCharacters);
4949
5050 String[] context = sdContextGenerator.getContext(
5151 "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
52 Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/xabbrev/v=/s=/n=Smith/ncap".split("/"), context);
52 Assertions.assertArrayEquals("sn/eos=./x=Mr/2/xcap/xabbrev/v=/s=/n=Smith/ncap".split("/"), context);
5353
5454 context = sdContextGenerator.getContext(
5555 "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
56 Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/xabbrev/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
56 Assertions.assertArrayEquals("sn/eos=./x=Inc/3/xcap/xabbrev/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
5757 }
5858 }
1616
1717 package opennlp.tools.sentdetect;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link NewlineSentenceDetector} class.
3030
3131 String[] results = sd.sentDetect(sentences);
3232
33 Assert.assertEquals(3, results.length);
34 Assert.assertEquals("one.", results[0]);
35 Assert.assertEquals("two.", results[1]);
36 Assert.assertEquals("three.", results[2]);
33 Assertions.assertEquals(3, results.length);
34 Assertions.assertEquals("one.", results[0]);
35 Assertions.assertEquals("two.", results[1]);
36 Assertions.assertEquals("three.", results[2]);
3737 }
3838
3939 @Test
40 public void testNewlineCr() {
40 void testNewlineCr() {
4141 testSentenceValues("one.\rtwo. \r\r three.\r");
4242 }
4343
4444 @Test
45 public void testNewlineLf() {
45 void testNewlineLf() {
4646 testSentenceValues("one.\ntwo. \n\n three.\n");
4747 }
4848
4949 @Test
50 public void testNewlineCrLf() {
50 void testNewlineCrLf() {
5151 testSentenceValues("one.\r\ntwo. \r\n\r\n three.\r\n");
5252 }
5353 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.ml.model.Event;
2525 import opennlp.tools.sentdetect.lang.Factory;
3333 public class SDEventStreamTest {
3434
3535 @Test
36 public void testEventOutcomes() throws IOException {
36 void testEventOutcomes() throws IOException {
3737 // Sample with two sentences
3838 SentenceSample sample = new SentenceSample("Test sent. one. Test sent. 2?",
3939 new Span(0, 15), new Span(16, 29));
4747 factory.createSentenceContextGenerator("eng"),
4848 factory.createEndOfSentenceScanner("eng"));
4949
50 Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
51 Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
52 Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
53 Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
50 Assertions.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
51 Assertions.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
52 Assertions.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
53 Assertions.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
5454
55 Assert.assertNull(eventStream.read());
55 Assertions.assertNull(eventStream.read());
5656 }
5757 }
1919 import java.io.ByteArrayOutputStream;
2020 import java.io.OutputStream;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.cmdline.sentdetect.SentenceEvaluationErrorListener;
26 import opennlp.tools.util.InvalidFormatException;
2726 import opennlp.tools.util.Span;
2827
2928 public class SentenceDetectorEvaluatorTest {
3029
3130 @Test
32 public void testPositive() throws InvalidFormatException {
31 void testPositive() {
3332 OutputStream stream = new ByteArrayOutputStream();
3433 SentenceDetectorEvaluationMonitor listener = new SentenceEvaluationErrorListener(stream);
3534
3837
3938 eval.evaluateSample(SentenceSampleTest.createGoldSample());
4039
41 Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
40 Assertions.assertEquals(1.0, eval.getFMeasure().getFMeasure());
4241
43 Assert.assertEquals(0, stream.toString().length());
42 Assertions.assertEquals(0, stream.toString().length());
4443 }
4544
4645 @Test
47 public void testNegative() throws InvalidFormatException {
46 void testNegative() {
4847 OutputStream stream = new ByteArrayOutputStream();
4948 SentenceDetectorEvaluationMonitor listener = new SentenceEvaluationErrorListener(stream);
5049
5352
5453 eval.evaluateSample(SentenceSampleTest.createPredSample());
5554
56 Assert.assertEquals(-1.0, eval.getFMeasure().getFMeasure(), .1d);
55 Assertions.assertEquals(eval.getFMeasure().getFMeasure(), -1.0, .1d);
5756
58 Assert.assertNotSame(0, stream.toString().length());
57 Assertions.assertNotSame(0, stream.toString().length());
5958 }
6059
6160
6261 /**
6362 * a dummy sentence detector that always return something expected
6463 */
65 class DummySD implements SentenceDetector {
64 public class DummySD implements SentenceDetector {
6665
6766 private SentenceSample sample;
6867
2323 import java.nio.charset.StandardCharsets;
2424 import java.util.Arrays;
2525
26 import org.junit.Assert;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.dictionary.Dictionary;
3030 import opennlp.tools.formats.ResourceAsStreamFactory;
6666 }
6767
6868 @Test
69 public void testDefault() throws IOException {
69 void testDefault() throws IOException {
7070
7171 Dictionary dic = loadAbbDictionary();
7272
7575 eos));
7676
7777 SentenceDetectorFactory factory = sdModel.getFactory();
78 Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
79 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
80 Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
78 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
79 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
80 Assertions.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
8181
8282 ByteArrayOutputStream out = new ByteArrayOutputStream();
8383 sdModel.serialize(out);
8686 SentenceModel fromSerialized = new SentenceModel(in);
8787
8888 factory = fromSerialized.getFactory();
89 Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
90 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
91 Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
89 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
90 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
91 Assertions.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
9292 }
9393
9494 @Test
95 public void testNullDict() throws IOException {
95 void testNullDict() throws IOException {
9696 Dictionary dic = null;
9797
9898 char[] eos = {'.', '?'};
100100 dic, eos));
101101
102102 SentenceDetectorFactory factory = sdModel.getFactory();
103 Assert.assertNull(factory.getAbbreviationDictionary());
104 Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
105 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
106 Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
103 Assertions.assertNull(factory.getAbbreviationDictionary());
104 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
105 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
106 Assertions.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
107107
108108 ByteArrayOutputStream out = new ByteArrayOutputStream();
109109 sdModel.serialize(out);
112112 SentenceModel fromSerialized = new SentenceModel(in);
113113
114114 factory = fromSerialized.getFactory();
115 Assert.assertNull(factory.getAbbreviationDictionary());
116 Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
117 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
118 Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
115 Assertions.assertNull(factory.getAbbreviationDictionary());
116 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
117 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
118 Assertions.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
119119 }
120120
121121 @Test
122 public void testDefaultEOS() throws IOException {
122 void testDefaultEOS() throws IOException {
123123 Dictionary dic = null;
124124
125125 char[] eos = null;
127127 dic, eos));
128128
129129 SentenceDetectorFactory factory = sdModel.getFactory();
130 Assert.assertNull(factory.getAbbreviationDictionary());
131 Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
132 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
133 Assert.assertTrue(Arrays.equals(Factory.defaultEosCharacters,
130 Assertions.assertNull(factory.getAbbreviationDictionary());
131 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
132 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
133 Assertions.assertTrue(Arrays.equals(Factory.defaultEosCharacters,
134134 factory.getEOSCharacters()));
135135
136136 ByteArrayOutputStream out = new ByteArrayOutputStream();
140140 SentenceModel fromSerialized = new SentenceModel(in);
141141
142142 factory = fromSerialized.getFactory();
143 Assert.assertNull(factory.getAbbreviationDictionary());
144 Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
145 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
146 Assert.assertTrue(Arrays.equals(Factory.defaultEosCharacters,
143 Assertions.assertNull(factory.getAbbreviationDictionary());
144 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
145 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
146 Assertions.assertTrue(Arrays.equals(Factory.defaultEosCharacters,
147147 factory.getEOSCharacters()));
148148 }
149149
150150 @Test
151 public void testDummyFactory() throws IOException {
151 void testDummyFactory() throws IOException {
152152
153153 Dictionary dic = loadAbbDictionary();
154154
157157 dic, eos));
158158
159159 SentenceDetectorFactory factory = sdModel.getFactory();
160 Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
161 Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
162 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
163 Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
160 Assertions.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
161 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
162 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
163 Assertions.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
164164
165165 ByteArrayOutputStream out = new ByteArrayOutputStream();
166166 sdModel.serialize(out);
169169 SentenceModel fromSerialized = new SentenceModel(in);
170170
171171 factory = fromSerialized.getFactory();
172 Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
173 Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
174 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
175 Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
172 Assertions.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
173 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
174 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
175 Assertions.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
176176
177 Assert.assertEquals(factory.getAbbreviationDictionary(), sdModel.getAbbreviations());
178 Assert.assertTrue(Arrays.equals(factory.getEOSCharacters(), sdModel.getEosCharacters()));
177 Assertions.assertEquals(factory.getAbbreviationDictionary(), sdModel.getAbbreviations());
178 Assertions.assertTrue(Arrays.equals(factory.getEOSCharacters(), sdModel.getEosCharacters()));
179179 }
180180
181181 @Test
182 public void testCreateDummyFactory() throws IOException {
182 void testCreateDummyFactory() throws IOException {
183183 Dictionary dic = loadAbbDictionary();
184184 char[] eos = {'.', '?'};
185185
187187 DummySentenceDetectorFactory.class.getCanonicalName(), "spa", false,
188188 dic, eos);
189189
190 Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
191 Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
192 Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
193 Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
190 Assertions.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
191 Assertions.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
192 Assertions.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
193 Assertions.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
194194 }
195195
196196 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.util.Span;
2525
2626 public class SentenceDetectorMEIT {
2727
2828 @Test
29 public void testSentenceDetectorDownloadModel() throws IOException {
29 void testSentenceDetectorDownloadModel() throws IOException {
3030
3131 SentenceDetectorME sentDetect = new SentenceDetectorME("en");
3232
3333 // Tests sentence detector with sentDetect method
3434 String sampleSentences1 = "This is a test. There are many tests, this is the second.";
3535 String[] sents = sentDetect.sentDetect(sampleSentences1);
36 Assert.assertEquals(sents.length,2);
37 Assert.assertEquals(sents[0],"This is a test.");
38 Assert.assertEquals(sents[1],"There are many tests, this is the second.");
36 Assertions.assertEquals(sents.length, 2);
37 Assertions.assertEquals(sents[0], "This is a test.");
38 Assertions.assertEquals(sents[1], "There are many tests, this is the second.");
3939 double[] probs = sentDetect.getSentenceProbabilities();
40 Assert.assertEquals(probs.length,2);
40 Assertions.assertEquals(probs.length, 2);
4141
4242 String sampleSentences2 = "This is a test. There are many tests, this is the second";
4343 sents = sentDetect.sentDetect(sampleSentences2);
44 Assert.assertEquals(sents.length,2);
44 Assertions.assertEquals(sents.length, 2);
4545 probs = sentDetect.getSentenceProbabilities();
46 Assert.assertEquals(probs.length,2);
47 Assert.assertEquals(sents[0],"This is a test.");
48 Assert.assertEquals(sents[1],"There are many tests, this is the second");
46 Assertions.assertEquals(probs.length, 2);
47 Assertions.assertEquals(sents[0], "This is a test.");
48 Assertions.assertEquals(sents[1], "There are many tests, this is the second");
4949
5050 String sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\"";
5151 sents = sentDetect.sentDetect(sampleSentences3);
52 Assert.assertEquals(sents.length,2);
52 Assertions.assertEquals(sents.length, 2);
5353 probs = sentDetect.getSentenceProbabilities();
54 Assert.assertEquals(probs.length,2);
55 Assert.assertEquals(sents[0],"This is a \"test\".");
56 Assert.assertEquals(sents[1],"He said \"There are many tests, this is the second.\"");
54 Assertions.assertEquals(probs.length, 2);
55 Assertions.assertEquals(sents[0], "This is a \"test\".");
56 Assertions.assertEquals(sents[1], "He said \"There are many tests, this is the second.\"");
5757
5858 String sampleSentences4 = "This is a \"test\". I said \"This is a test.\" Any questions?";
5959 sents = sentDetect.sentDetect(sampleSentences4);
60 Assert.assertEquals(sents.length,3);
60 Assertions.assertEquals(sents.length, 3);
6161 probs = sentDetect.getSentenceProbabilities();
62 Assert.assertEquals(probs.length,3);
63 Assert.assertEquals(sents[0],"This is a \"test\".");
64 Assert.assertEquals(sents[1],"I said \"This is a test.\"");
65 Assert.assertEquals(sents[2],"Any questions?");
62 Assertions.assertEquals(probs.length, 3);
63 Assertions.assertEquals(sents[0], "This is a \"test\".");
64 Assertions.assertEquals(sents[1], "I said \"This is a test.\"");
65 Assertions.assertEquals(sents[2], "Any questions?");
6666
6767 String sampleSentences5 = "This is a one sentence test space at the end. ";
6868 sents = sentDetect.sentDetect(sampleSentences5);
69 Assert.assertEquals(1, sentDetect.getSentenceProbabilities().length);
70 Assert.assertEquals(sents[0],"This is a one sentence test space at the end.");
69 Assertions.assertEquals(1, sentDetect.getSentenceProbabilities().length);
70 Assertions.assertEquals(sents[0], "This is a one sentence test space at the end.");
7171
7272 String sampleSentences6 = "This is a one sentences test with tab at the end. ";
7373 sents = sentDetect.sentDetect(sampleSentences6);
74 Assert.assertEquals(sents[0],"This is a one sentences test with tab at the end.");
74 Assertions.assertEquals(sents[0], "This is a one sentences test with tab at the end.");
7575
7676 String sampleSentences7 = "This is a test. With spaces between the two sentences.";
7777 sents = sentDetect.sentDetect(sampleSentences7);
78 Assert.assertEquals(sents[0],"This is a test.");
79 Assert.assertEquals(sents[1],"With spaces between the two sentences.");
78 Assertions.assertEquals(sents[0], "This is a test.");
79 Assertions.assertEquals(sents[1], "With spaces between the two sentences.");
8080
8181 String sampleSentences9 = "";
8282 sents = sentDetect.sentDetect(sampleSentences9);
83 Assert.assertEquals(0, sents.length);
83 Assertions.assertEquals(0, sents.length);
8484
8585 String sampleSentences10 = " "; // whitespaces and tabs
8686 sents = sentDetect.sentDetect(sampleSentences10);
87 Assert.assertEquals(0, sents.length);
87 Assertions.assertEquals(0, sents.length);
8888
8989 String sampleSentences11 = "This is test sentence without a dot at the end and spaces ";
9090 sents = sentDetect.sentDetect(sampleSentences11);
91 Assert.assertEquals(sents[0],"This is test sentence without a dot at the end and spaces");
91 Assertions.assertEquals(sents[0], "This is test sentence without a dot at the end and spaces");
9292 probs = sentDetect.getSentenceProbabilities();
93 Assert.assertEquals(1, probs.length);
93 Assertions.assertEquals(1, probs.length);
9494
9595 String sampleSentence12 = " This is a test.";
9696 sents = sentDetect.sentDetect(sampleSentence12);
97 Assert.assertEquals(sents[0],"This is a test.");
97 Assertions.assertEquals(sents[0], "This is a test.");
9898
9999 String sampleSentence13 = " This is a test";
100100 sents = sentDetect.sentDetect(sampleSentence13);
101 Assert.assertEquals(sents[0],"This is a test");
101 Assertions.assertEquals(sents[0], "This is a test");
102102
103103 // Test that sentPosDetect also works
104104 Span[] pos = sentDetect.sentPosDetect(sampleSentences2);
105 Assert.assertEquals(pos.length,2);
105 Assertions.assertEquals(pos.length, 2);
106106 probs = sentDetect.getSentenceProbabilities();
107 Assert.assertEquals(probs.length,2);
108 Assert.assertEquals(new Span(0, 15), pos[0]);
109 Assert.assertEquals(new Span(16, 56), pos[1]);
107 Assertions.assertEquals(probs.length, 2);
108 Assertions.assertEquals(new Span(0, 15), pos[0]);
109 Assertions.assertEquals(new Span(16, 56), pos[1]);
110110
111111 }
112
112
113113 }
2020 import java.io.IOException;
2121 import java.nio.charset.StandardCharsets;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.formats.ResourceAsStreamFactory;
2727 import opennlp.tools.util.InputStreamFactory;
3333 /**
3434 * Tests for the {@link SentenceDetectorME} class.
3535 */
36
3637 public class SentenceDetectorMETest {
3738
3839 @Test
39 public void testSentenceDetector() throws IOException {
40 void testSentenceDetector() throws IOException {
4041
4142 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
4243 "/opennlp/tools/sentdetect/Sentences.txt");
5152 "eng", new SentenceSampleStream(new PlainTextByLineStream(in,
5253 StandardCharsets.UTF_8)), factory, mlParams);
5354
54 Assert.assertEquals("eng", sentdetectModel.getLanguage());
55 Assertions.assertEquals("eng", sentdetectModel.getLanguage());
5556
5657 SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);
5758
5859 // Tests sentence detector with sentDetect method
5960 String sampleSentences1 = "This is a test. There are many tests, this is the second.";
6061 String[] sents = sentDetect.sentDetect(sampleSentences1);
61 Assert.assertEquals(sents.length,2);
62 Assert.assertEquals(sents[0],"This is a test.");
63 Assert.assertEquals(sents[1],"There are many tests, this is the second.");
62 Assertions.assertEquals(sents.length, 2);
63 Assertions.assertEquals(sents[0], "This is a test.");
64 Assertions.assertEquals(sents[1], "There are many tests, this is the second.");
6465 double[] probs = sentDetect.getSentenceProbabilities();
65 Assert.assertEquals(probs.length,2);
66 Assertions.assertEquals(probs.length, 2);
6667
6768 String sampleSentences2 = "This is a test. There are many tests, this is the second";
6869 sents = sentDetect.sentDetect(sampleSentences2);
69 Assert.assertEquals(sents.length,2);
70 Assertions.assertEquals(sents.length, 2);
7071 probs = sentDetect.getSentenceProbabilities();
71 Assert.assertEquals(probs.length,2);
72 Assert.assertEquals(sents[0],"This is a test.");
73 Assert.assertEquals(sents[1],"There are many tests, this is the second");
72 Assertions.assertEquals(probs.length, 2);
73 Assertions.assertEquals(sents[0], "This is a test.");
74 Assertions.assertEquals(sents[1], "There are many tests, this is the second");
7475
7576 String sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\"";
7677 sents = sentDetect.sentDetect(sampleSentences3);
77 Assert.assertEquals(sents.length,2);
78 Assertions.assertEquals(sents.length, 2);
7879 probs = sentDetect.getSentenceProbabilities();
79 Assert.assertEquals(probs.length,2);
80 Assert.assertEquals(sents[0],"This is a \"test\".");
81 Assert.assertEquals(sents[1],"He said \"There are many tests, this is the second.\"");
80 Assertions.assertEquals(probs.length, 2);
81 Assertions.assertEquals(sents[0], "This is a \"test\".");
82 Assertions.assertEquals(sents[1], "He said \"There are many tests, this is the second.\"");
8283
8384 String sampleSentences4 = "This is a \"test\". I said \"This is a test.\" Any questions?";
8485 sents = sentDetect.sentDetect(sampleSentences4);
85 Assert.assertEquals(sents.length,3);
86 Assertions.assertEquals(sents.length, 3);
8687 probs = sentDetect.getSentenceProbabilities();
87 Assert.assertEquals(probs.length,3);
88 Assert.assertEquals(sents[0],"This is a \"test\".");
89 Assert.assertEquals(sents[1],"I said \"This is a test.\"");
90 Assert.assertEquals(sents[2],"Any questions?");
88 Assertions.assertEquals(probs.length, 3);
89 Assertions.assertEquals(sents[0], "This is a \"test\".");
90 Assertions.assertEquals(sents[1], "I said \"This is a test.\"");
91 Assertions.assertEquals(sents[2], "Any questions?");
9192
9293 String sampleSentences5 = "This is a one sentence test space at the end. ";
9394 sents = sentDetect.sentDetect(sampleSentences5);
94 Assert.assertEquals(1, sentDetect.getSentenceProbabilities().length);
95 Assert.assertEquals(sents[0],"This is a one sentence test space at the end.");
95 Assertions.assertEquals(1, sentDetect.getSentenceProbabilities().length);
96 Assertions.assertEquals(sents[0], "This is a one sentence test space at the end.");
9697
9798 String sampleSentences6 = "This is a one sentences test with tab at the end. ";
9899 sents = sentDetect.sentDetect(sampleSentences6);
99 Assert.assertEquals(sents[0],"This is a one sentences test with tab at the end.");
100 Assertions.assertEquals(sents[0], "This is a one sentences test with tab at the end.");
100101
101102 String sampleSentences7 = "This is a test. With spaces between the two sentences.";
102103 sents = sentDetect.sentDetect(sampleSentences7);
103 Assert.assertEquals(sents[0],"This is a test.");
104 Assert.assertEquals(sents[1],"With spaces between the two sentences.");
104 Assertions.assertEquals(sents[0], "This is a test.");
105 Assertions.assertEquals(sents[1], "With spaces between the two sentences.");
105106
106107 String sampleSentences9 = "";
107108 sents = sentDetect.sentDetect(sampleSentences9);
108 Assert.assertEquals(0, sents.length);
109 Assertions.assertEquals(0, sents.length);
109110
110111 String sampleSentences10 = " "; // whitespaces and tabs
111112 sents = sentDetect.sentDetect(sampleSentences10);
112 Assert.assertEquals(0, sents.length);
113 Assertions.assertEquals(0, sents.length);
113114
114115 String sampleSentences11 = "This is test sentence without a dot at the end and spaces ";
115116 sents = sentDetect.sentDetect(sampleSentences11);
116 Assert.assertEquals(sents[0],"This is test sentence without a dot at the end and spaces");
117 Assertions.assertEquals(sents[0], "This is test sentence without a dot at the end and spaces");
117118 probs = sentDetect.getSentenceProbabilities();
118 Assert.assertEquals(1, probs.length);
119 Assertions.assertEquals(1, probs.length);
119120
120121 String sampleSentence12 = " This is a test.";
121122 sents = sentDetect.sentDetect(sampleSentence12);
122 Assert.assertEquals(sents[0],"This is a test.");
123 Assertions.assertEquals(sents[0], "This is a test.");
123124
124125 String sampleSentence13 = " This is a test";
125126 sents = sentDetect.sentDetect(sampleSentence13);
126 Assert.assertEquals(sents[0],"This is a test");
127 Assertions.assertEquals(sents[0], "This is a test");
127128
128129 // Test that sentPosDetect also works
129130 Span[] pos = sentDetect.sentPosDetect(sampleSentences2);
130 Assert.assertEquals(pos.length,2);
131 Assertions.assertEquals(pos.length, 2);
131132 probs = sentDetect.getSentenceProbabilities();
132 Assert.assertEquals(probs.length,2);
133 Assert.assertEquals(new Span(0, 15), pos[0]);
134 Assert.assertEquals(new Span(16, 56), pos[1]);
133 Assertions.assertEquals(probs.length, 2);
134 Assertions.assertEquals(new Span(0, 15), pos[0]);
135 Assertions.assertEquals(new Span(16, 56), pos[1]);
135136
136137 }
137138
138 @Test(expected = InsufficientTrainingDataException.class)
139 public void testInsufficientData() throws IOException {
139 @Test
140 void testInsufficientData() {
140141
141 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
142 "/opennlp/tools/sentdetect/SentencesInsufficient.txt");
142 Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
143143
144 TrainingParameters mlParams = new TrainingParameters();
145 mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
146 mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);
144 InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
145 "/opennlp/tools/sentdetect/SentencesInsufficient.txt");
147146
148 SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null);
149
150 SentenceDetectorME.train("eng",
151 new SentenceSampleStream(
152 new PlainTextByLineStream(in, StandardCharsets.UTF_8)), factory, mlParams);
153
147 TrainingParameters mlParams = new TrainingParameters();
148 mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
149 mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);
150
151 SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null);
152
153 SentenceDetectorME.train("eng",
154 new SentenceSampleStream(
155 new PlainTextByLineStream(in, StandardCharsets.UTF_8)), factory, mlParams);
156
157 });
158
159
154160 }
155
161
156162 }
2424 import java.io.ObjectOutput;
2525 import java.io.ObjectOutputStream;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.util.Span;
3131
3232 /**
3333 * Tests for the {@link SentenceSample} class.
3434 */
35
3536 public class SentenceSampleTest {
3637
3738 @Test
38 public void testRetrievingContent() {
39 void testRetrievingContent() {
3940 SentenceSample sample = new SentenceSample("1. 2.",
4041 new Span(0, 2), new Span(3, 5));
4142
42 Assert.assertEquals("1. 2.", sample.getDocument());
43 Assert.assertEquals(new Span(0, 2), sample.getSentences()[0]);
44 Assert.assertEquals(new Span(3, 5), sample.getSentences()[1]);
43 Assertions.assertEquals("1. 2.", sample.getDocument());
44 Assertions.assertEquals(new Span(0, 2), sample.getSentences()[0]);
45 Assertions.assertEquals(new Span(3, 5), sample.getSentences()[1]);
4546 }
4647
4748 @Test
48 public void testSentenceSampleSerDe() throws IOException {
49 void testSentenceSampleSerDe() throws IOException {
4950 SentenceSample sentenceSample = createGoldSample();
5051 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
5152 ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream);
6364 // do nothing
6465 }
6566
66 Assert.assertNotNull(deSerializedSentenceSample);
67 Assert.assertEquals(sentenceSample.getDocument(), deSerializedSentenceSample.getDocument());
68 Assert.assertArrayEquals(sentenceSample.getSentences(), deSerializedSentenceSample.getSentences());
69 }
70
71 @Test(expected = IllegalArgumentException.class)
72 public void testInvalidSpansFailFast() {
73 SentenceSample sample = new SentenceSample("1. 2.",
74 new Span(0, 2), new Span(5, 7));
67 Assertions.assertNotNull(deSerializedSentenceSample);
68 Assertions.assertEquals(sentenceSample.getDocument(), deSerializedSentenceSample.getDocument());
69 Assertions.assertArrayEquals(sentenceSample.getSentences(), deSerializedSentenceSample.getSentences());
7570 }
7671
7772 @Test
78 public void testEquals() {
79 Assert.assertFalse(createGoldSample() == createGoldSample());
80 Assert.assertTrue(createGoldSample().equals(createGoldSample()));
81 Assert.assertFalse(createPredSample().equals(createGoldSample()));
82 Assert.assertFalse(createPredSample().equals(new Object()));
73 void testInvalidSpansFailFast() {
74 Assertions.assertThrows(IllegalArgumentException.class, () -> {
75 SentenceSample sample = new SentenceSample("1. 2.",
76 new Span(0, 2), new Span(5, 7));
77 });
78 }
79
80 @Test
81 void testEquals() {
82 Assertions.assertFalse(createGoldSample() == createGoldSample());
83 Assertions.assertTrue(createGoldSample().equals(createGoldSample()));
84 Assertions.assertFalse(createPredSample().equals(createGoldSample()));
85 Assertions.assertFalse(createPredSample().equals(new Object()));
8386 }
8487
8588 public static SentenceSample createGoldSample() {
1616
1717 package opennlp.tools.stemmer;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 public class PorterStemmerTest {
2323
2424 private PorterStemmer stemmer = new PorterStemmer();
2525
2626 @Test
27 public void testNotNull() {
28 Assert.assertNotNull(stemmer);
27 void testNotNull() {
28 Assertions.assertNotNull(stemmer);
2929 }
3030
3131 @Test
32 public void testStemming() {
33 Assert.assertEquals(stemmer.stem("deny"), "deni" );
34 Assert.assertEquals(stemmer.stem("declining"), "declin" );
35 Assert.assertEquals(stemmer.stem("diversity"), "divers" );
36 Assert.assertEquals(stemmer.stem("divers"), "diver" );
37 Assert.assertEquals(stemmer.stem("dental"), "dental" );
32 void testStemming() {
33 Assertions.assertEquals(stemmer.stem("deny"), "deni");
34 Assertions.assertEquals(stemmer.stem("declining"), "declin");
35 Assertions.assertEquals(stemmer.stem("diversity"), "divers");
36 Assertions.assertEquals(stemmer.stem("divers"), "diver");
37 Assertions.assertEquals(stemmer.stem("dental"), "dental");
3838 }
3939 }
1616
1717 package opennlp.tools.stemmer;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 import opennlp.tools.stemmer.snowball.SnowballStemmer;
2323 import opennlp.tools.stemmer.snowball.SnowballStemmer.ALGORITHM;
2525 public class SnowballStemmerTest {
2626
2727 @Test
28 public void testArabic() {
28 void testArabic() {
2929 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.ARABIC);
30 Assert.assertEquals(stemmer.stem("أأباءاهم"), "اباء");
31 Assert.assertEquals(stemmer.stem("استفتياكما"), "استفتي");
32 Assert.assertEquals(stemmer.stem("استنتاجاتهما"), "استنتاجا");
30 Assertions.assertEquals(stemmer.stem("أأباءاهم"), "اباء");
31 Assertions.assertEquals(stemmer.stem("استفتياكما"), "استفتي");
32 Assertions.assertEquals(stemmer.stem("استنتاجاتهما"), "استنتاجا");
3333 }
3434
3535 @Test
36 public void testDanish() {
36 void testDanish() {
3737 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.DANISH);
38 Assert.assertEquals(stemmer.stem("aabenbaringen"), "aabenbaring");
39 Assert.assertEquals(stemmer.stem("skuebrødsbordene"), "skuebrødsbord");
40 Assert.assertEquals(stemmer.stem("skrøbeligheder"), "skrøb");
38 Assertions.assertEquals(stemmer.stem("aabenbaringen"), "aabenbaring");
39 Assertions.assertEquals(stemmer.stem("skuebrødsbordene"), "skuebrødsbord");
40 Assertions.assertEquals(stemmer.stem("skrøbeligheder"), "skrøb");
4141 }
4242
4343 @Test
44 public void testDutch() {
44 void testDutch() {
4545 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.DUTCH);
46 Assert.assertEquals(stemmer.stem("vliegtuigtransport"), "vliegtuigtransport");
47 Assert.assertEquals(stemmer.stem("sterlabcertificaat"), "sterlabcertificat");
48 Assert.assertEquals(stemmer.stem("vollegrondsgroenteteelt"),
46 Assertions.assertEquals(stemmer.stem("vliegtuigtransport"), "vliegtuigtransport");
47 Assertions.assertEquals(stemmer.stem("sterlabcertificaat"), "sterlabcertificat");
48 Assertions.assertEquals(stemmer.stem("vollegrondsgroenteteelt"),
4949 "vollegrondsgroenteteelt");
5050
5151 }
5252
5353 @Test
54 public void testCatalan() {
54 void testCatalan() {
5555 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.CATALAN);
56 Assert.assertEquals(stemmer.stem("importantíssimes"), "important");
57 Assert.assertEquals(stemmer.stem("besar"), "bes");
58 Assert.assertEquals(stemmer.stem("accidentalment"), "accidental");
56 Assertions.assertEquals(stemmer.stem("importantíssimes"), "important");
57 Assertions.assertEquals(stemmer.stem("besar"), "bes");
58 Assertions.assertEquals(stemmer.stem("accidentalment"), "accidental");
5959
6060 }
6161
6262 @Test
63 public void testEnglish() {
63 void testEnglish() {
6464 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.ENGLISH);
65 Assert.assertEquals(stemmer.stem("accompanying"), "accompani");
66 Assert.assertEquals(stemmer.stem("malediction"), "maledict");
67 Assert.assertEquals(stemmer.stem("softeners"), "soften");
65 Assertions.assertEquals(stemmer.stem("accompanying"), "accompani");
66 Assertions.assertEquals(stemmer.stem("malediction"), "maledict");
67 Assertions.assertEquals(stemmer.stem("softeners"), "soften");
6868
6969 }
7070
7171 @Test
72 public void testFinnish() {
72 void testFinnish() {
7373 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.FINNISH);
74 Assert.assertEquals(stemmer.stem("esiintymispaikasta"), "esiintymispaik");
75 Assert.assertEquals(stemmer.stem("esiintyviätaiteilijaystäviään"),
74 Assertions.assertEquals(stemmer.stem("esiintymispaikasta"), "esiintymispaik");
75 Assertions.assertEquals(stemmer.stem("esiintyviätaiteilijaystäviään"),
7676 "esiintyviätaiteilijaystäviä");
77 Assert.assertEquals(stemmer.stem("hellbergiä"), "hellberg");
77 Assertions.assertEquals(stemmer.stem("hellbergiä"), "hellberg");
7878
7979 }
8080
8181 @Test
82 public void testFrench() {
82 void testFrench() {
8383 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.FRENCH);
84 Assert.assertEquals(stemmer.stem("accomplissaient"), "accompl");
85 Assert.assertEquals(stemmer.stem("examinateurs"), "examin");
86 Assert.assertEquals(stemmer.stem("prévoyant"), "prévoi");
84 Assertions.assertEquals(stemmer.stem("accomplissaient"), "accompl");
85 Assertions.assertEquals(stemmer.stem("examinateurs"), "examin");
86 Assertions.assertEquals(stemmer.stem("prévoyant"), "prévoi");
8787 }
8888
8989 @Test
90 public void testGerman() {
90 void testGerman() {
9191 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.GERMAN);
92 Assert.assertEquals(stemmer.stem("buchbindergesellen"), "buchbindergesell");
93 Assert.assertEquals(stemmer.stem("mindere"), "mind");
94 Assert.assertEquals(stemmer.stem("mitverursacht"), "mitverursacht");
92 Assertions.assertEquals(stemmer.stem("buchbindergesellen"), "buchbindergesell");
93 Assertions.assertEquals(stemmer.stem("mindere"), "mind");
94 Assertions.assertEquals(stemmer.stem("mitverursacht"), "mitverursacht");
9595
9696 }
9797
9898 @Test
99 public void testGreek() {
99 void testGreek() {
100100 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.GREEK);
101 Assert.assertEquals(stemmer.stem("επιστροφή"), "επιστροφ");
102 Assert.assertEquals(stemmer.stem("Αμερικανών"), "αμερικαν");
103 Assert.assertEquals(stemmer.stem("στρατιωτών"), "στρατιωτ");
101 Assertions.assertEquals(stemmer.stem("επιστροφή"), "επιστροφ");
102 Assertions.assertEquals(stemmer.stem("Αμερικανών"), "αμερικαν");
103 Assertions.assertEquals(stemmer.stem("στρατιωτών"), "στρατιωτ");
104104
105105 }
106106
107107 @Test
108 public void testHungarian() {
108 void testHungarian() {
109109 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.HUNGARIAN);
110 Assert.assertEquals(stemmer.stem("abbahagynám"), "abbahagyna");
111 Assert.assertEquals(stemmer.stem("konstrukciójából"), "konstrukció");
112 Assert.assertEquals(stemmer.stem("lopta"), "lopt");
110 Assertions.assertEquals(stemmer.stem("abbahagynám"), "abbahagyna");
111 Assertions.assertEquals(stemmer.stem("konstrukciójából"), "konstrukció");
112 Assertions.assertEquals(stemmer.stem("lopta"), "lopt");
113113
114114 }
115115
116116 @Test
117 public void testIrish() {
117 void testIrish() {
118118 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.IRISH);
119 Assert.assertEquals(stemmer.stem("bhfeidhm"), "feidhm");
120 Assert.assertEquals(stemmer.stem("feirmeoireacht"), "feirmeoir");
121 Assert.assertEquals(stemmer.stem("monarcacht"), "monarc");
119 Assertions.assertEquals(stemmer.stem("bhfeidhm"), "feidhm");
120 Assertions.assertEquals(stemmer.stem("feirmeoireacht"), "feirmeoir");
121 Assertions.assertEquals(stemmer.stem("monarcacht"), "monarc");
122122
123123 }
124124
125125 @Test
126 public void testItalian() {
126 void testItalian() {
127127 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.ITALIAN);
128 Assert.assertEquals(stemmer.stem("abbattimento"), "abbatt");
129 Assert.assertEquals(stemmer.stem("dancer"), "dancer");
130 Assert.assertEquals(stemmer.stem("dance"), "danc");
128 Assertions.assertEquals(stemmer.stem("abbattimento"), "abbatt");
129 Assertions.assertEquals(stemmer.stem("dancer"), "dancer");
130 Assertions.assertEquals(stemmer.stem("dance"), "danc");
131131
132132 }
133133
134134 @Test
135 public void testIndonesian() {
135 void testIndonesian() {
136136 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.INDONESIAN);
137 Assert.assertEquals(stemmer.stem("peledakan"), "ledak");
138 Assert.assertEquals(stemmer.stem("pelajaran"), "ajar");
139 Assert.assertEquals(stemmer.stem("perbaikan"), "baik");
137 Assertions.assertEquals(stemmer.stem("peledakan"), "ledak");
138 Assertions.assertEquals(stemmer.stem("pelajaran"), "ajar");
139 Assertions.assertEquals(stemmer.stem("perbaikan"), "baik");
140140
141141 }
142142
143143 @Test
144 public void testPortuguese() {
144 void testPortuguese() {
145145 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.PORTUGUESE);
146 Assert.assertEquals(stemmer.stem("aborrecimentos"), "aborrec");
147 Assert.assertEquals(stemmer.stem("aché"), "aché");
148 Assert.assertEquals(stemmer.stem("ache"), "ache");
146 Assertions.assertEquals(stemmer.stem("aborrecimentos"), "aborrec");
147 Assertions.assertEquals(stemmer.stem("aché"), "aché");
148 Assertions.assertEquals(stemmer.stem("ache"), "ache");
149149
150150 }
151151
152152 @Test
153 public void testRomanian() {
153 void testRomanian() {
154154 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.ROMANIAN);
155 Assert.assertEquals(stemmer.stem("absurdităţilor"), "absurd");
156 Assert.assertEquals(stemmer.stem("laşi"), "laş");
157 Assert.assertEquals(stemmer.stem("saracilor"), "sarac");
155 Assertions.assertEquals(stemmer.stem("absurdităţilor"), "absurd");
156 Assertions.assertEquals(stemmer.stem("laşi"), "laş");
157 Assertions.assertEquals(stemmer.stem("saracilor"), "sarac");
158158 }
159159
160160 @Test
161 public void testSpanish() {
161 void testSpanish() {
162162 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.SPANISH);
163 Assert.assertEquals(stemmer.stem("besó"), "bes");
164 Assert.assertEquals(stemmer.stem("importantísimas"), "importantisim");
165 Assert.assertEquals(stemmer.stem("incidental"), "incidental");
163 Assertions.assertEquals(stemmer.stem("besó"), "bes");
164 Assertions.assertEquals(stemmer.stem("importantísimas"), "importantisim");
165 Assertions.assertEquals(stemmer.stem("incidental"), "incidental");
166166 }
167167
168168 @Test
169 public void testSwedish() {
169 void testSwedish() {
170170 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.SWEDISH);
171 Assert.assertEquals(stemmer.stem("aftonringningen"), "aftonringning");
172 Assert.assertEquals(stemmer.stem("andedrag"), "andedrag");
173 Assert.assertEquals(stemmer.stem("andedrägt"), "andedräg");
171 Assertions.assertEquals(stemmer.stem("aftonringningen"), "aftonringning");
172 Assertions.assertEquals(stemmer.stem("andedrag"), "andedrag");
173 Assertions.assertEquals(stemmer.stem("andedrägt"), "andedräg");
174174
175175 }
176176
177177 @Test
178 public void testTurkish() {
178 void testTurkish() {
179179 SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.TURKISH);
180 Assert.assertEquals(stemmer.stem("ab'yle"), "ab'yle");
181 Assert.assertEquals(stemmer.stem("kaçmamaktadır"), "kaçmamak");
182 Assert.assertEquals(stemmer.stem("sarayı'nı"), "sarayı'nı");
180 Assertions.assertEquals(stemmer.stem("ab'yle"), "ab'yle");
181 Assertions.assertEquals(stemmer.stem("kaçmamaktadır"), "kaçmamak");
182 Assertions.assertEquals(stemmer.stem("sarayı'nı"), "sarayı'nı");
183183 }
184184 }
2020 import java.io.ByteArrayOutputStream;
2121 import java.io.IOException;
2222
23 import org.junit.Assert;
24 import org.junit.Before;
25 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.BeforeEach;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.tokenize.DetokenizationDictionary.Operation;
2828
3030
3131 private DetokenizationDictionary dict;
3232
33 @Before
34 public void setUp() throws Exception {
33 @BeforeEach
34 void setUp() {
3535
3636 String[] tokens = new String[] {"\"", "(", ")", "-"};
3737
4242 }
4343
4444 private static void testEntries(DetokenizationDictionary dict) {
45 Assert.assertEquals(Operation.RIGHT_LEFT_MATCHING, dict.getOperation("\""));
46 Assert.assertEquals(Operation.MOVE_RIGHT, dict.getOperation("("));
47 Assert.assertEquals(Operation.MOVE_LEFT, dict.getOperation(")"));
48 Assert.assertEquals(Operation.MOVE_BOTH, dict.getOperation("-"));
45 Assertions.assertEquals(Operation.RIGHT_LEFT_MATCHING, dict.getOperation("\""));
46 Assertions.assertEquals(Operation.MOVE_RIGHT, dict.getOperation("("));
47 Assertions.assertEquals(Operation.MOVE_LEFT, dict.getOperation(")"));
48 Assertions.assertEquals(Operation.MOVE_BOTH, dict.getOperation("-"));
4949 }
5050
5151 @Test
52 public void testSimpleDict() {
52 void testSimpleDict() {
5353 testEntries(dict);
5454 }
5555
5656 @Test
57 public void testSerialization() throws IOException {
57 void testSerialization() throws IOException {
5858 ByteArrayOutputStream out = new ByteArrayOutputStream();
5959 dict.serialize(out);
6060
1919 import java.io.ByteArrayOutputStream;
2020 import java.io.OutputStream;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.cmdline.tokenizer.DetokenEvaluationErrorListener;
26 import opennlp.tools.util.InvalidFormatException;
27
2826
2927 public class DetokenizerEvaluatorTest {
3028 @Test
31 public void testPositive() throws InvalidFormatException {
29 void testPositive() {
3230 OutputStream stream = new ByteArrayOutputStream();
3331 DetokenEvaluationErrorListener listener = new DetokenEvaluationErrorListener(stream);
3432
3735
3836 eval.evaluateSample(TokenSampleTest.createGoldSample());
3937
40 Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
38 Assertions.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
4139
42 Assert.assertEquals(0, stream.toString().length());
40 Assertions.assertEquals(0, stream.toString().length());
4341 }
4442
4543 @Test
46 public void testNegative() throws InvalidFormatException {
44 void testNegative() {
4745 OutputStream stream = new ByteArrayOutputStream();
4846 DetokenEvaluationErrorListener listener = new DetokenEvaluationErrorListener(
4947 stream);
5351
5452 eval.evaluateSample(TokenSampleTest.createPredSilverSample());
5553
56 Assert.assertEquals(-1.0d, eval.getFMeasure().getFMeasure(), .1d);
54 Assertions.assertEquals(-1.0d, eval.getFMeasure().getFMeasure(), .1d);
5755
58 Assert.assertNotSame(0, stream.toString().length());
56 Assertions.assertNotSame(0, stream.toString().length());
5957 }
6058
6159 /**
1919 import java.io.IOException;
2020 import java.io.InputStream;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.tokenize.DetokenizationDictionary.Operation;
2626 import opennlp.tools.tokenize.Detokenizer.DetokenizationOperation;
2828 public class DictionaryDetokenizerTest {
2929
3030 @Test
31 public void testDetokenizer() {
31 void testDetokenizer() {
3232
33 String[] tokens = new String[]{".", "!", "(", ")", "\"", "-"};
33 String[] tokens = new String[] {".", "!", "(", ")", "\"", "-"};
3434
35 Operation[] operations = new Operation[]{
35 Operation[] operations = new Operation[] {
3636 Operation.MOVE_LEFT,
3737 Operation.MOVE_LEFT,
3838 Operation.MOVE_RIGHT,
4444 Detokenizer detokenizer = new DictionaryDetokenizer(dict);
4545
4646 DetokenizationOperation[] detokenizeOperations =
47 detokenizer.detokenize(new String[]{"Simple", "test", ".", "co", "-", "worker"});
47 detokenizer.detokenize(new String[] {"Simple", "test", ".", "co", "-", "worker"});
4848
49 Assert.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[0]);
50 Assert.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[1]);
51 Assert.assertEquals(DetokenizationOperation.MERGE_TO_LEFT, detokenizeOperations[2]);
52 Assert.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[3]);
53 Assert.assertEquals(DetokenizationOperation.MERGE_BOTH, detokenizeOperations[4]);
54 Assert.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[5]);
49 Assertions.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[0]);
50 Assertions.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[1]);
51 Assertions.assertEquals(DetokenizationOperation.MERGE_TO_LEFT, detokenizeOperations[2]);
52 Assertions.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[3]);
53 Assertions.assertEquals(DetokenizationOperation.MERGE_BOTH, detokenizeOperations[4]);
54 Assertions.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[5]);
5555 }
5656
5757 static Detokenizer createLatinDetokenizer() throws IOException {
6666 }
6767
6868 @Test
69 public void testDetokenizeToString() throws IOException {
69 void testDetokenizeToString() throws IOException {
7070
7171 Detokenizer detokenizer = createLatinDetokenizer();
7272
73 String[] tokens = new String[]{"A", "test", ",", "(", "string", ")", "."};
73 String[] tokens = new String[] {"A", "test", ",", "(", "string", ")", "."};
7474
7575 String sentence = detokenizer.detokenize(tokens, null);
7676
77 Assert.assertEquals("A test, (string).", sentence);
77 Assertions.assertEquals("A test, (string).", sentence);
7878 }
7979
8080 @Test
81 public void testDetokenizeToString2() throws IOException {
81 void testDetokenizeToString2() throws IOException {
8282
8383 Detokenizer detokenizer = createLatinDetokenizer();
8484
85 String[] tokens = new String[]{"A", "co", "-", "worker", "helped", "."};
85 String[] tokens = new String[] {"A", "co", "-", "worker", "helped", "."};
8686
8787 String sentence = detokenizer.detokenize(tokens, null);
8888
89 Assert.assertEquals("A co-worker helped.", sentence);
89 Assertions.assertEquals("A co-worker helped.", sentence);
9090 }
9191 }
1616
1717 package opennlp.tools.tokenize;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link SimpleTokenizer} class.
3131 * Tests if it can tokenize whitespace separated tokens.
3232 */
3333 @Test
34 public void testWhitespaceTokenization() {
34 void testWhitespaceTokenization() {
3535
3636 String text = "a b c d e f ";
3737
3838 String[] tokenizedText = mTokenizer.tokenize(text);
3939
40 Assert.assertTrue("a".equals(tokenizedText[0]));
41 Assert.assertTrue("b".equals(tokenizedText[1]));
42 Assert.assertTrue("c".equals(tokenizedText[2]));
43 Assert.assertTrue("d".equals(tokenizedText[3]));
44 Assert.assertTrue("e".equals(tokenizedText[4]));
45 Assert.assertTrue("f".equals(tokenizedText[5]));
40 Assertions.assertTrue("a".equals(tokenizedText[0]));
41 Assertions.assertTrue("b".equals(tokenizedText[1]));
42 Assertions.assertTrue("c".equals(tokenizedText[2]));
43 Assertions.assertTrue("d".equals(tokenizedText[3]));
44 Assertions.assertTrue("e".equals(tokenizedText[4]));
45 Assertions.assertTrue("f".equals(tokenizedText[5]));
4646
47 Assert.assertTrue(tokenizedText.length == 6);
47 Assertions.assertTrue(tokenizedText.length == 6);
4848 }
4949
5050 /**
5151 * Tests if it can tokenize a word and a dot.
5252 */
5353 @Test
54 public void testWordDotTokenization() {
54 void testWordDotTokenization() {
5555 String text = "a.";
5656
5757 String[] tokenizedText = mTokenizer.tokenize(text);
5858
59 Assert.assertTrue("a".equals(tokenizedText[0]));
60 Assert.assertTrue(".".equals(tokenizedText[1]));
61 Assert.assertTrue(tokenizedText.length == 2);
59 Assertions.assertTrue("a".equals(tokenizedText[0]));
60 Assertions.assertTrue(".".equals(tokenizedText[1]));
61 Assertions.assertTrue(tokenizedText.length == 2);
6262 }
6363
6464 /**
6565 * Tests if it can tokenize a word and numeric.
6666 */
6767 @Test
68 public void testWordNumericTokeniztation() {
68 void testWordNumericTokeniztation() {
6969 String text = "305KW";
7070
7171 String[] tokenizedText = mTokenizer.tokenize(text);
7272
73 Assert.assertTrue("305".equals(tokenizedText[0]));
74 Assert.assertTrue("KW".equals(tokenizedText[1]));
75 Assert.assertTrue(tokenizedText.length == 2);
73 Assertions.assertTrue("305".equals(tokenizedText[0]));
74 Assertions.assertTrue("KW".equals(tokenizedText[1]));
75 Assertions.assertTrue(tokenizedText.length == 2);
7676 }
7777
7878 @Test
79 public void testWordWithOtherTokenization() {
79 void testWordWithOtherTokenization() {
8080 String text = "rebecca.sleep()";
8181
8282 String[] tokenizedText = mTokenizer.tokenize(text);
8383
84 Assert.assertTrue("rebecca".equals(tokenizedText[0]));
85 Assert.assertTrue(".".equals(tokenizedText[1]));
86 Assert.assertTrue("sleep".equals(tokenizedText[2]));
87 Assert.assertTrue("(".equals(tokenizedText[3]));
88 Assert.assertTrue(")".equals(tokenizedText[4]));
89 Assert.assertTrue(tokenizedText.length == 5);
84 Assertions.assertTrue("rebecca".equals(tokenizedText[0]));
85 Assertions.assertTrue(".".equals(tokenizedText[1]));
86 Assertions.assertTrue("sleep".equals(tokenizedText[2]));
87 Assertions.assertTrue("(".equals(tokenizedText[3]));
88 Assertions.assertTrue(")".equals(tokenizedText[4]));
89 Assertions.assertTrue(tokenizedText.length == 5);
9090 }
9191
9292 @Test
93 public void testTokenizationOfStringWithUnixNewLineTokens() {
93 void testTokenizationOfStringWithUnixNewLineTokens() {
9494 SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
9595 tokenizer.setKeepNewLines(true);
9696
97 Assert.assertEquals(2, tokenizer.tokenize("a\n").length);
98 Assert.assertArrayEquals(new String[] {"a", "\n"}, tokenizer.tokenize("a\n"));
97 Assertions.assertEquals(2, tokenizer.tokenize("a\n").length);
98 Assertions.assertArrayEquals(new String[] {"a", "\n"}, tokenizer.tokenize("a\n"));
9999
100 Assert.assertEquals(3, tokenizer.tokenize("a\nb").length);
101 Assert.assertArrayEquals(new String[] {"a", "\n", "b"}, tokenizer.tokenize("a\nb"));
100 Assertions.assertEquals(3, tokenizer.tokenize("a\nb").length);
101 Assertions.assertArrayEquals(new String[] {"a", "\n", "b"}, tokenizer.tokenize("a\nb"));
102102
103 Assert.assertEquals(4, tokenizer.tokenize("a\n\n b").length);
104 Assert.assertArrayEquals(new String[] {"a", "\n", "\n", "b"}, tokenizer.tokenize("a\n\n b"));
103 Assertions.assertEquals(4, tokenizer.tokenize("a\n\n b").length);
104 Assertions.assertArrayEquals(new String[] {"a", "\n", "\n", "b"}, tokenizer.tokenize("a\n\n b"));
105105
106 Assert.assertEquals(7, tokenizer.tokenize("a\n\n b\n\n c").length);
107 Assert.assertArrayEquals(new String[] {"a", "\n", "\n", "b", "\n", "\n", "c"},
108 tokenizer.tokenize("a\n\n b\n\n c"));
106 Assertions.assertEquals(7, tokenizer.tokenize("a\n\n b\n\n c").length);
107 Assertions.assertArrayEquals(new String[] {"a", "\n", "\n", "b", "\n", "\n", "c"},
108 tokenizer.tokenize("a\n\n b\n\n c"));
109109 }
110110
111111 @Test
112 public void testTokenizationOfStringWithWindowsNewLineTokens() {
112 void testTokenizationOfStringWithWindowsNewLineTokens() {
113113 SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
114114 tokenizer.setKeepNewLines(true);
115
116 Assert.assertEquals(3, tokenizer.tokenize("a\r\n").length);
117 Assert.assertArrayEquals(new String[] {"a", "\r", "\n"}, tokenizer.tokenize("a\r\n"));
118115
119 Assert.assertEquals(4, tokenizer.tokenize("a\r\nb").length);
120 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "b"}, tokenizer.tokenize("a\r\nb"));
116 Assertions.assertEquals(3, tokenizer.tokenize("a\r\n").length);
117 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n"}, tokenizer.tokenize("a\r\n"));
121118
122 Assert.assertEquals(6, tokenizer.tokenize("a\r\n\r\n b").length);
123 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b"}, tokenizer
119 Assertions.assertEquals(4, tokenizer.tokenize("a\r\nb").length);
120 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "b"}, tokenizer.tokenize("a\r\nb"));
121
122 Assertions.assertEquals(6, tokenizer.tokenize("a\r\n\r\n b").length);
123 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b"}, tokenizer
124124 .tokenize("a\r\n\r\n b"));
125125
126 Assert.assertEquals(11, tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c").length);
127 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b", "\r", "\n", "\r", "\n", "c"},
128 tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c"));
126 Assertions.assertEquals(11, tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c").length);
127 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b", "\r", "\n", "\r", "\n", "c"},
128 tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c"));
129129 }
130130 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.ml.model.Event;
2525 import opennlp.tools.util.ObjectStream;
3434 * Tests the event stream for correctly generated outcomes.
3535 */
3636 @Test
37 public void testEventOutcomes() throws IOException {
37 void testEventOutcomes() throws IOException {
3838
3939 ObjectStream<String> sentenceStream =
4040 ObjectStreamUtils.createObjectStream("\"<SPLIT>out<SPLIT>.<SPLIT>\"");
4343
4444 try (ObjectStream<Event> eventStream = new TokSpanEventStream(tokenSampleStream, false)) {
4545
46 Assert.assertEquals(TokenizerME.SPLIT, eventStream.read().getOutcome());
47 Assert.assertEquals(TokenizerME.NO_SPLIT, eventStream.read().getOutcome());
48 Assert.assertEquals(TokenizerME.NO_SPLIT, eventStream.read().getOutcome());
49 Assert.assertEquals(TokenizerME.SPLIT, eventStream.read().getOutcome());
50 Assert.assertEquals(TokenizerME.SPLIT, eventStream.read().getOutcome());
46 Assertions.assertEquals(TokenizerME.SPLIT, eventStream.read().getOutcome());
47 Assertions.assertEquals(TokenizerME.NO_SPLIT, eventStream.read().getOutcome());
48 Assertions.assertEquals(TokenizerME.NO_SPLIT, eventStream.read().getOutcome());
49 Assertions.assertEquals(TokenizerME.SPLIT, eventStream.read().getOutcome());
50 Assertions.assertEquals(TokenizerME.SPLIT, eventStream.read().getOutcome());
5151
52 Assert.assertNull(eventStream.read());
53 Assert.assertNull(eventStream.read());
52 Assertions.assertNull(eventStream.read());
53 Assertions.assertNull(eventStream.read());
5454 }
5555 }
5656 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.util.ObjectStream;
2525 import opennlp.tools.util.ObjectStreamUtils;
3333 /**
3434 * Tests if the {@link TokenSample} correctly tokenizes tokens which
3535 * are separated by a whitespace.
36 *
3736 */
3837 @Test
39 public void testParsingWhitespaceSeparatedTokens() throws IOException {
38 void testParsingWhitespaceSeparatedTokens() throws IOException {
4039 String sampleTokens = "Slave to the wage";
4140
4241 ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
4645
4746 Span[] tokenSpans = tokenSample.getTokenSpans();
4847
49 Assert.assertEquals(4, tokenSpans.length);
48 Assertions.assertEquals(4, tokenSpans.length);
5049
51 Assert.assertEquals("Slave", tokenSpans[0].getCoveredText(sampleTokens));
52 Assert.assertEquals("to", tokenSpans[1].getCoveredText(sampleTokens));
53 Assert.assertEquals("the", tokenSpans[2].getCoveredText(sampleTokens));
54 Assert.assertEquals("wage", tokenSpans[3].getCoveredText(sampleTokens));
50 Assertions.assertEquals("Slave", tokenSpans[0].getCoveredText(sampleTokens));
51 Assertions.assertEquals("to", tokenSpans[1].getCoveredText(sampleTokens));
52 Assertions.assertEquals("the", tokenSpans[2].getCoveredText(sampleTokens));
53 Assertions.assertEquals("wage", tokenSpans[3].getCoveredText(sampleTokens));
5554 }
5655
5756 /**
5857 * Tests if the {@link TokenSample} correctly tokenizes tokens which
5958 * are separated by the split chars.
60 *
6159 */
6260 @Test
63 public void testParsingSeparatedString() throws IOException {
61 void testParsingSeparatedString() throws IOException {
6462 String sampleTokens = "a<SPLIT>b<SPLIT>c<SPLIT>d";
6563
6664 ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
7068
7169 Span[] tokenSpans = tokenSample.getTokenSpans();
7270
73 Assert.assertEquals(4, tokenSpans.length);
71 Assertions.assertEquals(4, tokenSpans.length);
7472
75 Assert.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
76 Assert.assertEquals(new Span(0,1), tokenSpans[0]);
73 Assertions.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
74 Assertions.assertEquals(new Span(0, 1), tokenSpans[0]);
7775
78 Assert.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
79 Assert.assertEquals(new Span(1,2), tokenSpans[1]);
76 Assertions.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
77 Assertions.assertEquals(new Span(1, 2), tokenSpans[1]);
8078
81 Assert.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
82 Assert.assertEquals(new Span(2,3), tokenSpans[2]);
79 Assertions.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
80 Assertions.assertEquals(new Span(2, 3), tokenSpans[2]);
8381
84 Assert.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
85 Assert.assertEquals(new Span(3,4), tokenSpans[3]);
82 Assertions.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
83 Assertions.assertEquals(new Span(3, 4), tokenSpans[3]);
8684
8785 }
8886
8987 /**
9088 * Tests if the {@link TokenSample} correctly tokenizes tokens which
9189 * are separated by whitespace and by the split chars.
92 *
9390 */
9491 @Test
95 public void testParsingWhitespaceAndSeparatedString() throws IOException {
92 void testParsingWhitespaceAndSeparatedString() throws IOException {
9693 String sampleTokens = "a b<SPLIT>c d<SPLIT>e";
9794
9895 try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
10198
10299 Span[] tokenSpans = tokenSample.getTokenSpans();
103100
104 Assert.assertEquals(5, tokenSpans.length);
101 Assertions.assertEquals(5, tokenSpans.length);
105102
106 Assert.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
107 Assert.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
108 Assert.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
109 Assert.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
110 Assert.assertEquals("e", tokenSpans[4].getCoveredText(tokenSample.getText()));
103 Assertions.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
104 Assertions.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
105 Assertions.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
106 Assertions.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
107 Assertions.assertEquals("e", tokenSpans[4].getCoveredText(tokenSample.getText()));
111108 }
112109 }
113110 }
2424 import java.io.ObjectOutput;
2525 import java.io.ObjectOutputStream;
2626
27 import org.junit.Assert;
28 import org.junit.Test;
27 import org.junit.jupiter.api.Assertions;
28 import org.junit.jupiter.api.Test;
2929
3030 import opennlp.tools.util.Span;
3131
4747 }
4848
4949 @Test
50 public void testRetrievingContent() {
50 void testRetrievingContent() {
5151
5252 String sentence = "A test";
5353
5454 TokenSample sample = new TokenSample(sentence, new Span[] {new Span(0, 1),
5555 new Span(2, 6)});
5656
57 Assert.assertEquals("A test", sample.getText());
57 Assertions.assertEquals("A test", sample.getText());
5858
59 Assert.assertEquals(new Span(0, 1), sample.getTokenSpans()[0]);
60 Assert.assertEquals(new Span(2, 6), sample.getTokenSpans()[1]);
59 Assertions.assertEquals(new Span(0, 1), sample.getTokenSpans()[0]);
60 Assertions.assertEquals(new Span(2, 6), sample.getTokenSpans()[1]);
6161 }
6262
6363 @Test
64 public void testTokenSampleSerDe() throws IOException {
64 void testTokenSampleSerDe() throws IOException {
6565 TokenSample tokenSample = createGoldSample();
6666 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
6767 ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream);
7979 // do nothing
8080 }
8181
82 Assert.assertNotNull(deSerializedTokenSample);
83 Assert.assertEquals(tokenSample.getText(), deSerializedTokenSample.getText());
84 Assert.assertArrayEquals(tokenSample.getTokenSpans(), deSerializedTokenSample.getTokenSpans());
82 Assertions.assertNotNull(deSerializedTokenSample);
83 Assertions.assertEquals(tokenSample.getText(), deSerializedTokenSample.getText());
84 Assertions.assertArrayEquals(tokenSample.getTokenSpans(), deSerializedTokenSample.getTokenSpans());
8585 }
8686
8787 @Test
88 public void testCreationWithDetokenizer() throws IOException {
88 void testCreationWithDetokenizer() throws IOException {
8989
9090 Detokenizer detokenizer = DictionaryDetokenizerTest.createLatinDetokenizer();
9191
103103
104104 TokenSample a = new TokenSample(detokenizer, tokens);
105105
106 Assert.assertEquals("start () end. hyphen-string.", a.getText());
107 Assert.assertEquals("start (" + TokenSample.DEFAULT_SEPARATOR_CHARS + ") end"
106 Assertions.assertEquals("start () end. hyphen-string.", a.getText());
107 Assertions.assertEquals("start (" + TokenSample.DEFAULT_SEPARATOR_CHARS + ") end"
108108 + TokenSample.DEFAULT_SEPARATOR_CHARS + "."
109109 + " hyphen" + TokenSample.DEFAULT_SEPARATOR_CHARS + "-" + TokenSample.DEFAULT_SEPARATOR_CHARS
110110 + "string" + TokenSample.DEFAULT_SEPARATOR_CHARS + ".", a.toString());
111111
112 Assert.assertEquals(9, a.getTokenSpans().length);
112 Assertions.assertEquals(9, a.getTokenSpans().length);
113113
114 Assert.assertEquals(new Span(0, 5), a.getTokenSpans()[0]);
115 Assert.assertEquals(new Span(6, 7), a.getTokenSpans()[1]);
116 Assert.assertEquals(new Span(7, 8), a.getTokenSpans()[2]);
117 Assert.assertEquals(new Span(9, 12), a.getTokenSpans()[3]);
118 Assert.assertEquals(new Span(12, 13), a.getTokenSpans()[4]);
114 Assertions.assertEquals(new Span(0, 5), a.getTokenSpans()[0]);
115 Assertions.assertEquals(new Span(6, 7), a.getTokenSpans()[1]);
116 Assertions.assertEquals(new Span(7, 8), a.getTokenSpans()[2]);
117 Assertions.assertEquals(new Span(9, 12), a.getTokenSpans()[3]);
118 Assertions.assertEquals(new Span(12, 13), a.getTokenSpans()[4]);
119119
120 Assert.assertEquals(new Span(14, 20), a.getTokenSpans()[5]);
121 Assert.assertEquals(new Span(20, 21), a.getTokenSpans()[6]);
122 Assert.assertEquals(new Span(21, 27), a.getTokenSpans()[7]);
123 Assert.assertEquals(new Span(27, 28), a.getTokenSpans()[8]);
120 Assertions.assertEquals(new Span(14, 20), a.getTokenSpans()[5]);
121 Assertions.assertEquals(new Span(20, 21), a.getTokenSpans()[6]);
122 Assertions.assertEquals(new Span(21, 27), a.getTokenSpans()[7]);
123 Assertions.assertEquals(new Span(27, 28), a.getTokenSpans()[8]);
124124 }
125125
126126 @Test
127 public void testEquals() {
128 Assert.assertFalse(createGoldSample() == createGoldSample());
129 Assert.assertTrue(createGoldSample().equals(createGoldSample()));
130 Assert.assertFalse(createPredSample().equals(createGoldSample()));
131 Assert.assertFalse(createPredSample().equals(new Object()));
127 void testEquals() {
128 Assertions.assertFalse(createGoldSample() == createGoldSample());
129 Assertions.assertTrue(createGoldSample().equals(createGoldSample()));
130 Assertions.assertFalse(createPredSample().equals(createGoldSample()));
131 Assertions.assertFalse(createPredSample().equals(new Object()));
132132 }
133133 }
1919 import java.io.ByteArrayOutputStream;
2020 import java.io.OutputStream;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.cmdline.tokenizer.TokenEvaluationErrorListener;
26 import opennlp.tools.util.InvalidFormatException;
2726 import opennlp.tools.util.Span;
2827
2928 public class TokenizerEvaluatorTest {
3029
3130 @Test
32 public void testPositive() throws InvalidFormatException {
31 void testPositive() {
3332 OutputStream stream = new ByteArrayOutputStream();
3433 TokenizerEvaluationMonitor listener = new TokenEvaluationErrorListener(stream);
3534
3837
3938 eval.evaluateSample(TokenSampleTest.createGoldSample());
4039
41 Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
40 Assertions.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
4241
43 Assert.assertEquals(0, stream.toString().length());
42 Assertions.assertEquals(0, stream.toString().length());
4443 }
4544
4645 @Test
47 public void testNegative() throws InvalidFormatException {
46 void testNegative() {
4847 OutputStream stream = new ByteArrayOutputStream();
4948 TokenizerEvaluationMonitor listener = new TokenEvaluationErrorListener(
5049 stream);
5453
5554 eval.evaluateSample(TokenSampleTest.createPredSample());
5655
57 Assert.assertEquals(.5d, eval.getFMeasure().getFMeasure(), .1d);
56 Assertions.assertEquals(.5d, eval.getFMeasure().getFMeasure(), .1d);
5857
59 Assert.assertNotSame(0, stream.toString().length());
58 Assertions.assertNotSame(0, stream.toString().length());
6059 }
6160
6261 /**
2323 import java.nio.charset.StandardCharsets;
2424 import java.util.regex.Pattern;
2525
26 import org.junit.Assert;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.dictionary.Dictionary;
3030 import opennlp.tools.formats.ResourceAsStreamFactory;
6262 }
6363
6464 @Test
65 public void testDefault() throws IOException {
65 void testDefault() throws IOException {
6666
6767 Dictionary dic = loadAbbDictionary();
6868 final String lang = "spa";
7070 TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null));
7171
7272 TokenizerFactory factory = model.getFactory();
73 Assert.assertTrue(factory.getAbbreviationDictionary() != null);
74 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
75
76 Assert.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
77 Assert.assertEquals(lang, factory.getLanguageCode());
78 Assert.assertEquals(lang, model.getLanguage());
79 Assert.assertFalse(factory.isUseAlphaNumericOptmization());
80
81 ByteArrayOutputStream out = new ByteArrayOutputStream();
82 model.serialize(out);
83 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
84
85 TokenizerModel fromSerialized = new TokenizerModel(in);
86
87 factory = fromSerialized.getFactory();
88 Assert.assertTrue(factory.getAbbreviationDictionary() != null);
89 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
90
91 Assert.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
92 Assert.assertEquals(lang, factory.getLanguageCode());
93 Assert.assertEquals(lang, model.getLanguage());
94 Assert.assertFalse(factory.isUseAlphaNumericOptmization());
95 }
96
97 @Test
98 public void testNullDict() throws IOException {
73 Assertions.assertTrue(factory.getAbbreviationDictionary() != null);
74 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
75
76 Assertions.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
77 Assertions.assertEquals(lang, factory.getLanguageCode());
78 Assertions.assertEquals(lang, model.getLanguage());
79 Assertions.assertFalse(factory.isUseAlphaNumericOptmization());
80
81 ByteArrayOutputStream out = new ByteArrayOutputStream();
82 model.serialize(out);
83 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
84
85 TokenizerModel fromSerialized = new TokenizerModel(in);
86
87 factory = fromSerialized.getFactory();
88 Assertions.assertTrue(factory.getAbbreviationDictionary() != null);
89 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
90
91 Assertions.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
92 Assertions.assertEquals(lang, factory.getLanguageCode());
93 Assertions.assertEquals(lang, model.getLanguage());
94 Assertions.assertFalse(factory.isUseAlphaNumericOptmization());
95 }
96
97 @Test
98 void testNullDict() throws IOException {
9999
100100 Dictionary dic = null;
101101 final String lang = "spa";
103103 TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null));
104104
105105 TokenizerFactory factory = model.getFactory();
106 Assert.assertNull(factory.getAbbreviationDictionary());
107 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
108
109 Assert.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
110 Assert.assertEquals(lang, factory.getLanguageCode());
111 Assert.assertEquals(lang, model.getLanguage());
112 Assert.assertFalse(factory.isUseAlphaNumericOptmization());
113
114 ByteArrayOutputStream out = new ByteArrayOutputStream();
115 model.serialize(out);
116 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
117
118 TokenizerModel fromSerialized = new TokenizerModel(in);
119
120 factory = fromSerialized.getFactory();
121 Assert.assertNull(factory.getAbbreviationDictionary());
122 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
123
124 Assert.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
125 Assert.assertEquals(lang, factory.getLanguageCode());
126 Assert.assertEquals(lang, model.getLanguage());
127 Assert.assertFalse(factory.isUseAlphaNumericOptmization());
128 }
129
130 @Test
131 public void testCustomPatternAndAlphaOpt() throws IOException {
106 Assertions.assertNull(factory.getAbbreviationDictionary());
107 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
108
109 Assertions.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
110 Assertions.assertEquals(lang, factory.getLanguageCode());
111 Assertions.assertEquals(lang, model.getLanguage());
112 Assertions.assertFalse(factory.isUseAlphaNumericOptmization());
113
114 ByteArrayOutputStream out = new ByteArrayOutputStream();
115 model.serialize(out);
116 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
117
118 TokenizerModel fromSerialized = new TokenizerModel(in);
119
120 factory = fromSerialized.getFactory();
121 Assertions.assertNull(factory.getAbbreviationDictionary());
122 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
123
124 Assertions.assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
125 Assertions.assertEquals(lang, factory.getLanguageCode());
126 Assertions.assertEquals(lang, model.getLanguage());
127 Assertions.assertFalse(factory.isUseAlphaNumericOptmization());
128 }
129
130 @Test
131 void testCustomPatternAndAlphaOpt() throws IOException {
132132
133133 Dictionary dic = null;
134134 final String lang = "spa";
138138 Pattern.compile(pattern)));
139139
140140 TokenizerFactory factory = model.getFactory();
141 Assert.assertNull(factory.getAbbreviationDictionary());
142 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
143
144 Assert.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
145 Assert.assertEquals(lang, factory.getLanguageCode());
146 Assert.assertEquals(lang, model.getLanguage());
147 Assert.assertTrue(factory.isUseAlphaNumericOptmization());
148
149 ByteArrayOutputStream out = new ByteArrayOutputStream();
150 model.serialize(out);
151 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
152
153 TokenizerModel fromSerialized = new TokenizerModel(in);
154
155 factory = fromSerialized.getFactory();
156 Assert.assertNull(factory.getAbbreviationDictionary());
157 Assert.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
158 Assert.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
159 Assert.assertEquals(lang, factory.getLanguageCode());
160 Assert.assertEquals(lang, model.getLanguage());
161 Assert.assertTrue(factory.isUseAlphaNumericOptmization());
162 }
163
164 @Test
165 public void testDummyFactory() throws IOException {
141 Assertions.assertNull(factory.getAbbreviationDictionary());
142 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
143
144 Assertions.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
145 Assertions.assertEquals(lang, factory.getLanguageCode());
146 Assertions.assertEquals(lang, model.getLanguage());
147 Assertions.assertTrue(factory.isUseAlphaNumericOptmization());
148
149 ByteArrayOutputStream out = new ByteArrayOutputStream();
150 model.serialize(out);
151 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
152
153 TokenizerModel fromSerialized = new TokenizerModel(in);
154
155 factory = fromSerialized.getFactory();
156 Assertions.assertNull(factory.getAbbreviationDictionary());
157 Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
158 Assertions.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
159 Assertions.assertEquals(lang, factory.getLanguageCode());
160 Assertions.assertEquals(lang, model.getLanguage());
161 Assertions.assertTrue(factory.isUseAlphaNumericOptmization());
162 }
163
164 @Test
165 void testDummyFactory() throws IOException {
166166
167167 Dictionary dic = loadAbbDictionary();
168168 final String lang = "spa";
172172 Pattern.compile(pattern)));
173173
174174 TokenizerFactory factory = model.getFactory();
175 Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
176 Assert.assertTrue(factory.getContextGenerator() instanceof DummyContextGenerator);
177 Assert.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
178 Assert.assertEquals(lang, factory.getLanguageCode());
179 Assert.assertEquals(lang, model.getLanguage());
180 Assert.assertTrue(factory.isUseAlphaNumericOptmization());
181
182 ByteArrayOutputStream out = new ByteArrayOutputStream();
183 model.serialize(out);
184 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
185
186 TokenizerModel fromSerialized = new TokenizerModel(in);
187
188 factory = fromSerialized.getFactory();
189 Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
190 Assert.assertTrue(factory.getContextGenerator() instanceof DummyContextGenerator);
191 Assert.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
192 Assert.assertEquals(lang, factory.getLanguageCode());
193 Assert.assertEquals(lang, model.getLanguage());
194 Assert.assertTrue(factory.isUseAlphaNumericOptmization());
195 }
196
197 @Test
198 public void testCreateDummyFactory() throws IOException {
175 Assertions.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
176 Assertions.assertTrue(factory.getContextGenerator() instanceof DummyContextGenerator);
177 Assertions.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
178 Assertions.assertEquals(lang, factory.getLanguageCode());
179 Assertions.assertEquals(lang, model.getLanguage());
180 Assertions.assertTrue(factory.isUseAlphaNumericOptmization());
181
182 ByteArrayOutputStream out = new ByteArrayOutputStream();
183 model.serialize(out);
184 ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
185
186 TokenizerModel fromSerialized = new TokenizerModel(in);
187
188 factory = fromSerialized.getFactory();
189 Assertions.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
190 Assertions.assertTrue(factory.getContextGenerator() instanceof DummyContextGenerator);
191 Assertions.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
192 Assertions.assertEquals(lang, factory.getLanguageCode());
193 Assertions.assertEquals(lang, model.getLanguage());
194 Assertions.assertTrue(factory.isUseAlphaNumericOptmization());
195 }
196
197 @Test
198 void testCreateDummyFactory() throws IOException {
199199 Dictionary dic = loadAbbDictionary();
200200 final String lang = "spa";
201201 String pattern = "^[0-9A-Za-z]+$";
204204 DummyTokenizerFactory.class.getCanonicalName(), lang, dic, true,
205205 Pattern.compile(pattern));
206206
207 Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
208 Assert.assertTrue(factory.getContextGenerator() instanceof DummyContextGenerator);
209 Assert.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
210 Assert.assertEquals(lang, factory.getLanguageCode());
211 Assert.assertTrue(factory.isUseAlphaNumericOptmization());
207 Assertions.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
208 Assertions.assertTrue(factory.getContextGenerator() instanceof DummyContextGenerator);
209 Assertions.assertEquals(pattern, factory.getAlphaNumericPattern().pattern());
210 Assertions.assertEquals(lang, factory.getLanguageCode());
211 Assertions.assertTrue(factory.isUseAlphaNumericOptmization());
212212 }
213213 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 public class TokenizerMEIT {
2525
2626 @Test
27 public void testTokenizerDownloadedModel() throws IOException {
27 void testTokenizerDownloadedModel() throws IOException {
2828
2929 TokenizerME tokenizer = new TokenizerME("en");
3030
3131 String[] tokens = tokenizer.tokenize("test,");
3232
33 Assert.assertEquals(2, tokens.length);
34 Assert.assertEquals("test", tokens[0]);
35 Assert.assertEquals(",", tokens[1]);
33 Assertions.assertEquals(2, tokens.length);
34 Assertions.assertEquals("test", tokens[0]);
35 Assertions.assertEquals(",", tokens[1]);
3636 }
37
37
3838 }
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.formats.ResourceAsStreamFactory;
2626 import opennlp.tools.util.InputStreamFactory;
3131
3232 /**
3333 * Tests for the {@link TokenizerME} class.
34 *
34 * <p>
3535 * This test trains the tokenizer with a few sample tokens
3636 * and then predicts a token. This test checks if the
3737 * tokenizer code can be executed.
4141 public class TokenizerMETest {
4242
4343 @Test
44 public void testTokenizerSimpleModel() throws IOException {
44 void testTokenizerSimpleModel() throws IOException {
4545
4646 TokenizerModel model = TokenizerTestUtil.createSimpleMaxentTokenModel();
4747
4949
5050 String[] tokens = tokenizer.tokenize("test,");
5151
52 Assert.assertEquals(2, tokens.length);
53 Assert.assertEquals("test", tokens[0]);
54 Assert.assertEquals(",", tokens[1]);
52 Assertions.assertEquals(2, tokens.length);
53 Assertions.assertEquals("test", tokens[0]);
54 Assertions.assertEquals(",", tokens[1]);
5555 }
56
56
5757 @Test
58 public void testTokenizer() throws IOException {
58 void testTokenizer() throws IOException {
5959 TokenizerModel model = TokenizerTestUtil.createMaxentTokenModel();
6060
6161 TokenizerME tokenizer = new TokenizerME(model);
6262 String[] tokens = tokenizer.tokenize("Sounds like it's not properly thought through!");
6363
64 Assert.assertEquals(9, tokens.length);
65 Assert.assertEquals("Sounds", tokens[0]);
66 Assert.assertEquals("like", tokens[1]);
67 Assert.assertEquals("it", tokens[2]);
68 Assert.assertEquals("'s", tokens[3]);
69 Assert.assertEquals("not", tokens[4]);
70 Assert.assertEquals("properly", tokens[5]);
71 Assert.assertEquals("thought", tokens[6]);
72 Assert.assertEquals("through", tokens[7]);
73 Assert.assertEquals("!", tokens[8]);
64 Assertions.assertEquals(9, tokens.length);
65 Assertions.assertEquals("Sounds", tokens[0]);
66 Assertions.assertEquals("like", tokens[1]);
67 Assertions.assertEquals("it", tokens[2]);
68 Assertions.assertEquals("'s", tokens[3]);
69 Assertions.assertEquals("not", tokens[4]);
70 Assertions.assertEquals("properly", tokens[5]);
71 Assertions.assertEquals("thought", tokens[6]);
72 Assertions.assertEquals("through", tokens[7]);
73 Assertions.assertEquals("!", tokens[8]);
7474 }
75
76 @Test(expected = InsufficientTrainingDataException.class)
77 public void testInsufficientData() throws IOException {
7875
79 InputStreamFactory trainDataIn = new ResourceAsStreamFactory(
80 TokenizerModel.class, "/opennlp/tools/tokenize/token-insufficient.train");
76 @Test
77 void testInsufficientData() {
8178
82 ObjectStream<TokenSample> samples = new TokenSampleStream(
83 new PlainTextByLineStream(trainDataIn, StandardCharsets.UTF_8));
79 Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
8480
85 TrainingParameters mlParams = new TrainingParameters();
86 mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
87 mlParams.put(TrainingParameters.CUTOFF_PARAM, 5);
81 InputStreamFactory trainDataIn = new ResourceAsStreamFactory(
82 TokenizerModel.class, "/opennlp/tools/tokenize/token-insufficient.train");
8883
89 TokenizerME.train(samples, TokenizerFactory.create(null, "eng", null, true, null), mlParams);
84 ObjectStream<TokenSample> samples = new TokenSampleStream(
85 new PlainTextByLineStream(trainDataIn, StandardCharsets.UTF_8));
86
87 TrainingParameters mlParams = new TrainingParameters();
88 mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
89 mlParams.put(TrainingParameters.CUTOFF_PARAM, 5);
90
91 TokenizerME.train(samples, TokenizerFactory.create(null, "eng", null, true, null), mlParams);
92
93 });
94
9095
9196 }
9297
9398 @Test
94 public void testNewLineAwareTokenization() throws IOException {
99 void testNewLineAwareTokenization() throws IOException {
95100 TokenizerModel model = TokenizerTestUtil.createMaxentTokenModel();
96101 TokenizerME tokenizer = new TokenizerME(model);
97102 tokenizer.setKeepNewLines(true);
98103
99 Assert.assertEquals(2, tokenizer.tokenize("a\n").length);
100 Assert.assertArrayEquals(new String[] {"a", "\n"}, tokenizer.tokenize("a\n"));
101
102 Assert.assertEquals(3, tokenizer.tokenize("a\nb").length);
103 Assert.assertArrayEquals(new String[] {"a", "\n", "b"}, tokenizer.tokenize("a\nb"));
104
105 Assert.assertEquals(4, tokenizer.tokenize("a\n\n b").length);
106 Assert.assertArrayEquals(new String[] {"a", "\n", "\n", "b"}, tokenizer.tokenize("a\n\n b"));
107
108 Assert.assertEquals(7, tokenizer.tokenize("a\n\n b\n\n c").length);
109 Assert.assertArrayEquals(new String[] {"a", "\n", "\n", "b", "\n", "\n", "c"},
110 tokenizer.tokenize("a\n\n b\n\n c"));
104 Assertions.assertEquals(2, tokenizer.tokenize("a\n").length);
105 Assertions.assertArrayEquals(new String[] {"a", "\n"}, tokenizer.tokenize("a\n"));
106
107 Assertions.assertEquals(3, tokenizer.tokenize("a\nb").length);
108 Assertions.assertArrayEquals(new String[] {"a", "\n", "b"}, tokenizer.tokenize("a\nb"));
109
110 Assertions.assertEquals(4, tokenizer.tokenize("a\n\n b").length);
111 Assertions.assertArrayEquals(new String[] {"a", "\n", "\n", "b"}, tokenizer.tokenize("a\n\n b"));
112
113 Assertions.assertEquals(7, tokenizer.tokenize("a\n\n b\n\n c").length);
114 Assertions.assertArrayEquals(new String[] {"a", "\n", "\n", "b", "\n", "\n", "c"},
115 tokenizer.tokenize("a\n\n b\n\n c"));
111116 }
112117
113118 @Test
114 public void testTokenizationOfStringWithWindowsNewLineTokens() throws IOException {
119 void testTokenizationOfStringWithWindowsNewLineTokens() throws IOException {
115120 TokenizerModel model = TokenizerTestUtil.createMaxentTokenModel();
116121 TokenizerME tokenizer = new TokenizerME(model);
117122 tokenizer.setKeepNewLines(true);
118123
119 Assert.assertEquals(3, tokenizer.tokenize("a\r\n").length);
120 Assert.assertArrayEquals(new String[] {"a", "\r", "\n"}, tokenizer.tokenize("a\r\n"));
124 Assertions.assertEquals(3, tokenizer.tokenize("a\r\n").length);
125 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n"}, tokenizer.tokenize("a\r\n"));
121126
122 Assert.assertEquals(4, tokenizer.tokenize("a\r\nb").length);
123 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "b"}, tokenizer.tokenize("a\r\nb"));
127 Assertions.assertEquals(4, tokenizer.tokenize("a\r\nb").length);
128 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "b"}, tokenizer.tokenize("a\r\nb"));
124129
125 Assert.assertEquals(6, tokenizer.tokenize("a\r\n\r\n b").length);
126 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b"}, tokenizer
130 Assertions.assertEquals(6, tokenizer.tokenize("a\r\n\r\n b").length);
131 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b"}, tokenizer
127132 .tokenize("a\r\n\r\n b"));
128133
129 Assert.assertEquals(11, tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c").length);
130 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b", "\r", "\n", "\r", "\n", "c"},
131 tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c"));
134 Assertions.assertEquals(11, tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c").length);
135 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b", "\r", "\n", "\r", "\n", "c"},
136 tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c"));
132137 }
133138
134139 }
2020 import java.io.ByteArrayOutputStream;
2121 import java.io.IOException;
2222
23 import org.junit.Test;
23 import org.junit.jupiter.api.Test;
2424
2525 /**
2626 * Tests for the {@link TokenizerModel} class.
2828 public class TokenizerModelTest {
2929
3030 @Test
31 public void testSentenceModel() throws IOException {
31 void testSentenceModel() throws IOException {
3232
3333 TokenizerModel model = TokenizerTestUtil.createSimpleMaxentTokenModel();
3434
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 import opennlp.tools.util.ObjectStream;
2525 import opennlp.tools.util.ObjectStreamUtils;
3131 * Tests for the {@link WhitespaceTokenStream} class.
3232 */
3333 @Test
34 public void testWhitespace() throws IOException {
34 void testWhitespace() throws IOException {
3535 String text = " a b c d e f ";
3636 ObjectStream<TokenSample> sampleStream = new TokenSampleStream(
37 ObjectStreamUtils.createObjectStream(text));
37 ObjectStreamUtils.createObjectStream(text));
3838 WhitespaceTokenStream tokenStream = new WhitespaceTokenStream(sampleStream);
3939 String read = tokenStream.read();
40 Assert.assertEquals("a b c d e f", read);
40 Assertions.assertEquals("a b c d e f", read);
4141 }
4242
4343 @Test
44 public void testSeparatedString() throws IOException {
44 void testSeparatedString() throws IOException {
4545 String text = " a b<SPLIT>c d<SPLIT>e ";
4646 ObjectStream<TokenSample> sampleStream = new TokenSampleStream(
47 ObjectStreamUtils.createObjectStream(text));
47 ObjectStreamUtils.createObjectStream(text));
4848 WhitespaceTokenStream tokenStream = new WhitespaceTokenStream(sampleStream);
4949 String read = tokenStream.read();
50 Assert.assertEquals("a b c d e", read);
50 Assertions.assertEquals("a b c d e", read);
5151 }
5252
5353 /**
5454 * Tests that the {@link TokenizerStream} correctly tokenizes whitespace-separated tokens.
5555 */
5656 @Test
57 public void testTokenizerStream() throws IOException {
57 void testTokenizerStream() throws IOException {
5858 String text = " a b c d e ";
5959 WhitespaceTokenizer instance = WhitespaceTokenizer.INSTANCE;
6060 TokenizerStream stream = new TokenizerStream(instance, ObjectStreamUtils.createObjectStream(text));
6161 TokenSample read = stream.read();
6262 Span[] tokenSpans = read.getTokenSpans();
6363
64 Assert.assertEquals(5, tokenSpans.length);
64 Assertions.assertEquals(5, tokenSpans.length);
6565
66 Assert.assertEquals("a", tokenSpans[0].getCoveredText(read.getText()));
67 Assert.assertEquals(new Span(1,2), tokenSpans[0]);
66 Assertions.assertEquals("a", tokenSpans[0].getCoveredText(read.getText()));
67 Assertions.assertEquals(new Span(1, 2), tokenSpans[0]);
6868
69 Assert.assertEquals("b", tokenSpans[1].getCoveredText(read.getText()));
70 Assert.assertEquals(new Span(3,4), tokenSpans[1]);
69 Assertions.assertEquals("b", tokenSpans[1].getCoveredText(read.getText()));
70 Assertions.assertEquals(new Span(3, 4), tokenSpans[1]);
7171
72 Assert.assertEquals("c", tokenSpans[2].getCoveredText(read.getText()));
73 Assert.assertEquals(new Span(5,6), tokenSpans[2]);
72 Assertions.assertEquals("c", tokenSpans[2].getCoveredText(read.getText()));
73 Assertions.assertEquals(new Span(5, 6), tokenSpans[2]);
7474
75 Assert.assertEquals("d", tokenSpans[3].getCoveredText(read.getText()));
76 Assert.assertEquals(new Span(8,9), tokenSpans[3]);
75 Assertions.assertEquals("d", tokenSpans[3].getCoveredText(read.getText()));
76 Assertions.assertEquals(new Span(8, 9), tokenSpans[3]);
7777
78 Assert.assertEquals("e", tokenSpans[4].getCoveredText(read.getText()));
79 Assert.assertEquals(new Span(13,14), tokenSpans[4]);
78 Assertions.assertEquals("e", tokenSpans[4].getCoveredText(read.getText()));
79 Assertions.assertEquals(new Span(13, 14), tokenSpans[4]);
8080 }
8181 }
1616
1717 package opennlp.tools.tokenize;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link WhitespaceTokenizer} class.
2525 public class WhitespaceTokenizerTest {
2626
2727 @Test
28 public void testOneToken() {
29 Assert.assertEquals("one", WhitespaceTokenizer.INSTANCE.tokenize("one")[0]);
30 Assert.assertEquals("one", WhitespaceTokenizer.INSTANCE.tokenize(" one")[0]);
31 Assert.assertEquals("one", WhitespaceTokenizer.INSTANCE.tokenize("one ")[0]);
28 void testOneToken() {
29 Assertions.assertEquals("one", WhitespaceTokenizer.INSTANCE.tokenize("one")[0]);
30 Assertions.assertEquals("one", WhitespaceTokenizer.INSTANCE.tokenize(" one")[0]);
31 Assertions.assertEquals("one", WhitespaceTokenizer.INSTANCE.tokenize("one ")[0]);
3232 }
3333
3434 /**
3535 * Tests if it can tokenize whitespace separated tokens.
3636 */
3737 @Test
38 public void testWhitespaceTokenization() {
38 void testWhitespaceTokenization() {
3939
4040 String text = "a b c d e f ";
4141
4242 String[] tokenizedText = WhitespaceTokenizer.INSTANCE.tokenize(text);
4343
44 Assert.assertTrue("a".equals(tokenizedText[0]));
45 Assert.assertTrue("b".equals(tokenizedText[1]));
46 Assert.assertTrue("c".equals(tokenizedText[2]));
47 Assert.assertTrue("d".equals(tokenizedText[3]));
48 Assert.assertTrue("e".equals(tokenizedText[4]));
49 Assert.assertTrue("f".equals(tokenizedText[5]));
44 Assertions.assertTrue("a".equals(tokenizedText[0]));
45 Assertions.assertTrue("b".equals(tokenizedText[1]));
46 Assertions.assertTrue("c".equals(tokenizedText[2]));
47 Assertions.assertTrue("d".equals(tokenizedText[3]));
48 Assertions.assertTrue("e".equals(tokenizedText[4]));
49 Assertions.assertTrue("f".equals(tokenizedText[5]));
5050
51 Assert.assertTrue(tokenizedText.length == 6);
51 Assertions.assertTrue(tokenizedText.length == 6);
5252 }
5353
5454 @Test
55 public void testTokenizationOfStringWithoutTokens() {
56 Assert.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize("").length); // empty
57 Assert.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize(" ").length); // space
58 Assert.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize(" ").length); // tab
59 Assert.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize(" ").length);
55 void testTokenizationOfStringWithoutTokens() {
56 Assertions.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize("").length); // empty
57 Assertions.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize(" ").length); // space
58 Assertions.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize(" ").length); // tab
59 Assertions.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize(" ").length);
6060 }
6161
6262 @Test
63 public void testTokenizationOfStringWithUnixNewLineTokens() {
63 void testTokenizationOfStringWithUnixNewLineTokens() {
6464 WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
6565 tokenizer.setKeepNewLines(true);
66
67 Assert.assertEquals(2, tokenizer.tokenize("a\n").length);
68 Assert.assertArrayEquals(new String[] {"a", "\n"}, tokenizer.tokenize("a\n"));
69
70 Assert.assertEquals(3, tokenizer.tokenize("a\nb").length);
71 Assert.assertArrayEquals(new String[] {"a", "\n", "b"}, tokenizer.tokenize("a\nb"));
72
73 Assert.assertEquals(4, tokenizer.tokenize("a\n\n b").length);
74 Assert.assertArrayEquals(new String[] {"a", "\n", "\n", "b"}, tokenizer.tokenize("a\n\n b"));
75
76 Assert.assertEquals(7, tokenizer.tokenize("a\n\n b\n\n c").length);
77 Assert.assertArrayEquals(new String[] {"a", "\n", "\n", "b", "\n", "\n", "c"},
78 tokenizer.tokenize("a\n\n b\n\n c"));
66
67 Assertions.assertEquals(2, tokenizer.tokenize("a\n").length);
68 Assertions.assertArrayEquals(new String[] {"a", "\n"}, tokenizer.tokenize("a\n"));
69
70 Assertions.assertEquals(3, tokenizer.tokenize("a\nb").length);
71 Assertions.assertArrayEquals(new String[] {"a", "\n", "b"}, tokenizer.tokenize("a\nb"));
72
73 Assertions.assertEquals(4, tokenizer.tokenize("a\n\n b").length);
74 Assertions.assertArrayEquals(new String[] {"a", "\n", "\n", "b"}, tokenizer.tokenize("a\n\n b"));
75
76 Assertions.assertEquals(7, tokenizer.tokenize("a\n\n b\n\n c").length);
77 Assertions.assertArrayEquals(new String[] {"a", "\n", "\n", "b", "\n", "\n", "c"},
78 tokenizer.tokenize("a\n\n b\n\n c"));
7979 }
8080
8181 @Test
82 public void testTokenizationOfStringWithWindowsNewLineTokens() {
82 void testTokenizationOfStringWithWindowsNewLineTokens() {
8383 WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
8484 tokenizer.setKeepNewLines(true);
85
86 Assert.assertEquals(3, tokenizer.tokenize("a\r\n").length);
87 Assert.assertArrayEquals(new String[] {"a", "\r", "\n"}, tokenizer.tokenize("a\r\n"));
88
89 Assert.assertEquals(4, tokenizer.tokenize("a\r\nb").length);
90 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "b"}, tokenizer.tokenize("a\r\nb"));
91
92 Assert.assertEquals(6, tokenizer.tokenize("a\r\n\r\n b").length);
93 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b"}, tokenizer
85
86 Assertions.assertEquals(3, tokenizer.tokenize("a\r\n").length);
87 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n"}, tokenizer.tokenize("a\r\n"));
88
89 Assertions.assertEquals(4, tokenizer.tokenize("a\r\nb").length);
90 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "b"}, tokenizer.tokenize("a\r\nb"));
91
92 Assertions.assertEquals(6, tokenizer.tokenize("a\r\n\r\n b").length);
93 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b"}, tokenizer
9494 .tokenize("a\r\n\r\n b"));
95
96 Assert.assertEquals(11, tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c").length);
97 Assert.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b", "\r", "\n", "\r", "\n", "c"},
98 tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c"));
95
96 Assertions.assertEquals(11, tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c").length);
97 Assertions.assertArrayEquals(new String[] {"a", "\r", "\n", "\r", "\n", "b", "\r", "\n", "\r", "\n", "c"},
98 tokenizer.tokenize("a\r\n\r\n b\r\n\r\n c"));
9999 }
100100 }
1919 import java.util.HashSet;
2020 import java.util.Set;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 public class WordpieceTokenizerTest {
2626
2727 @Test
28 public void testSentence() {
28 void testSentence() {
2929
3030 final Tokenizer tokenizer = new WordpieceTokenizer(getVocabulary());
3131 final String[] tokens = tokenizer.tokenize("the quick brown fox jumps over the very lazy dog");
3232
3333 final String[] expected = {"[CLS]", "the", "quick", "brown", "fox", "jumps", "over", "the",
34 "[UNK]", "lazy", "dog", "[SEP]"};
34 "[UNK]", "lazy", "dog", "[SEP]"};
3535
36 Assert.assertArrayEquals(expected, tokens);
36 Assertions.assertArrayEquals(expected, tokens);
3737
3838 }
3939
4040 @Test
41 public void testSentenceWithPunctuation() {
41 void testSentenceWithPunctuation() {
4242
4343 final Tokenizer tokenizer = new WordpieceTokenizer(getVocabulary());
4444 final String[] tokens = tokenizer.tokenize("The quick brown fox jumps over the very lazy dog.");
4545
4646 final String[] expected = {"[CLS]", "[UNK]", "quick", "brown", "fox", "jumps", "over", "the",
47 "[UNK]", "lazy", "dog", "[UNK]", "[SEP]"};
47 "[UNK]", "lazy", "dog", "[UNK]", "[SEP]"};
4848
49 Assert.assertArrayEquals(expected, tokens);
49 Assertions.assertArrayEquals(expected, tokens);
5050
5151 }
5252
2222 import java.util.Iterator;
2323 import java.util.List;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.ml.model.Event;
2929
3838 * return iterators with events and empty iterators.
3939 */
4040 @Test
41 public void testStandardCase() throws IOException {
41 void testStandardCase() throws IOException {
4242
4343 List<RESULT> samples = new ArrayList<>();
4444 samples.add(RESULT.EVENTS);
5252 eventCounter++;
5353 }
5454
55 Assert.assertEquals(2, eventCounter);
55 Assertions.assertEquals(2, eventCounter);
5656 }
5757 }
5858
6262 * only returns empty iterators.
6363 */
6464 @Test
65 public void testEmtpyEventStream() throws IOException {
65 void testEmtpyEventStream() throws IOException {
6666 List<RESULT> samples = new ArrayList<>();
6767 samples.add(RESULT.EMPTY);
6868
6969 try (TestEventStream eventStream = new TestEventStream(new CollectionObjectStream<>(samples))) {
70 Assert.assertNull(eventStream.read());
70 Assertions.assertNull(eventStream.read());
7171
7272 // now check if it can handle multiple empty event iterators
7373 samples.add(RESULT.EMPTY);
7474 samples.add(RESULT.EMPTY);
7575 }
7676 try (TestEventStream eventStream = new TestEventStream(new CollectionObjectStream<>(samples))) {
77 Assert.assertNull(eventStream.read());
77 Assertions.assertNull(eventStream.read());
7878 }
7979 }
8080
117117 return emptyList.iterator();
118118 } else {
119119 // throws runtime exception, execution stops here
120 Assert.fail();
120 Assertions.fail();
121121
122122 return null;
123123 }
1818
1919 import java.io.ByteArrayInputStream;
2020 import java.io.File;
21 import java.io.IOException;
2221 import java.io.InputStream;
2322
2423 import java.nio.charset.Charset;
4746 }
4847
4948 @Override
50 public InputStream createInputStream() throws IOException {
49 public InputStream createInputStream() {
5150 if (inputSourceFile != null) {
5251 return getClass().getClassLoader().getResourceAsStream(inputSourceFile.getPath());
5352 }
2323 import java.util.Set;
2424 import java.util.TreeSet;
2525
26 import org.junit.Assert;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Test;
2828
2929 public class ObjectStreamUtilsTest {
3030
3131 @Test
32 public void buildStreamTest() throws IOException {
33 String[] data = {"dog","cat","pig","frog"};
34
32 void buildStreamTest() throws IOException {
33 String[] data = {"dog", "cat", "pig", "frog"};
34
3535 // make a stream out of the data array...
3636 ObjectStream<String> stream = ObjectStreamUtils.createObjectStream(data);
3737 compare(stream, data);
38
38
3939 // make a stream out of a list...
4040 List<String> dataList = Arrays.asList(data);
4141 stream = ObjectStreamUtils.createObjectStream(Arrays.asList(data));
4242 compare(stream, data);
43
43
4444 // make a stream out of a set...
4545 // A treeSet will order the set in Alphabetical order, so
4646 // we can compare it with the sorted Array, but this changes the
5151 }
5252
5353 @Test
54 public void concatenateStreamTest() throws IOException {
55 String[] data1 = {"dog1","cat1","pig1","frog1"};
56 String[] data2 = {"dog2","cat2","pig2","frog2"};
57 String[] expected = {"dog1","cat1","pig1","frog1","dog2","cat2","pig2","frog2"};
54 void concatenateStreamTest() throws IOException {
55 String[] data1 = {"dog1", "cat1", "pig1", "frog1"};
56 String[] data2 = {"dog2", "cat2", "pig2", "frog2"};
57 String[] expected = {"dog1", "cat1", "pig1", "frog1", "dog2", "cat2", "pig2", "frog2"};
5858
5959 // take individual streams and concatenate them as 1 stream.
6060 // Note: this is much easier than trying to create an array of
6767
6868 // test that collections of streams can be concatenated...
6969 List<ObjectStream<String>> listOfStreams = new ArrayList<>();
70 listOfStreams.add(ObjectStreamUtils.createObjectStream(data1) );
71 listOfStreams.add(ObjectStreamUtils.createObjectStream(data2) );
70 listOfStreams.add(ObjectStreamUtils.createObjectStream(data1));
71 listOfStreams.add(ObjectStreamUtils.createObjectStream(data2));
7272 stream = ObjectStreamUtils.concatenateObjectStream(listOfStreams);
7373 compare(stream, expected);
7474
75
75
7676 // test that sets of streams can be concatenated..
7777 Set<ObjectStream<String>> streamSet = new HashSet<>();
78 streamSet.add(ObjectStreamUtils.createObjectStream(data1) );
79 streamSet.add(ObjectStreamUtils.createObjectStream(data2) );
78 streamSet.add(ObjectStreamUtils.createObjectStream(data1));
79 streamSet.add(ObjectStreamUtils.createObjectStream(data2));
8080 stream = ObjectStreamUtils.concatenateObjectStream(streamSet);
8181 // The order of the streams in the set is not known a priori
8282 // just check that the dog, cat, pig, frog are in the right order...
8383 compareUpToLastCharacter(stream, expected);
84
84
8585 }
86
87
88
89 private void compare(ObjectStream<String> stream,String[] expectedValues) throws IOException {
86
87
88 private void compare(ObjectStream<String> stream, String[] expectedValues) throws IOException {
9089 String value = "";
9190 int i = 0;
92 while ( (value = stream.read()) != null) {
93 Assert.assertTrue("The stream is longer than expected at index: " + i +
91 while ((value = stream.read()) != null) {
92 Assertions.assertTrue(i < expectedValues.length, "The stream is longer than expected at index: " + i +
9493 " expected length: " + expectedValues.length +
95 " expectedValues" + Arrays.toString(expectedValues),i < expectedValues.length);
96 Assert.assertEquals(expectedValues[i++], value);
94 " expectedValues" + Arrays.toString(expectedValues));
95 Assertions.assertEquals(expectedValues[i++], value);
9796 }
9897 }
99
98
10099 private void compareUpToLastCharacter(ObjectStream<String> stream,
101 String[] expectedValues) throws IOException {
102
100 String[] expectedValues) throws IOException {
101
103102 String value = "";
104103 int i = 0;
105 while ( (value = stream.read()) != null) {
106 Assert.assertTrue("The stream is longer than expected at index: " + i +
104 while ((value = stream.read()) != null) {
105 Assertions.assertTrue(i < expectedValues.length, "The stream is longer than expected at index: " + i +
107106 " expected length: " + expectedValues.length +
108 " expectedValues" + Arrays.toString(expectedValues),i < expectedValues.length);
109 Assert.assertEquals(
110 expectedValues[i].substring(0, expectedValues[i].length() - 1),
107 " expectedValues" + Arrays.toString(expectedValues));
108 Assertions.assertEquals(
109 expectedValues[i].substring(0, expectedValues[i].length() - 1),
111110 value.substring(0, value.length() - 1));
112111 i++;
113112 }
114113 }
115
114
116115 }
1818
1919 import java.io.IOException;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 public class ParagraphStreamTest {
2525
2626 @Test
27 public void testSimpleReading() throws IOException {
27 void testSimpleReading() throws IOException {
2828 try (ParagraphStream paraStream = new ParagraphStream(
29 ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5"))) {
30 Assert.assertEquals("1\n2\n", paraStream.read());
31 Assert.assertEquals("4\n5\n", paraStream.read());
32 Assert.assertNull(paraStream.read());
29 ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5"))) {
30 Assertions.assertEquals("1\n2\n", paraStream.read());
31 Assertions.assertEquals("4\n5\n", paraStream.read());
32 Assertions.assertNull(paraStream.read());
3333 }
3434 try (ParagraphStream paraStream = new ParagraphStream(
35 ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", ""))) {
36 Assert.assertEquals("1\n2\n", paraStream.read());
37 Assert.assertEquals("4\n5\n", paraStream.read());
38 Assert.assertNull(paraStream.read());
35 ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", ""))) {
36 Assertions.assertEquals("1\n2\n", paraStream.read());
37 Assertions.assertEquals("4\n5\n", paraStream.read());
38 Assertions.assertNull(paraStream.read());
3939 }
4040 }
4141
4242 @Test
43 public void testReset() throws IOException {
43 void testReset() throws IOException {
4444 try (ParagraphStream paraStream = new ParagraphStream(
45 ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", ""))) {
46 Assert.assertEquals("1\n2\n", paraStream.read());
45 ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", ""))) {
46 Assertions.assertEquals("1\n2\n", paraStream.read());
4747 paraStream.reset();
4848
49 Assert.assertEquals("1\n2\n", paraStream.read());
50 Assert.assertEquals("4\n5\n", paraStream.read());
51 Assert.assertNull(paraStream.read());
49 Assertions.assertEquals("1\n2\n", paraStream.read());
50 Assertions.assertEquals("4\n5\n", paraStream.read());
51 Assertions.assertNull(paraStream.read());
5252 }
5353 }
5454 }
1919 import java.io.IOException;
2020 import java.nio.charset.StandardCharsets;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 /**
2626 * Tests for the {@link PlainTextByLineStream} class.
2828 public class PlainTextByLineStreamTest {
2929
3030 static final String testString = "line1" +
31 '\n' +
32 "line2" +
33 '\n' +
34 "line3" +
35 "\r\n" +
36 "line4" +
37 '\n';
31 '\n' +
32 "line2" +
33 '\n' +
34 "line3" +
35 "\r\n" +
36 "line4" +
37 '\n';
3838
3939 @Test
40 public void testLineSegmentation() throws IOException {
40 void testLineSegmentation() throws IOException {
4141 ObjectStream<String> stream =
42 new PlainTextByLineStream(new MockInputStreamFactory(testString), StandardCharsets.UTF_8);
42 new PlainTextByLineStream(new MockInputStreamFactory(testString), StandardCharsets.UTF_8);
4343
44 Assert.assertEquals("line1", stream.read());
45 Assert.assertEquals("line2", stream.read());
46 Assert.assertEquals("line3", stream.read());
47 Assert.assertEquals("line4", stream.read());
48 Assert.assertNull(stream.read());
44 Assertions.assertEquals("line1", stream.read());
45 Assertions.assertEquals("line2", stream.read());
46 Assertions.assertEquals("line3", stream.read());
47 Assertions.assertEquals("line4", stream.read());
48 Assertions.assertNull(stream.read());
4949
5050 stream.close();
5151 }
5252
5353 @Test
54 public void testReset() throws IOException {
54 void testReset() throws IOException {
5555 ObjectStream<String> stream =
56 new PlainTextByLineStream(new MockInputStreamFactory(testString), StandardCharsets.UTF_8);
56 new PlainTextByLineStream(new MockInputStreamFactory(testString), StandardCharsets.UTF_8);
5757
58 Assert.assertEquals("line1", stream.read());
59 Assert.assertEquals("line2", stream.read());
60 Assert.assertEquals("line3", stream.read());
58 Assertions.assertEquals("line1", stream.read());
59 Assertions.assertEquals("line2", stream.read());
60 Assertions.assertEquals("line3", stream.read());
6161 stream.reset();
6262
63 Assert.assertEquals("line1", stream.read());
64 Assert.assertEquals("line2", stream.read());
65 Assert.assertEquals("line3", stream.read());
66 Assert.assertEquals("line4", stream.read());
67 Assert.assertNull(stream.read());
63 Assertions.assertEquals("line1", stream.read());
64 Assertions.assertEquals("line2", stream.read());
65 Assertions.assertEquals("line3", stream.read());
66 Assertions.assertEquals("line4", stream.read());
67 Assertions.assertNull(stream.read());
6868
6969 stream.close();
7070 }
1616
1717 package opennlp.tools.util;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link Sequence} class.
2828 * Tests the copy constructor {@link Sequence#Sequence(Sequence)}.
2929 */
3030 @Test
31 public void testCopyConstructor() {
31 void testCopyConstructor() {
3232 Sequence sequence = new Sequence();
3333 sequence.add("a", 10);
3434 sequence.add("b", 20);
3535
3636 Sequence copy = new Sequence(sequence);
3737
38 Assert.assertEquals(sequence.getOutcomes(), copy.getOutcomes());
39 Assert.assertArrayEquals(sequence.getProbs(), copy.getProbs(), 0.0);
40 Assert.assertTrue(sequence.compareTo(copy) == 0);
38 Assertions.assertEquals(sequence.getOutcomes(), copy.getOutcomes());
39 Assertions.assertArrayEquals(copy.getProbs(), sequence.getProbs(), 0.0);
40 Assertions.assertTrue(sequence.compareTo(copy) == 0);
4141 }
4242
4343 /**
4545 * tests {@link Sequence#getOutcomes()} and {@link Sequence#getProbs()}.
4646 */
4747 @Test
48 public void testAddMethod() {
48 void testAddMethod() {
4949 Sequence sequence = new Sequence();
5050 sequence.add("a", 10d);
5151
5252 // check if insert was successful
53 Assert.assertEquals("a", sequence.getOutcomes().get(0));
54 Assert.assertEquals(10d, sequence.getProbs()[0], 0d);
53 Assertions.assertEquals("a", sequence.getOutcomes().get(0));
54 Assertions.assertEquals(10d, sequence.getProbs()[0]);
5555 }
5656
5757 /**
5858 * Tests {@link Sequence#compareTo(Sequence)}.
5959 */
6060 @Test
61 public void testCompareTo() {
61 void testCompareTo() {
6262 Sequence lowScore = new Sequence();
6363 lowScore.add("A", 1d);
6464 lowScore.add("B", 2d);
6969 lowScore.add("B", 8d);
7070 lowScore.add("C", 9d);
7171
72 Assert.assertEquals(-1, lowScore.compareTo(highScore));
73 Assert.assertEquals(1, highScore.compareTo(lowScore));
72 Assertions.assertEquals(-1, lowScore.compareTo(highScore));
73 Assertions.assertEquals(1, highScore.compareTo(lowScore));
7474 }
7575
7676 /**
7777 * Checks that {@link Sequence#toString()} is executable.
7878 */
7979 @Test
80 public void testToString() {
80 void testToString() {
8181 new Sequence().toString();
8282
8383 Sequence sequence = new Sequence();
1616
1717 package opennlp.tools.util;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link Span} class.
2424 */
25
2526 public class SpanTest {
2627
2728 /**
2829 * Test for {@link Span#getStart()}.
2930 */
3031 @Test
31 public void testGetStart() {
32 Assert.assertEquals(5, new Span(5, 6).getStart());
32 void testGetStart() {
33 Assertions.assertEquals(5, new Span(5, 6).getStart());
3334 }
3435
3536 /**
3637 * Test for {@link Span#getEnd()}.
3738 */
3839 @Test
39 public void testGetEnd() {
40 Assert.assertEquals(6, new Span(5, 6).getEnd());
40 void testGetEnd() {
41 Assertions.assertEquals(6, new Span(5, 6).getEnd());
4142 }
4243
4344 /**
4445 * Test for {@link Span#length()}.
4546 */
4647 @Test
47 public void testLength() {
48 Assert.assertEquals(11, new Span(10, 21).length());
48 void testLength() {
49 Assertions.assertEquals(11, new Span(10, 21).length());
4950 }
5051
5152 /**
5253 * Test for {@link Span#contains(Span)}.
5354 */
5455 @Test
55 public void testContains() {
56 void testContains() {
5657 Span a = new Span(500, 900);
5758 Span b = new Span(520, 600);
5859
59 Assert.assertEquals(true, a.contains(b));
60 Assertions.assertEquals(true, a.contains(b));
6061 }
6162
6263 /**
6364 * Test for {@link Span#contains(Span)}.
6465 */
6566 @Test
66 public void testContainsWithEqual() {
67 void testContainsWithEqual() {
6768 Span a = new Span(500, 900);
68 Assert.assertEquals(true, a.contains(a));
69 Assertions.assertEquals(true, a.contains(a));
6970 }
7071
7172 /**
7273 * Test for {@link Span#contains(Span)}.
7374 */
7475 @Test
75 public void testContainsWithLowerIntersect() {
76 void testContainsWithLowerIntersect() {
7677 Span a = new Span(500, 900);
7778 Span b = new Span(450, 1000);
78 Assert.assertEquals(false, a.contains(b));
79 Assertions.assertEquals(false, a.contains(b));
7980 }
8081
8182 /**
8283 * Test for {@link Span#contains(Span)}.
8384 */
8485 @Test
85 public void testContainsWithHigherIntersect() {
86 void testContainsWithHigherIntersect() {
8687 Span a = new Span(500, 900);
8788 Span b = new Span(500, 1000);
88 Assert.assertEquals(false, a.contains(b));
89 Assertions.assertEquals(false, a.contains(b));
8990 }
9091
9192 /**
9293 * Test for {@link Span#contains(int)}.
9394 */
9495 @Test
95 public void testContainsInt() {
96 void testContainsInt() {
9697 Span a = new Span(10, 300);
9798
9899 /* NOTE: here the span does not contain the endpoint marked as the end
100101 * true end for the span. The indexes used must observe the same
101102 * requirements for the contains function.
102103 */
103 Assert.assertFalse(a.contains(9));
104 Assert.assertTrue(a.contains(10));
105 Assert.assertTrue(a.contains(200));
106 Assert.assertTrue(a.contains(299));
107 Assert.assertFalse(a.contains(300));
104 Assertions.assertFalse(a.contains(9));
105 Assertions.assertTrue(a.contains(10));
106 Assertions.assertTrue(a.contains(200));
107 Assertions.assertTrue(a.contains(299));
108 Assertions.assertFalse(a.contains(300));
108109 }
109110
110111 /**
111112 * Test for {@link Span#startsWith(Span)}.
112113 */
113114 @Test
114 public void testStartsWith() {
115 void testStartsWith() {
115116 Span a = new Span(10, 50);
116117 Span b = new Span(10, 12);
117118
118 Assert.assertTrue(a.startsWith(a));
119 Assert.assertTrue(a.startsWith(b));
120 Assert.assertFalse(b.startsWith(a));
119 Assertions.assertTrue(a.startsWith(a));
120 Assertions.assertTrue(a.startsWith(b));
121 Assertions.assertFalse(b.startsWith(a));
121122 }
122123
123124 /**
124125 * Test for {@link Span#intersects(Span)}.
125126 */
126127 @Test
127 public void testIntersects() {
128 void testIntersects() {
128129 Span a = new Span(10, 50);
129130 Span b = new Span(40, 100);
130131
131 Assert.assertTrue(a.intersects(b));
132 Assert.assertTrue(b.intersects(a));
132 Assertions.assertTrue(a.intersects(b));
133 Assertions.assertTrue(b.intersects(a));
133134
134135 Span c = new Span(10, 20);
135136 Span d = new Span(40, 50);
136137
137 Assert.assertFalse(c.intersects(d));
138 Assert.assertFalse(d.intersects(c));
139 Assert.assertTrue(b.intersects(d));
138 Assertions.assertFalse(c.intersects(d));
139 Assertions.assertFalse(d.intersects(c));
140 Assertions.assertTrue(b.intersects(d));
140141 }
141142
142143 /**
143144 * Test for {@link Span#crosses(Span)}.
144145 */
145146 @Test
146 public void testCrosses() {
147 void testCrosses() {
147148 Span a = new Span(10, 50);
148149 Span b = new Span(40, 100);
149150
150 Assert.assertTrue(a.crosses(b));
151 Assert.assertTrue(b.crosses(a));
151 Assertions.assertTrue(a.crosses(b));
152 Assertions.assertTrue(b.crosses(a));
152153
153154 Span c = new Span(10, 20);
154155 Span d = new Span(40, 50);
155156
156 Assert.assertFalse(c.crosses(d));
157 Assert.assertFalse(d.crosses(c));
158 Assert.assertFalse(b.crosses(d));
159 }
160
161 /**
162 * Test for {@link Span#compareTo(Object)}.
163 */
164 @Test
165 public void testCompareToLower() {
157 Assertions.assertFalse(c.crosses(d));
158 Assertions.assertFalse(d.crosses(c));
159 Assertions.assertFalse(b.crosses(d));
160 }
161
162 /**
163 * Test for {@link Span#compareTo(Object)}.
164 */
165 @Test
166 void testCompareToLower() {
166167 Span a = new Span(100, 1000);
167168 Span b = new Span(10, 50);
168 Assert.assertEquals(true, a.compareTo(b) > 0);
169 }
170
171 /**
172 * Test for {@link Span#compareTo(Object)}.
173 */
174 @Test
175 public void testCompareToHigher() {
169 Assertions.assertEquals(true, a.compareTo(b) > 0);
170 }
171
172 /**
173 * Test for {@link Span#compareTo(Object)}.
174 */
175 @Test
176 void testCompareToHigher() {
176177 Span a = new Span(100, 200);
177178 Span b = new Span(300, 400);
178 Assert.assertEquals(true, a.compareTo(b) < 0);
179 }
180
181 /**
182 * Test for {@link Span#compareTo(Object)}.
183 */
184 @Test
185 public void testCompareToEquals() {
179 Assertions.assertEquals(true, a.compareTo(b) < 0);
180 }
181
182 /**
183 * Test for {@link Span#compareTo(Object)}.
184 */
185 @Test
186 void testCompareToEquals() {
186187 Span a = new Span(30, 1000);
187188 Span b = new Span(30, 1000);
188 Assert.assertEquals(true, a.compareTo(b) == 0);
189 Assertions.assertEquals(true, a.compareTo(b) == 0);
189190 }
190191
191192 ///
194195 * Test for {@link Span#compareTo(Object)}.
195196 */
196197 @Test
197 public void testCompareToEqualsSameType() {
198 void testCompareToEqualsSameType() {
198199 Span a = new Span(30, 1000, "a");
199200 Span b = new Span(30, 1000, "a");
200 Assert.assertEquals(true, a.compareTo(b) == 0);
201 }
202
203 /**
204 * Test for {@link Span#compareTo(Object)}.
205 */
206 @Test
207 public void testCompareToEqualsDiffType1() {
201 Assertions.assertEquals(true, a.compareTo(b) == 0);
202 }
203
204 /**
205 * Test for {@link Span#compareTo(Object)}.
206 */
207 @Test
208 void testCompareToEqualsDiffType1() {
208209 Span a = new Span(30, 1000, "a");
209210 Span b = new Span(30, 1000, "b");
210 Assert.assertEquals(true, a.compareTo(b) == -1);
211 }
212
213 /**
214 * Test for {@link Span#compareTo(Object)}.
215 */
216 @Test
217 public void testCompareToEqualsDiffType2() {
211 Assertions.assertEquals(true, a.compareTo(b) == -1);
212 }
213
214 /**
215 * Test for {@link Span#compareTo(Object)}.
216 */
217 @Test
218 void testCompareToEqualsDiffType2() {
218219 Span a = new Span(30, 1000, "b");
219220 Span b = new Span(30, 1000, "a");
220 Assert.assertEquals(true, a.compareTo(b) == 1);
221 }
222
223 /**
224 * Test for {@link Span#compareTo(Object)}.
225 */
226 @Test
227 public void testCompareToEqualsNullType1() {
221 Assertions.assertEquals(true, a.compareTo(b) == 1);
222 }
223
224 /**
225 * Test for {@link Span#compareTo(Object)}.
226 */
227 @Test
228 void testCompareToEqualsNullType1() {
228229 Span a = new Span(30, 1000);
229230 Span b = new Span(30, 1000, "b");
230 Assert.assertEquals(true, a.compareTo(b) == 1);
231 }
232
233 /**
234 * Test for {@link Span#compareTo(Object)}.
235 */
236 @Test
237 public void testCompareToEqualsNullType2() {
231 Assertions.assertEquals(true, a.compareTo(b) == 1);
232 }
233
234 /**
235 * Test for {@link Span#compareTo(Object)}.
236 */
237 @Test
238 void testCompareToEqualsNullType2() {
238239 Span a = new Span(30, 1000, "b");
239240 Span b = new Span(30, 1000);
240 Assert.assertEquals(true, a.compareTo(b) == -1);
241 Assertions.assertEquals(true, a.compareTo(b) == -1);
241242 }
242243
243244 /**
244245 * Test for {@link Span#hashCode()}.
245246 */
246247 @Test
247 public void testhHashCode() {
248 Assert.assertEquals(new Span(10, 11), new Span(10, 11));
248 void testhHashCode() {
249 Assertions.assertEquals(new Span(10, 11), new Span(10, 11));
249250 }
250251
251252 /**
252253 * Test for {@link Span#equals(Object)}.
253254 */
254255 @Test
255 public void testEqualsWithNull() {
256 void testEqualsWithNull() {
256257 Span a = new Span(0, 0);
257 Assert.assertEquals(a.equals(null), false);
258 Assertions.assertEquals(a.equals(null), false);
258259 }
259260
260261 /**
261262 * Test for {@link Span#equals(Object)}.
262263 */
263264 @Test
264 public void testEquals() {
265 void testEquals() {
265266 Span a1 = new Span(100, 1000, "test");
266267 Span a2 = new Span(100, 1000, "test");
267 Assert.assertTrue(a1.equals(a2));
268 Assertions.assertTrue(a1.equals(a2));
268269
269270 // end is different
270271 Span b1 = new Span(100, 100, "test");
271 Assert.assertFalse(a1.equals(b1));
272 Assertions.assertFalse(a1.equals(b1));
272273
273274 // type is different
274275 Span c1 = new Span(100, 1000, "Test");
275 Assert.assertFalse(a1.equals(c1));
276 Assertions.assertFalse(a1.equals(c1));
276277
277278 Span d1 = new Span(100, 1000);
278 Assert.assertFalse(d1.equals(a1));
279 Assert.assertFalse(a1.equals(d1));
279 Assertions.assertFalse(d1.equals(a1));
280 Assertions.assertFalse(a1.equals(d1));
280281
281282 }
282283
284285 * Test for {@link Span#toString()}.
285286 */
286287 @Test
287 public void testToString() {
288 Assert.assertEquals("[50..100)", new Span(50, 100).toString());
289 Assert.assertEquals("[50..100) myType", new Span(50, 100, "myType").toString());
290 }
291
292 @Test
293 public void testTrim() {
288 void testToString() {
289 Assertions.assertEquals("[50..100)", new Span(50, 100).toString());
290 Assertions.assertEquals("[50..100) myType", new Span(50, 100, "myType").toString());
291 }
292
293 @Test
294 void testTrim() {
294295 String string1 = " 12 34 ";
295296 Span span1 = new Span(0, string1.length());
296 Assert.assertEquals("12 34", span1.trim(string1).getCoveredText(string1));
297 }
298
299 @Test
300 public void testTrimWhitespaceSpan() {
297 Assertions.assertEquals("12 34", span1.trim(string1).getCoveredText(string1));
298 }
299
300 @Test
301 void testTrimWhitespaceSpan() {
301302 String string1 = " ";
302303 Span span1 = new Span(0, string1.length());
303 Assert.assertEquals("", span1.trim(string1).getCoveredText(string1));
304 Assertions.assertEquals("", span1.trim(string1).getCoveredText(string1));
304305 }
305306
306307 /**
307308 * Test if it fails to construct span with invalid start
308309 */
309 @Test(expected = IllegalArgumentException.class)
310 public void testTooSmallStart() throws Exception {
311 new Span(-1, 100);
310 @Test
311 void testTooSmallStart() {
312 Assertions.assertThrows(IllegalArgumentException.class, () -> {
313 new Span(-1, 100);
314 });
312315 }
313316
314317 /**
315318 * Test if it fails to construct span with invalid end
316319 */
317 @Test(expected = IllegalArgumentException.class)
318 public void testTooSmallEnd() throws Exception {
319 new Span(50, -1);
320 @Test
321 void testTooSmallEnd() {
322 Assertions.assertThrows(IllegalArgumentException.class, () -> {
323 new Span(50, -1);
324 });
320325 }
321326
322327 /**
323328 * Test if it fails to construct span with start > end
324329 */
325 @Test(expected = IllegalArgumentException.class)
326 public void testStartLargerThanEnd() throws Exception {
327 new Span(100, 50);
330 @Test
331 void testStartLargerThanEnd() {
332 Assertions.assertThrows(IllegalArgumentException.class, () -> {
333 new Span(100, 50);
334 });
328335 }
329336 }
1818
1919 import java.util.Iterator;
2020
21 import org.junit.Assert;
22 import org.junit.Test;
21 import org.junit.jupiter.api.Assertions;
22 import org.junit.jupiter.api.Test;
2323
2424 /**
2525 * Tests for the {@link StringList} class.
3030 * Tests {@link StringList} which uses {@link String#intern}.
3131 */
3232 @Test
33 public void testIntern() {
33 void testIntern() {
3434 StringList l1 = new StringList("a");
3535 StringList l2 = new StringList("a", "b");
36 Assert.assertTrue(l1.getToken(0) == l2.getToken(0));
36 Assertions.assertTrue(l1.getToken(0) == l2.getToken(0));
3737 }
3838
3939 /**
4040 * Tests {@link StringList#getToken(int)}.
4141 */
4242 @Test
43 public void testGetToken() {
43 void testGetToken() {
4444 StringList l = new StringList("a", "b");
45 Assert.assertEquals(2, l.size());
46 Assert.assertEquals("a", l.getToken(0));
47 Assert.assertEquals("b", l.getToken(1));
45 Assertions.assertEquals(2, l.size());
46 Assertions.assertEquals("a", l.getToken(0));
47 Assertions.assertEquals("b", l.getToken(1));
4848 }
4949
5050 /**
5151 * Tests {@link StringList#iterator()}.
5252 */
5353 @Test
54 public void testIterator() {
54 void testIterator() {
5555 StringList l = new StringList("a");
5656 Iterator<String> it = l.iterator();
57 Assert.assertTrue(it.hasNext());
58 Assert.assertEquals("a", it.next());
59 Assert.assertFalse(it.hasNext());
57 Assertions.assertTrue(it.hasNext());
58 Assertions.assertEquals("a", it.next());
59 Assertions.assertFalse(it.hasNext());
6060
6161 // now test with more than one string
6262 l = new StringList("a", "b", "c");
6363 it = l.iterator();
6464
65 Assert.assertTrue(it.hasNext());
66 Assert.assertEquals("a", it.next());
67 Assert.assertTrue(it.hasNext());
68 Assert.assertEquals("b", it.next());
69 Assert.assertTrue(it.hasNext());
70 Assert.assertEquals("c", it.next());
71 Assert.assertFalse(it.hasNext());
65 Assertions.assertTrue(it.hasNext());
66 Assertions.assertEquals("a", it.next());
67 Assertions.assertTrue(it.hasNext());
68 Assertions.assertEquals("b", it.next());
69 Assertions.assertTrue(it.hasNext());
70 Assertions.assertEquals("c", it.next());
71 Assertions.assertFalse(it.hasNext());
7272 }
7373
7474 /**
7575 * Tests {@link StringList#compareToIgnoreCase(StringList)}.
7676 */
7777 @Test
78 public void testCompareToIgnoreCase() {
79 Assert.assertTrue(new StringList("a", "b").compareToIgnoreCase(
78 void testCompareToIgnoreCase() {
79 Assertions.assertTrue(new StringList("a", "b").compareToIgnoreCase(
8080 new StringList("A", "B")));
8181 }
8282
8484 * Tests {@link StringList#equals(Object)}.
8585 */
8686 @Test
87 public void testEquals() {
88 Assert.assertEquals(new StringList("a", "b"),
87 void testEquals() {
88 Assertions.assertEquals(new StringList("a", "b"),
8989 new StringList("a", "b"));
9090
91 Assert.assertFalse(new StringList("a", "b").equals(
91 Assertions.assertFalse(new StringList("a", "b").equals(
9292 new StringList("A", "B")));
9393 }
9494
9696 * Tests {@link StringList#hashCode()}.
9797 */
9898 @Test
99 public void testHashCode() {
100 Assert.assertEquals(new StringList("a", "b").hashCode(),
99 void testHashCode() {
100 Assertions.assertEquals(new StringList("a", "b").hashCode(),
101101 new StringList("a", "b").hashCode());
102 Assert.assertNotEquals(new StringList("a", "b").hashCode(),
102 Assertions.assertNotEquals(new StringList("a", "b").hashCode(),
103103 new StringList("a", "c").hashCode());
104104 }
105105
107107 * Tests {@link StringList#toString()}.
108108 */
109109 @Test
110 public void testToString() {
111 Assert.assertEquals("[a]", new StringList("a").toString());
112 Assert.assertEquals("[a,b]", new StringList("a", "b").toString());
110 void testToString() {
111 Assertions.assertEquals("[a]", new StringList("a").toString());
112 Assertions.assertEquals("[a,b]", new StringList("a", "b").toString());
113113 }
114114 }
1616
1717 package opennlp.tools.util;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link StringUtil} class.
2424 */
25
2526 public class StringUtilTest {
2627
2728 @Test
28 public void testNoBreakSpace() {
29 Assert.assertTrue(StringUtil.isWhitespace(0x00A0));
30 Assert.assertTrue(StringUtil.isWhitespace(0x2007));
31 Assert.assertTrue(StringUtil.isWhitespace(0x202F));
29 void testNoBreakSpace() {
30 Assertions.assertTrue(StringUtil.isWhitespace(0x00A0));
31 Assertions.assertTrue(StringUtil.isWhitespace(0x2007));
32 Assertions.assertTrue(StringUtil.isWhitespace(0x202F));
3233
33 Assert.assertTrue(StringUtil.isWhitespace((char) 0x00A0));
34 Assert.assertTrue(StringUtil.isWhitespace((char) 0x2007));
35 Assert.assertTrue(StringUtil.isWhitespace((char) 0x202F));
34 Assertions.assertTrue(StringUtil.isWhitespace((char) 0x00A0));
35 Assertions.assertTrue(StringUtil.isWhitespace((char) 0x2007));
36 Assertions.assertTrue(StringUtil.isWhitespace((char) 0x202F));
3637 }
3738
3839 @Test
39 public void testToLowerCase() {
40 Assert.assertEquals("test", StringUtil.toLowerCase("TEST"));
41 Assert.assertEquals("simple", StringUtil.toLowerCase("SIMPLE"));
40 void testToLowerCase() {
41 Assertions.assertEquals("test", StringUtil.toLowerCase("TEST"));
42 Assertions.assertEquals("simple", StringUtil.toLowerCase("SIMPLE"));
4243 }
4344
4445 @Test
45 public void testToUpperCase() {
46 Assert.assertEquals("TEST", StringUtil.toUpperCase("test"));
47 Assert.assertEquals("SIMPLE", StringUtil.toUpperCase("simple"));
46 void testToUpperCase() {
47 Assertions.assertEquals("TEST", StringUtil.toUpperCase("test"));
48 Assertions.assertEquals("SIMPLE", StringUtil.toUpperCase("simple"));
4849 }
4950
5051 @Test
51 public void testIsEmpty() {
52 Assert.assertTrue(StringUtil.isEmpty(""));
53 Assert.assertTrue(!StringUtil.isEmpty("a"));
54 }
55
56 @Test(expected = NullPointerException.class)
57 public void testIsEmptyWithNullString() {
58 // should raise a NPE
59 StringUtil.isEmpty(null);
52 void testIsEmpty() {
53 Assertions.assertTrue(StringUtil.isEmpty(""));
54 Assertions.assertTrue(!StringUtil.isEmpty("a"));
6055 }
6156
6257 @Test
63 public void testLowercaseBeyondBMP() throws Exception {
64 int[] codePoints = new int[]{65,66578,67}; //A,Deseret capital BEE,C
65 int[] expectedCodePoints = new int[]{97,66618,99};//a,Deseret lowercase b,c
58 void testIsEmptyWithNullString() {
59 // should raise a NPE
60 Assertions.assertThrows(NullPointerException.class, () -> {
61 // should raise a NPE
62 StringUtil.isEmpty(null);
63 });
64 }
65
66 @Test
67 void testLowercaseBeyondBMP() {
68 int[] codePoints = new int[] {65, 66578, 67}; //A,Deseret capital BEE,C
69 int[] expectedCodePoints = new int[] {97, 66618, 99};//a,Deseret lowercase b,c
6670 String input = new String(codePoints, 0, codePoints.length);
6771 String lc = StringUtil.toLowerCase(input);
68 Assert.assertArrayEquals(expectedCodePoints, lc.codePoints().toArray());
72 Assertions.assertArrayEquals(expectedCodePoints, lc.codePoints().toArray());
6973 }
7074 }
2020 import java.util.HashMap;
2121 import java.util.Map;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.ml.EventTrainer;
2727
2828 public class TrainingParametersTest {
2929
3030 @Test
31 public void testConstructors() throws Exception {
31 void testConstructors() throws Exception {
3232 TrainingParameters tp1 =
3333 new TrainingParameters(build("key1=val1,key2=val2,key3=val3"));
3434
4343 }
4444
4545 @Test
46 public void testDefault() {
46 void testDefault() {
4747 TrainingParameters tr = TrainingParameters.defaultParams();
4848
49 Assert.assertEquals(4, tr.getSettings().size());
50 Assert.assertEquals("MAXENT", tr.algorithm());
51 Assert.assertEquals(EventTrainer.EVENT_VALUE,
49 Assertions.assertEquals(4, tr.getSettings().size());
50 Assertions.assertEquals("MAXENT", tr.algorithm());
51 Assertions.assertEquals(EventTrainer.EVENT_VALUE,
5252 tr.getStringParameter(TrainingParameters.TRAINER_TYPE_PARAM,
5353 "v11")); // use different defaults
54 Assert.assertEquals(100,
54 Assertions.assertEquals(100,
5555 tr.getIntParameter(TrainingParameters.ITERATIONS_PARAM,
5656 200)); // use different defaults
57 Assert.assertEquals(5,
57 Assertions.assertEquals(5,
5858 tr.getIntParameter(TrainingParameters.CUTOFF_PARAM,
5959 200)); // use different defaults
6060 }
6161
6262 @Test
63 public void testGetAlgorithm() {
63 void testGetAlgorithm() {
6464 TrainingParameters tp = build("Algorithm=Perceptron,n1.Algorithm=SVM");
6565
66 Assert.assertEquals("Perceptron", tp.algorithm());
67 Assert.assertEquals("SVM", tp.algorithm("n1"));
66 Assertions.assertEquals("Perceptron", tp.algorithm());
67 Assertions.assertEquals("SVM", tp.algorithm("n1"));
6868 }
6969
7070 @Test
71 public void testGetSettings() {
71 void testGetAlgorithmCaseInsensitive() {
72 TrainingParameters tp = build("ALGORITHM=Perceptron,n1.Algorithm=SVM");
73
74 Assertions.assertEquals("Perceptron", tp.algorithm());
75 Assertions.assertEquals("SVM", tp.algorithm("n1"));
76 }
77
78 @Test
79 void testGetSettings() {
7280 TrainingParameters tp = build("k1=v1,n1.k2=v2,n2.k3=v3,n1.k4=v4");
7381
7482 assertEquals(buildMap("k1=v1"), tp.getSettings());
7583 assertEquals(buildMap("k2=v2,k4=v4"), tp.getSettings("n1"));
7684 assertEquals(buildMap("k3=v3"), tp.getSettings("n2"));
77 Assert.assertTrue(tp.getSettings("n3").isEmpty());
85 Assertions.assertTrue(tp.getSettings("n3").isEmpty());
7886 }
7987
8088 @Test
81 public void testGetParameters() {
89 void testGetParameters() {
8290 TrainingParameters tp = build("k1=v1,n1.k2=v2,n2.k3=v3,n1.k4=v4");
8391
8492 assertEquals(build("k1=v1"), tp.getParameters(null));
8593 assertEquals(build("k2=v2,k4=v4"), tp.getParameters("n1"));
8694 assertEquals(build("k3=v3"), tp.getParameters("n2"));
87 Assert.assertTrue(tp.getParameters("n3").getSettings().isEmpty());
95 Assertions.assertTrue(tp.getParameters("n3").getSettings().isEmpty());
8896 }
8997
9098 @Test
91 public void testPutGet() {
99 void testPutGet() {
92100 TrainingParameters tp =
93101 build("k1=v1,int.k2=123,str.k2=v3,str.k3=v4,boolean.k4=false,double.k5=123.45,k21=234.5");
94102
95 Assert.assertEquals("v1", tp.getStringParameter("k1", "def"));
96 Assert.assertEquals("def", tp.getStringParameter("k2", "def"));
97 Assert.assertEquals("v3", tp.getStringParameter("str", "k2", "def"));
98 Assert.assertEquals("def", tp.getStringParameter("str", "k4", "def"));
103 Assertions.assertEquals("v1", tp.getStringParameter("k1", "def"));
104 Assertions.assertEquals("def", tp.getStringParameter("k2", "def"));
105 Assertions.assertEquals("v3", tp.getStringParameter("str", "k2", "def"));
106 Assertions.assertEquals("def", tp.getStringParameter("str", "k4", "def"));
99107
100 Assert.assertEquals(-100, tp.getIntParameter("k11", -100));
108 Assertions.assertEquals(-100, tp.getIntParameter("k11", -100));
101109 tp.put("k11", 234);
102 Assert.assertEquals(234, tp.getIntParameter("k11", -100));
103 Assert.assertEquals(123, tp.getIntParameter("int", "k2", -100));
104 Assert.assertEquals(-100, tp.getIntParameter("int", "k4", -100));
110 Assertions.assertEquals(234, tp.getIntParameter("k11", -100));
111 Assertions.assertEquals(123, tp.getIntParameter("int", "k2", -100));
112 Assertions.assertEquals(-100, tp.getIntParameter("int", "k4", -100));
105113
106 Assert.assertEquals(234.5, tp.getDoubleParameter("k21", -100), 0.001);
114 Assertions.assertEquals(234.5, tp.getDoubleParameter("k21", -100), 0.001);
107115 tp.put("k21", 345.6);
108 Assert.assertEquals(345.6, tp.getDoubleParameter("k21", -100), 0.001); // should be changed
116 Assertions.assertEquals(345.6, tp.getDoubleParameter("k21", -100), 0.001); // should be changed
109117 tp.putIfAbsent("k21", 456.7);
110 Assert.assertEquals(345.6, tp.getDoubleParameter("k21", -100), 0.001); // should be unchanged
111 Assert.assertEquals(123.45, tp.getDoubleParameter("double", "k5", -100), 0.001);
118 Assertions.assertEquals(345.6, tp.getDoubleParameter("k21", -100), 0.001); // should be unchanged
119 Assertions.assertEquals(123.45, tp.getDoubleParameter("double", "k5", -100), 0.001);
112120
113 Assert.assertEquals(true, tp.getBooleanParameter("k31", true));
121 Assertions.assertEquals(true, tp.getBooleanParameter("k31", true));
114122 tp.put("k31", false);
115 Assert.assertEquals(false, tp.getBooleanParameter("k31", true));
116 Assert.assertEquals(false, tp.getBooleanParameter("boolean", "k4", true));
123 Assertions.assertEquals(false, tp.getBooleanParameter("k31", true));
124 Assertions.assertEquals(false, tp.getBooleanParameter("boolean", "k4", true));
117125 }
118126
119127 // format: k1=v1,k2=v2,...
134142 }
135143
136144 private static void assertEquals(Map<String, String> map1, Map<String, String> map2) {
137 Assert.assertNotNull(map1);
138 Assert.assertNotNull(map2);
139 Assert.assertEquals(map1.size(), map2.size());
145 Assertions.assertNotNull(map1);
146 Assertions.assertNotNull(map2);
147 Assertions.assertEquals(map1.size(), map2.size());
140148 for (String key : map1.keySet()) {
141 Assert.assertEquals(map1.get(key), map2.get(key));
149 Assertions.assertEquals(map1.get(key), map2.get(key));
142150 }
143151 }
144152
145153 private static void assertEquals(Map<String, String> map, TrainingParameters actual) {
146 Assert.assertNotNull(actual);
154 Assertions.assertNotNull(actual);
147155 assertEquals(map, actual.getSettings());
148156 }
149157
150158 private static void assertEquals(TrainingParameters expected, TrainingParameters actual) {
151159 if (expected == null) {
152 Assert.assertNull(actual);
160 Assertions.assertNull(actual);
153161 } else {
154162 assertEquals(expected.getSettings(), actual);
155163 }
1616
1717 package opennlp.tools.util;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link Version} class.
2525 public class VersionTest {
2626
2727 @Test
28 public void testParse() {
28 void testParse() {
2929 Version referenceVersion = Version.currentVersion();
30 Assert.assertEquals(referenceVersion, Version.parse(referenceVersion.toString()));
30 Assertions.assertEquals(referenceVersion, Version.parse(referenceVersion.toString()));
3131
32 Assert.assertEquals(new Version(1,5,2, false),
32 Assertions.assertEquals(new Version(1, 5, 2, false),
3333 Version.parse("1.5.2-incubating"));
34 Assert.assertEquals(new Version(1,5,2, false),
34 Assertions.assertEquals(new Version(1, 5, 2, false),
3535 Version.parse("1.5.2"));
3636 }
3737
3838 @Test
39 public void testParseSnapshot() {
40 Assert.assertEquals(new Version(1,5,2, true),
39 void testParseSnapshot() {
40 Assertions.assertEquals(new Version(1, 5, 2, true),
4141 Version.parse("1.5.2-incubating-SNAPSHOT"));
42 Assert.assertEquals(new Version(1,5,2, true),
42 Assertions.assertEquals(new Version(1, 5, 2, true),
4343 Version.parse("1.5.2-SNAPSHOT"));
4444 }
4545
4646 @Test
47 public void testParseInvalidVersion() {
47 void testParseInvalidVersion() {
4848 try {
4949 Version.parse("1.5.");
50 }
51 catch (NumberFormatException e) {
50 } catch (NumberFormatException e) {
5251 return;
5352 }
5453
55 Assert.assertFalse(false);
54 Assertions.fail("Version.parse(\"1.5.\") should have thrown a NumberFormatException");
5655 }
5756
5857 @Test
59 public void testParseInvalidVersion2() {
58 void testParseInvalidVersion2() {
6059 try {
6160 Version.parse("1.5");
62 }
63 catch (NumberFormatException e) {
61 } catch (NumberFormatException e) {
6462 return;
6563 }
6664
67 Assert.assertTrue(false);
65 Assertions.assertTrue(false);
6866 }
6967 }
2323 import java.util.List;
2424 import java.util.NoSuchElementException;
2525
26 import org.junit.Assert;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.util.ObjectStream;
3030 import opennlp.tools.util.eval.CrossValidationPartitioner.TrainingSampleStream;
3535 public class CrossValidationPartitionerTest {
3636
3737 @Test
38 public void testEmptyDataSet() throws IOException {
38 void testEmptyDataSet() throws IOException {
3939 Collection<String> emptyCollection = Collections.emptySet();
4040
4141 CrossValidationPartitioner<String> partitioner =
4242 new CrossValidationPartitioner<>(emptyCollection, 2);
4343
44 Assert.assertTrue(partitioner.hasNext());
45 Assert.assertNull(partitioner.next().read());
46
47 Assert.assertTrue(partitioner.hasNext());
48 Assert.assertNull(partitioner.next().read());
49
50 Assert.assertFalse(partitioner.hasNext());
44 Assertions.assertTrue(partitioner.hasNext());
45 Assertions.assertNull(partitioner.next().read());
46
47 Assertions.assertTrue(partitioner.hasNext());
48 Assertions.assertNull(partitioner.next().read());
49
50 Assertions.assertFalse(partitioner.hasNext());
5151
5252 try {
5353 // Should throw NoSuchElementException
5454 partitioner.next();
5555
5656 // oops, no exception was thrown
57 Assert.fail();
58 }
59 catch (NoSuchElementException e) {
57 Assertions.fail();
58 } catch (NoSuchElementException e) {
6059 // expected
6160 }
6261 }
6564 * Test 3-fold cross validation on a small sample data set.
6665 */
6766 @Test
68 public void test3FoldCV() throws IOException {
67 void test3FoldCV() throws IOException {
6968 List<String> data = new LinkedList<>();
7069 data.add("01");
7170 data.add("02");
8180 CrossValidationPartitioner<String> partitioner = new CrossValidationPartitioner<>(data, 3);
8281
8382 // first partition
84 Assert.assertTrue(partitioner.hasNext());
83 Assertions.assertTrue(partitioner.hasNext());
8584 TrainingSampleStream<String> firstTraining = partitioner.next();
8685
87 Assert.assertEquals("02", firstTraining.read());
88 Assert.assertEquals("03", firstTraining.read());
89 Assert.assertEquals("05", firstTraining.read());
90 Assert.assertEquals("06", firstTraining.read());
91 Assert.assertEquals("08", firstTraining.read());
92 Assert.assertEquals("09", firstTraining.read());
93 Assert.assertNull(firstTraining.read());
86 Assertions.assertEquals("02", firstTraining.read());
87 Assertions.assertEquals("03", firstTraining.read());
88 Assertions.assertEquals("05", firstTraining.read());
89 Assertions.assertEquals("06", firstTraining.read());
90 Assertions.assertEquals("08", firstTraining.read());
91 Assertions.assertEquals("09", firstTraining.read());
92 Assertions.assertNull(firstTraining.read());
9493
9594 ObjectStream<String> firstTest = firstTraining.getTestSampleStream();
9695
97 Assert.assertEquals("01", firstTest.read());
98 Assert.assertEquals("04", firstTest.read());
99 Assert.assertEquals("07", firstTest.read());
100 Assert.assertEquals("10", firstTest.read());
101 Assert.assertNull(firstTest.read());
96 Assertions.assertEquals("01", firstTest.read());
97 Assertions.assertEquals("04", firstTest.read());
98 Assertions.assertEquals("07", firstTest.read());
99 Assertions.assertEquals("10", firstTest.read());
100 Assertions.assertNull(firstTest.read());
102101
103102 // second partition
104 Assert.assertTrue(partitioner.hasNext());
103 Assertions.assertTrue(partitioner.hasNext());
105104 TrainingSampleStream<String> secondTraining = partitioner.next();
106105
107 Assert.assertEquals("01", secondTraining.read());
108 Assert.assertEquals("03", secondTraining.read());
109 Assert.assertEquals("04", secondTraining.read());
110 Assert.assertEquals("06", secondTraining.read());
111 Assert.assertEquals("07", secondTraining.read());
112 Assert.assertEquals("09", secondTraining.read());
113 Assert.assertEquals("10", secondTraining.read());
114
115 Assert.assertNull(secondTraining.read());
106 Assertions.assertEquals("01", secondTraining.read());
107 Assertions.assertEquals("03", secondTraining.read());
108 Assertions.assertEquals("04", secondTraining.read());
109 Assertions.assertEquals("06", secondTraining.read());
110 Assertions.assertEquals("07", secondTraining.read());
111 Assertions.assertEquals("09", secondTraining.read());
112 Assertions.assertEquals("10", secondTraining.read());
113
114 Assertions.assertNull(secondTraining.read());
116115
117116 ObjectStream<String> secondTest = secondTraining.getTestSampleStream();
118117
119 Assert.assertEquals("02", secondTest.read());
120 Assert.assertEquals("05", secondTest.read());
121 Assert.assertEquals("08", secondTest.read());
122 Assert.assertNull(secondTest.read());
118 Assertions.assertEquals("02", secondTest.read());
119 Assertions.assertEquals("05", secondTest.read());
120 Assertions.assertEquals("08", secondTest.read());
121 Assertions.assertNull(secondTest.read());
123122
124123 // third partition
125 Assert.assertTrue(partitioner.hasNext());
124 Assertions.assertTrue(partitioner.hasNext());
126125 TrainingSampleStream<String> thirdTraining = partitioner.next();
127126
128 Assert.assertEquals("01", thirdTraining.read());
129 Assert.assertEquals("02", thirdTraining.read());
130 Assert.assertEquals("04", thirdTraining.read());
131 Assert.assertEquals("05", thirdTraining.read());
132 Assert.assertEquals("07", thirdTraining.read());
133 Assert.assertEquals("08", thirdTraining.read());
134 Assert.assertEquals("10", thirdTraining.read());
135 Assert.assertNull(thirdTraining.read());
127 Assertions.assertEquals("01", thirdTraining.read());
128 Assertions.assertEquals("02", thirdTraining.read());
129 Assertions.assertEquals("04", thirdTraining.read());
130 Assertions.assertEquals("05", thirdTraining.read());
131 Assertions.assertEquals("07", thirdTraining.read());
132 Assertions.assertEquals("08", thirdTraining.read());
133 Assertions.assertEquals("10", thirdTraining.read());
134 Assertions.assertNull(thirdTraining.read());
136135
137136 ObjectStream<String> thirdTest = thirdTraining.getTestSampleStream();
138137
139 Assert.assertEquals("03", thirdTest.read());
140 Assert.assertEquals("06", thirdTest.read());
141 Assert.assertEquals("09", thirdTest.read());
142 Assert.assertNull(thirdTest.read());
143
144 Assert.assertFalse(partitioner.hasNext());
145 }
146
147 @Test
148 public void testFailSafty() throws IOException {
138 Assertions.assertEquals("03", thirdTest.read());
139 Assertions.assertEquals("06", thirdTest.read());
140 Assertions.assertEquals("09", thirdTest.read());
141 Assertions.assertNull(thirdTest.read());
142
143 Assertions.assertFalse(partitioner.hasNext());
144 }
145
146 @Test
147 void testFailSafty() throws IOException {
149148 List<String> data = new LinkedList<>();
150149 data.add("01");
151150 data.add("02");
158157 // Test that iterator from previous partition fails
159158 // if it is accessed
160159 TrainingSampleStream<String> firstTraining = partitioner.next();
161 Assert.assertEquals("02", firstTraining.read());
160 Assertions.assertEquals("02", firstTraining.read());
162161
163162 TrainingSampleStream<String> secondTraining = partitioner.next();
164163
165164 try {
166165 firstTraining.read();
167 Assert.fail();
168 }
169 catch (IllegalStateException expected) {
166 Assertions.fail();
167 } catch (IllegalStateException expected) {
170168 // the read above is expected to throw an exception
171169 }
172170
173171 try {
174172 firstTraining.getTestSampleStream();
175 Assert.fail();
176 }
177 catch (IllegalStateException expected) {
173 Assertions.fail();
174 } catch (IllegalStateException expected) {
178175 // the getTestSampleStream() call above is expected to throw an exception
179176 }
180177
183180
184181 try {
185182 secondTraining.read();
186 Assert.fail();
187 }
188 catch (IllegalStateException expected) {
183 Assertions.fail();
184 } catch (IllegalStateException expected) {
189185 // the read above is expected to throw an exception
190186 }
191187
194190 TrainingSampleStream<String> thirdTraining = partitioner.next();
195191 ObjectStream<String> thridTest = thirdTraining.getTestSampleStream();
196192
197 Assert.assertTrue(partitioner.hasNext());
193 Assertions.assertTrue(partitioner.hasNext());
198194 partitioner.next();
199195
200196 try {
201197 thridTest.read();
202 Assert.fail();
203 }
204 catch (IllegalStateException expected) {
205 // the read above is expected to throw an exception
206 }
207 }
208
209 @Test
210 public void testToString() {
198 Assertions.fail();
199 } catch (IllegalStateException expected) {
200 // the read above is expected to throw an exception
201 }
202 }
203
204 @Test
205 void testToString() {
211206 Collection<String> emptyCollection = Collections.emptySet();
212207 new CrossValidationPartitioner<>(emptyCollection, 10).toString();
213208 }
1616
1717 package opennlp.tools.util.eval;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 import opennlp.tools.util.Span;
2323
7878 */
7979 @Test
8080 public void testCountTruePositives() {
81 Assert.assertEquals(0, FMeasure.countTruePositives(new Object[] {}, new Object[] {}));
82 Assert.assertEquals(gold.length, FMeasure.countTruePositives(gold, gold));
83 Assert.assertEquals(0, FMeasure.countTruePositives(gold, predictedCompletelyDistinct));
84 Assert.assertEquals(2, FMeasure.countTruePositives(gold, predicted));
81 Assertions.assertEquals(0, FMeasure.countTruePositives(new Object[] {}, new Object[] {}));
82 Assertions.assertEquals(gold.length, FMeasure.countTruePositives(gold, gold));
83 Assertions.assertEquals(0, FMeasure.countTruePositives(gold, predictedCompletelyDistinct));
84 Assertions.assertEquals(2, FMeasure.countTruePositives(gold, predicted));
8585 }
8686
8787 /**
8989 */
9090 @Test
9191 public void testPrecision() {
92 Assert.assertEquals(1.0d, FMeasure.precision(gold, gold), DELTA);
93 Assert.assertEquals(0, FMeasure.precision(gold, predictedCompletelyDistinct), DELTA);
94 Assert.assertEquals(Double.NaN, FMeasure.precision(gold, new Object[] {}), DELTA);
95 Assert.assertEquals(0, FMeasure.precision(new Object[] {}, gold), DELTA);
96 Assert.assertEquals(2d / predicted.length, FMeasure.precision(gold, predicted), DELTA);
92 Assertions.assertEquals(1.0d, FMeasure.precision(gold, gold), DELTA);
93 Assertions.assertEquals(0, FMeasure.precision(gold, predictedCompletelyDistinct), DELTA);
94 Assertions.assertEquals(Double.NaN, FMeasure.precision(gold, new Object[] {}), DELTA);
95 Assertions.assertEquals(0, FMeasure.precision(new Object[] {}, gold), DELTA);
96 Assertions.assertEquals(2d / predicted.length, FMeasure.precision(gold, predicted), DELTA);
9797 }
9898
9999 /**
101101 */
102102 @Test
103103 public void testRecall() {
104 Assert.assertEquals(1.0d, FMeasure.recall(gold, gold), DELTA);
105 Assert.assertEquals(0, FMeasure.recall(gold, predictedCompletelyDistinct), DELTA);
106 Assert.assertEquals(0, FMeasure.recall(gold, new Object[] {}), DELTA);
107 Assert.assertEquals(Double.NaN, FMeasure.recall(new Object[] {}, gold), DELTA);
108 Assert.assertEquals(2d / gold.length, FMeasure.recall(gold, predicted), DELTA);
104 Assertions.assertEquals(1.0d, FMeasure.recall(gold, gold), DELTA);
105 Assertions.assertEquals(0, FMeasure.recall(gold, predictedCompletelyDistinct), DELTA);
106 Assertions.assertEquals(0, FMeasure.recall(gold, new Object[] {}), DELTA);
107 Assertions.assertEquals(Double.NaN, FMeasure.recall(new Object[] {}, gold), DELTA);
108 Assertions.assertEquals(2d / gold.length, FMeasure.recall(gold, predicted), DELTA);
109109 }
110110
111111 @Test
112112 public void testEmpty() {
113113 FMeasure fm = new FMeasure();
114 Assert.assertEquals(-1, fm.getFMeasure(), DELTA);
115 Assert.assertEquals(0, fm.getRecallScore(), DELTA);
116 Assert.assertEquals(0, fm.getPrecisionScore(), DELTA);
114 Assertions.assertEquals(-1, fm.getFMeasure(), DELTA);
115 Assertions.assertEquals(0, fm.getRecallScore(), DELTA);
116 Assertions.assertEquals(0, fm.getPrecisionScore(), DELTA);
117117 }
118118
119119 @Test
120120 public void testPerfect() {
121121 FMeasure fm = new FMeasure();
122122 fm.updateScores(gold, gold);
123 Assert.assertEquals(1, fm.getFMeasure(), DELTA);
124 Assert.assertEquals(1, fm.getRecallScore(), DELTA);
125 Assert.assertEquals(1, fm.getPrecisionScore(), DELTA);
123 Assertions.assertEquals(1, fm.getFMeasure(), DELTA);
124 Assertions.assertEquals(1, fm.getRecallScore(), DELTA);
125 Assertions.assertEquals(1, fm.getPrecisionScore(), DELTA);
126126 }
127127
128128 @Test
146146 double tp2 = FMeasure.countTruePositives(goldToMerge, predictedToMerge);
147147
148148
149 Assert.assertEquals((tp1 + tp2) / (target1 + target2), fm.getRecallScore(), DELTA);
150 Assert.assertEquals((tp1 + tp2) / (selected1 + selected2), fm.getPrecisionScore(), DELTA);
149 Assertions.assertEquals((tp1 + tp2) / (target1 + target2), fm.getRecallScore(), DELTA);
150 Assertions.assertEquals((tp1 + tp2) / (selected1 + selected2), fm.getPrecisionScore(), DELTA);
151151
152 Assert.assertEquals(fm.getRecallScore(), fmMerge.getRecallScore(), DELTA);
153 Assert.assertEquals(fm.getPrecisionScore(), fmMerge.getPrecisionScore(), DELTA);
152 Assertions.assertEquals(fm.getRecallScore(), fmMerge.getRecallScore(), DELTA);
153 Assertions.assertEquals(fm.getPrecisionScore(), fmMerge.getPrecisionScore(), DELTA);
154154 }
155155 }
1616
1717 package opennlp.tools.util.eval;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 /**
2323 * Tests for the {@link Mean} class.
2828 public void testMeanCalculation() {
2929 Mean a = new Mean();
3030 a.add(1);
31 Assert.assertEquals(1, a.count());
32 Assert.assertEquals(1d, a.mean(), 0.00001d);
31 Assertions.assertEquals(1, a.count());
32 Assertions.assertEquals(1d, a.mean(), 0.00001d);
3333
3434 a.add(1);
35 Assert.assertEquals(2, a.count());
36 Assert.assertEquals(1d, a.mean(), 0.00001d);
35 Assertions.assertEquals(2, a.count());
36 Assertions.assertEquals(1d, a.mean(), 0.00001d);
3737 a.toString();
3838
3939 Mean b = new Mean();
4040 b.add(0.5);
41 Assert.assertEquals(1, b.count());
42 Assert.assertEquals(0.5d, b.mean(), 0.00001d);
41 Assertions.assertEquals(1, b.count());
42 Assertions.assertEquals(0.5d, b.mean(), 0.00001d);
4343
4444 b.add(2);
45 Assert.assertEquals(2, b.count());
46 Assert.assertEquals(1.25d, b.mean(), 0.00001d);
45 Assertions.assertEquals(2, b.count());
46 Assertions.assertEquals(1.25d, b.mean(), 0.00001d);
4747 b.toString();
4848
4949 Mean c = new Mean();
50 Assert.assertEquals(0, c.count());
51 Assert.assertEquals(0d, c.mean(), 0.00001d);
50 Assertions.assertEquals(0, c.count());
51 Assertions.assertEquals(0d, c.mean(), 0.00001d);
5252 c.toString();
5353 }
5454
1616
1717 package opennlp.tools.util.ext;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 public class ExtensionLoaderTest {
2323
2626 String generateTestString();
2727 }
2828
29 static class TestStringGeneratorImpl implements TestStringGenerator {
29 static public class TestStringGeneratorImpl implements TestStringGenerator {
3030 public String generateTestString() {
3131 return "test";
3232 }
3333 }
3434
3535 @Test
36 public void testLoadingStringGenerator() throws ClassNotFoundException {
36 void testLoadingStringGenerator() {
3737 TestStringGenerator g = ExtensionLoader.instantiateExtension(TestStringGenerator.class,
3838 TestStringGeneratorImpl.class.getName());
39 Assert.assertEquals("test", g.generateTestString());
39 Assertions.assertEquals("test", g.generateTestString());
4040 }
4141
4242 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class BigramNameFeatureGeneratorTest {
2727
2828 private List<String> features;
2929 static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3030
31 @Before
32 public void setUp() throws Exception {
31 @BeforeEach
32 void setUp() {
3333 features = new ArrayList<>();
3434 }
3535
3636 @Test
37 public void testBegin() {
37 void testBegin() {
3838
3939 final int testTokenIndex = 0;
4040
4242
4343 generator.createFeatures(features, testSentence, testTokenIndex, null);
4444
45 Assert.assertEquals(2, features.size());
46 Assert.assertEquals("w,nw=This,is", features.get(0));
47 Assert.assertEquals("wc,nc=ic,lc", features.get(1));
45 Assertions.assertEquals(2, features.size());
46 Assertions.assertEquals("w,nw=This,is", features.get(0));
47 Assertions.assertEquals("wc,nc=ic,lc", features.get(1));
4848 }
4949
5050 @Test
51 public void testMiddle() {
51 void testMiddle() {
5252
5353 final int testTokenIndex = 2;
5454
5656
5757 generator.createFeatures(features, testSentence, testTokenIndex, null);
5858
59 Assert.assertEquals(4, features.size());
60 Assert.assertEquals("pw,w=is,an", features.get(0));
61 Assert.assertEquals("pwc,wc=lc,lc", features.get(1));
62 Assert.assertEquals("w,nw=an,example", features.get(2));
63 Assert.assertEquals("wc,nc=lc,lc", features.get(3));
59 Assertions.assertEquals(4, features.size());
60 Assertions.assertEquals("pw,w=is,an", features.get(0));
61 Assertions.assertEquals("pwc,wc=lc,lc", features.get(1));
62 Assertions.assertEquals("w,nw=an,example", features.get(2));
63 Assertions.assertEquals("wc,nc=lc,lc", features.get(3));
6464 }
6565
6666 @Test
67 public void testEnd() {
67 void testEnd() {
6868
6969 final int testTokenIndex = 4;
7070
7272
7373 generator.createFeatures(features, testSentence, testTokenIndex, null);
7474
75 Assert.assertEquals(2, features.size());
76 Assert.assertEquals("pw,w=example,sentence", features.get(0));
77 Assert.assertEquals("pwc,wc=lc,lc", features.get(1));
75 Assertions.assertEquals(2, features.size());
76 Assertions.assertEquals("pw,w=example,sentence", features.get(0));
77 Assertions.assertEquals("pwc,wc=lc,lc", features.get(1));
7878 }
7979
8080 @Test
81 public void testShort() {
81 void testShort() {
8282
8383 String[] shortSentence = new String[] {"word"};
8484
8888
8989 generator.createFeatures(features, shortSentence, testTokenIndex, null);
9090
91 Assert.assertEquals(0, features.size());
91 Assertions.assertEquals(0, features.size());
9292 }
9393 }
2020 import java.util.ArrayList;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Before;
25 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.BeforeEach;
25 import org.junit.jupiter.api.Test;
2626
2727 import opennlp.tools.formats.ResourceAsStreamFactory;
2828
2929 public class BrownBigramFeatureGeneratorTest {
3030
3131 private AdaptiveFeatureGenerator generator;
32
33 @Before
34 public void setup() throws IOException {
32
33 @BeforeEach
34 void setup() throws IOException {
3535
3636 ResourceAsStreamFactory stream = new ResourceAsStreamFactory(
3737 getClass(), "/opennlp/tools/formats/brown-cluster.txt");
3838
39 BrownCluster brownCluster = new BrownCluster(stream.createInputStream());
40
39 BrownCluster brownCluster = new BrownCluster(stream.createInputStream());
40
4141 generator = new BrownBigramFeatureGenerator(brownCluster);
4242
4343 }
4444
4545 @Test
46 public void createFeaturesTest() throws IOException {
46 void createFeaturesTest() {
4747
4848 String[] tokens = new String[] {"he", "went", "with", "you"};
4949
5050 List<String> features = new ArrayList<>();
5151 generator.createFeatures(features, tokens, 3, null);
5252
53 Assert.assertEquals(2, features.size());
54 Assert.assertTrue(features.contains("pbrowncluster,browncluster=0101,0010"));
55 Assert.assertTrue(features.contains("pbrowncluster,browncluster=01010,00101"));
56
53 Assertions.assertEquals(2, features.size());
54 Assertions.assertTrue(features.contains("pbrowncluster,browncluster=0101,0010"));
55 Assertions.assertTrue(features.contains("pbrowncluster,browncluster=01010,00101"));
56
5757 }
58
58
5959 @Test
60 public void createFeaturesSuccessiveTokensTest() throws IOException {
60 void createFeaturesSuccessiveTokensTest() {
6161
6262 final String[] testSentence = new String[] {"he", "went", "with", "you", "in", "town"};
6363
6464 List<String> features = new ArrayList<>();
6565 generator.createFeatures(features, testSentence, 3, null);
6666
67 Assert.assertEquals(3, features.size());
68 Assert.assertTrue(features.contains("pbrowncluster,browncluster=0101,0010"));
69 Assert.assertTrue(features.contains("pbrowncluster,browncluster=01010,00101"));
70 Assert.assertTrue(features.contains("browncluster,nbrowncluster=0010,0000"));
71
67 Assertions.assertEquals(3, features.size());
68 Assertions.assertTrue(features.contains("pbrowncluster,browncluster=0101,0010"));
69 Assertions.assertTrue(features.contains("pbrowncluster,browncluster=01010,00101"));
70 Assertions.assertTrue(features.contains("browncluster,nbrowncluster=0010,0000"));
71
7272 }
73
73
7474 @Test
75 public void noFeaturesTest() throws IOException {
75 void noFeaturesTest() {
7676
7777 final String[] testSentence = new String[] {"he", "went", "with", "you"};
7878
7979 List<String> features = new ArrayList<>();
8080 generator.createFeatures(features, testSentence, 0, null);
8181
82 Assert.assertEquals(0, features.size());
83
82 Assertions.assertEquals(0, features.size());
83
8484 }
8585
8686 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 /**
2727 * Test for the {@link CachedFeatureGenerator} class.
3737
3838 private List<String> features;
3939
40 @Before
41 public void setUp() throws Exception {
40 @BeforeEach
41 void setUp() {
4242
4343 testSentence1 = new String[] {"a1", "b1", "c1", "d1"};
4444
5151 * Tests if cache works for one sentence and two different token indexes.
5252 */
5353 @Test
54 public void testCachingOfSentence() {
54 void testCachingOfSentence() {
5555 CachedFeatureGenerator generator = new CachedFeatureGenerator(identityGenerator);
5656
5757 int testIndex = 0;
5959 // after this call features are cached for testIndex
6060 generator.createFeatures(features, testSentence1, testIndex, null);
6161
62 Assert.assertEquals(1, generator.getNumberOfCacheMisses());
63 Assert.assertEquals(0, generator.getNumberOfCacheHits());
62 Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
63 Assertions.assertEquals(0, generator.getNumberOfCacheHits());
6464
65 Assert.assertTrue(features.contains(testSentence1[testIndex]));
65 Assertions.assertTrue(features.contains(testSentence1[testIndex]));
6666
6767 features.clear();
6868
7474
7575 generator.createFeatures(features, testSentence1, testIndex, null);
7676
77 Assert.assertEquals(1, generator.getNumberOfCacheMisses());
78 Assert.assertEquals(1, generator.getNumberOfCacheHits());
77 Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
78 Assertions.assertEquals(1, generator.getNumberOfCacheHits());
7979
80 Assert.assertTrue(features.contains(expectedToken));
81 Assert.assertEquals(1, features.size());
80 Assertions.assertTrue(features.contains(expectedToken));
81 Assertions.assertEquals(1, features.size());
8282
8383 features.clear();
8484
8888
8989 generator.createFeatures(features, testSentence1, testIndex2, null);
9090
91 Assert.assertEquals(2, generator.getNumberOfCacheMisses());
92 Assert.assertEquals(1, generator.getNumberOfCacheHits());
93 Assert.assertTrue(features.contains(testSentence1[testIndex2]));
91 Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
92 Assertions.assertEquals(1, generator.getNumberOfCacheHits());
93 Assertions.assertTrue(features.contains(testSentence1[testIndex2]));
9494
9595 features.clear();
9696
9898
9999 generator.createFeatures(features, testSentence1, testIndex, null);
100100
101 Assert.assertTrue(features.contains(expectedToken));
101 Assertions.assertTrue(features.contains(expectedToken));
102102 }
103103
104104 /**
105105 * Tests if the cache was cleared after the sentence changed.
106106 */
107107 @Test
108 public void testCacheClearAfterSentenceChange() {
108 void testCacheClearAfterSentenceChange() {
109109 CachedFeatureGenerator generator = new CachedFeatureGenerator(identityGenerator);
110110
111111 int testIndex = 0;
118118 // use another sentence but same index
119119 generator.createFeatures(features, testSentence2, testIndex, null);
120120
121 Assert.assertEquals(2, generator.getNumberOfCacheMisses());
122 Assert.assertEquals(0, generator.getNumberOfCacheHits());
121 Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
122 Assertions.assertEquals(0, generator.getNumberOfCacheHits());
123123
124 Assert.assertTrue(features.contains(testSentence2[testIndex]));
125 Assert.assertEquals(1, features.size());
124 Assertions.assertTrue(features.contains(testSentence2[testIndex]));
125 Assertions.assertEquals(1, features.size());
126126
127127 features.clear();
128128
133133
134134 generator.createFeatures(features, testSentence2, testIndex, null);
135135
136 Assert.assertTrue(features.contains(expectedToken));
137 Assert.assertEquals(1, features.size());
136 Assertions.assertTrue(features.contains(expectedToken));
137 Assertions.assertEquals(1, features.size());
138138 }
139139 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class CharacterNgramFeatureGeneratorTest {
2727
2828 private List<String> features;
2929 static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3030
31 @Before
32 public void setUp() throws Exception {
31 @BeforeEach
32 void setUp() {
3333 features = new ArrayList<>();
3434 }
3535
3636 @Test
37 public void testDefault() {
37 void testDefault() {
3838
3939 final int testTokenIndex = 3;
4040
4343 generator.createFeatures(features, testSentence, testTokenIndex, null);
4444
4545 assertContainsNg(features,
46 "ex", "exa", "exam", "examp",
47 "xa", "xam", "xamp", "xampl",
48 "am", "amp", "ampl", "ample",
49 "mp", "mpl", "mple",
50 "pl", "ple",
51 "le");
46 "ex", "exa", "exam", "examp",
47 "xa", "xam", "xamp", "xampl",
48 "am", "amp", "ampl", "ample",
49 "mp", "mpl", "mple",
50 "pl", "ple",
51 "le");
5252 }
5353
5454 private static void assertContainsNg(List<String> features, String... elements) {
55 Assert.assertEquals(elements.length, features.size());
56 for (String e: elements) {
57 Assert.assertTrue(features.contains("ng=" + e));
55 Assertions.assertEquals(elements.length, features.size());
56 for (String e : elements) {
57 Assertions.assertTrue(features.contains("ng=" + e));
5858 }
5959 }
6060 }
2121 import java.util.List;
2222 import java.util.Map;
2323
24 import opennlp.tools.util.InvalidFormatException;
2524 import opennlp.tools.util.model.ArtifactSerializer;
2625
2726 @Deprecated // TODO: (OPENNLP-1174) remove back-compat support when it is unnecessary
5049
5150 @Override
5251 public void init(Map<String, String> properties,
53 FeatureGeneratorResourceProvider resourceProvider)
54 throws InvalidFormatException {
52 FeatureGeneratorResourceProvider resourceProvider) {
5553 }
5654 }
1616
1717 package opennlp.tools.util.featuregen;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 public class FeatureGeneratorUtilTest {
2323
2424 @Test
25 public void test() {
25 void test() {
2626 // digits
27 Assert.assertEquals("2d", FeatureGeneratorUtil.tokenFeature("12"));
28 Assert.assertEquals("4d", FeatureGeneratorUtil.tokenFeature("1234"));
29 Assert.assertEquals("an", FeatureGeneratorUtil.tokenFeature("abcd234"));
30 Assert.assertEquals("dd", FeatureGeneratorUtil.tokenFeature("1234-56"));
31 Assert.assertEquals("ds", FeatureGeneratorUtil.tokenFeature("4/6/2017"));
32 Assert.assertEquals("dc", FeatureGeneratorUtil.tokenFeature("1,234,567"));
33 Assert.assertEquals("dp", FeatureGeneratorUtil.tokenFeature("12.34567"));
34 Assert.assertEquals("num", FeatureGeneratorUtil.tokenFeature("123(456)7890"));
27 Assertions.assertEquals("2d", FeatureGeneratorUtil.tokenFeature("12"));
28 Assertions.assertEquals("4d", FeatureGeneratorUtil.tokenFeature("1234"));
29 Assertions.assertEquals("an", FeatureGeneratorUtil.tokenFeature("abcd234"));
30 Assertions.assertEquals("dd", FeatureGeneratorUtil.tokenFeature("1234-56"));
31 Assertions.assertEquals("ds", FeatureGeneratorUtil.tokenFeature("4/6/2017"));
32 Assertions.assertEquals("dc", FeatureGeneratorUtil.tokenFeature("1,234,567"));
33 Assertions.assertEquals("dp", FeatureGeneratorUtil.tokenFeature("12.34567"));
34 Assertions.assertEquals("num", FeatureGeneratorUtil.tokenFeature("123(456)7890"));
3535
3636 // letters
37 Assert.assertEquals("lc", FeatureGeneratorUtil.tokenFeature("opennlp"));
38 Assert.assertEquals("sc", FeatureGeneratorUtil.tokenFeature("O"));
39 Assert.assertEquals("ac", FeatureGeneratorUtil.tokenFeature("OPENNLP"));
40 Assert.assertEquals("cp", FeatureGeneratorUtil.tokenFeature("A."));
41 Assert.assertEquals("ic", FeatureGeneratorUtil.tokenFeature("Mike"));
42 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("somethingStupid"));
37 Assertions.assertEquals("lc", FeatureGeneratorUtil.tokenFeature("opennlp"));
38 Assertions.assertEquals("sc", FeatureGeneratorUtil.tokenFeature("O"));
39 Assertions.assertEquals("ac", FeatureGeneratorUtil.tokenFeature("OPENNLP"));
40 Assertions.assertEquals("cp", FeatureGeneratorUtil.tokenFeature("A."));
41 Assertions.assertEquals("ic", FeatureGeneratorUtil.tokenFeature("Mike"));
42 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("somethingStupid"));
4343
4444 // symbols
45 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature(","));
46 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("."));
47 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("?"));
48 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("!"));
49 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("#"));
50 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("%"));
51 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("&"));
45 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature(","));
46 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("."));
47 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("?"));
48 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("!"));
49 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("#"));
50 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("%"));
51 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("&"));
5252 }
5353
5454 @Test
55 public void testJapanese() {
55 void testJapanese() {
5656 // Hiragana
57 Assert.assertEquals("jah", FeatureGeneratorUtil.tokenFeature("そういえば"));
58 Assert.assertEquals("jah", FeatureGeneratorUtil.tokenFeature("おーぷん・そ〜す・そふとうぇあ"));
59 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("あぱっち・そふとうぇあ財団"));
57 Assertions.assertEquals("jah", FeatureGeneratorUtil.tokenFeature("そういえば"));
58 Assertions.assertEquals("jah", FeatureGeneratorUtil.tokenFeature("おーぷん・そ〜す・そふとうぇあ"));
59 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("あぱっち・そふとうぇあ財団"));
6060
6161 // Katakana
62 Assert.assertEquals("jak", FeatureGeneratorUtil.tokenFeature("ジャパン"));
63 Assert.assertEquals("jak", FeatureGeneratorUtil.tokenFeature("オープン・ソ〜ス・ソフトウェア"));
64 Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("アパッチ・ソフトウェア財団"));
62 Assertions.assertEquals("jak", FeatureGeneratorUtil.tokenFeature("ジャパン"));
63 Assertions.assertEquals("jak", FeatureGeneratorUtil.tokenFeature("オープン・ソ〜ス・ソフトウェア"));
64 Assertions.assertEquals("other", FeatureGeneratorUtil.tokenFeature("アパッチ・ソフトウェア財団"));
6565 }
6666 }
2222 import java.util.Collection;
2323 import java.util.Map;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 import opennlp.tools.util.InvalidFormatException;
2929 import opennlp.tools.util.featuregen.WordClusterDictionary.WordClusterDictionarySerializer;
3434 public class GeneratorFactoryClassicFormatTest {
3535
3636 @Test
37 public void testCreationWithTokenClassFeatureGenerator() throws Exception {
37 void testCreationWithTokenClassFeatureGenerator() throws Exception {
3838 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
3939 "/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig_classic.xml");
4040
4141 // If this fails the generator descriptor could not be found
4242 // at the expected location
43 Assert.assertNotNull(generatorDescriptorIn);
43 Assertions.assertNotNull(generatorDescriptorIn);
4444
4545 AggregatedFeatureGenerator aggregatedGenerator =
4646 (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null);
4747
48 Assert.assertEquals(1, aggregatedGenerator.getGenerators().size());
49 Assert.assertEquals(TokenClassFeatureGenerator.class.getName(),
48 Assertions.assertEquals(1, aggregatedGenerator.getGenerators().size());
49 Assertions.assertEquals(TokenClassFeatureGenerator.class.getName(),
5050 aggregatedGenerator.getGenerators().iterator().next().getClass().getName());
5151
5252 }
5353
5454 @Test
55 public void testCreationWihtSimpleDescriptor() throws Exception {
55 void testCreationWihtSimpleDescriptor() throws Exception {
5656 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
5757 "/opennlp/tools/util/featuregen/TestFeatureGeneratorConfig_classic.xml");
5858
5959 // If this fails the generator descriptor could not be found
6060 // at the expected location
61 Assert.assertNotNull(generatorDescriptorIn);
61 Assertions.assertNotNull(generatorDescriptorIn);
6262
6363 Collection<String> expectedGenerators = new ArrayList<>();
6464 expectedGenerators.add(OutcomePriorFeatureGenerator.class.getName());
6565
6666 AggregatedFeatureGenerator aggregatedGenerator =
6767 (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null);
68
6968
7069
7170 for (AdaptiveFeatureGenerator generator : aggregatedGenerator.getGenerators()) {
7776
7877 // If this fails not all expected generators were found and
7978 // removed from the expected generators collection
80 Assert.assertEquals(0, expectedGenerators.size());
79 Assertions.assertEquals(0, expectedGenerators.size());
8180 }
8281
8382 @Test
84 public void testCreationWithCustomGenerator() throws Exception {
83 void testCreationWithCustomGenerator() throws Exception {
8584 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
8685 "/opennlp/tools/util/featuregen/CustomClassLoading_classic.xml");
8786
8887 // If this fails the generator descriptor could not be found
8988 // at the expected location
90 Assert.assertNotNull(generatorDescriptorIn);
89 Assertions.assertNotNull(generatorDescriptorIn);
9190
9291 AggregatedFeatureGenerator aggregatedGenerator =
9392 (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null);
9493
9594 Collection<AdaptiveFeatureGenerator> embeddedGenerator = aggregatedGenerator.getGenerators();
9695
97 Assert.assertEquals(1, embeddedGenerator.size());
96 Assertions.assertEquals(1, embeddedGenerator.size());
9897
9998 for (AdaptiveFeatureGenerator generator : embeddedGenerator) {
100 Assert.assertEquals(TokenFeatureGenerator.class.getName(), generator.getClass().getName());
99 Assertions.assertEquals(TokenFeatureGenerator.class.getName(), generator.getClass().getName());
101100 }
102101 }
103102
105104 * Tests the creation from a descriptor which contains an unkown element.
106105 * The creation should fail with an {@link InvalidFormatException}
107106 */
108 @Test(expected = IOException.class)
109 public void testCreationWithUnkownElement() throws IOException {
107 @Test
108 void testCreationWithUnkownElement() {
110109
111 try (InputStream descIn = getClass().getResourceAsStream(
112 "/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement_classic.xml")) {
113 GeneratorFactory.create(descIn, null);
114 }
110 Assertions.assertThrows(IOException.class, () -> {
111
112 try (InputStream descIn = getClass().getResourceAsStream(
113 "/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement_classic.xml")) {
114 GeneratorFactory.create(descIn, null);
115 }
116 });
115117 }
116118
117119 @Test
118 public void testArtifactToSerializerMappingExtraction() throws IOException {
120 void testArtifactToSerializerMappingExtraction() throws IOException {
119121 // TODO: Define a new one here with custom elements ...
120122 InputStream descIn = getClass().getResourceAsStream(
121123 "/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers_classic.xml");
123125 Map<String, ArtifactSerializer<?>> mapping =
124126 GeneratorFactory.extractArtifactSerializerMappings(descIn);
125127
126 Assert.assertTrue(mapping.get("test.resource") instanceof WordClusterDictionarySerializer);
128 Assertions.assertTrue(mapping.get("test.resource") instanceof WordClusterDictionarySerializer);
127129 }
128130
129131 @Test
130 public void testDictionaryArtifactToSerializerMappingExtraction() throws IOException {
132 void testDictionaryArtifactToSerializerMappingExtraction() throws IOException {
131133
132134 InputStream descIn = getClass().getResourceAsStream(
133135 "/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction_classic.xml");
135137 Map<String, ArtifactSerializer<?>> mapping =
136138 GeneratorFactory.extractArtifactSerializerMappings(descIn);
137139
138 Assert.assertTrue(mapping.get("test.dictionary") instanceof DictionarySerializer);
140 Assertions.assertTrue(mapping.get("test.dictionary") instanceof DictionarySerializer);
139141 // TODO: if make the following effective, the test fails.
140142 // this is strange because DictionaryFeatureGeneratorFactory cast dictResource to Dictionary...
141143 //Assert.assertTrue(mapping.get("test.dictionary") instanceof
2323 import java.util.List;
2424 import java.util.Map;
2525
26 import org.junit.Assert;
27 import org.junit.Test;
26 import org.junit.jupiter.api.Assertions;
27 import org.junit.jupiter.api.Test;
2828
2929 import opennlp.tools.util.InvalidFormatException;
3030 import opennlp.tools.util.model.ArtifactSerializer;
7676 }
7777
7878 @Test
79 public void testCreationWithTokenClassFeatureGenerator() throws Exception {
79 void testCreationWithTokenClassFeatureGenerator() throws Exception {
8080 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
8181 "/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig.xml");
8282
8383 // If this fails the generator descriptor could not be found
8484 // at the expected location
85 Assert.assertNotNull(generatorDescriptorIn);
85 Assertions.assertNotNull(generatorDescriptorIn);
8686
8787 AggregatedFeatureGenerator aggregatedGenerator =
8888 (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null);
8989
90 Assert.assertEquals(1, aggregatedGenerator.getGenerators().size());
91 Assert.assertEquals(TokenClassFeatureGenerator.class.getName(),
90 Assertions.assertEquals(1, aggregatedGenerator.getGenerators().size());
91 Assertions.assertEquals(TokenClassFeatureGenerator.class.getName(),
9292 aggregatedGenerator.getGenerators().iterator().next().getClass().getName());
9393
9494 }
9595
9696 @Test
97 public void testCreationWihtSimpleDescriptor() throws Exception {
97 void testCreationWihtSimpleDescriptor() throws Exception {
9898 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
9999 "/opennlp/tools/util/featuregen/TestFeatureGeneratorConfig.xml");
100100
101101 // If this fails the generator descriptor could not be found
102102 // at the expected location
103 Assert.assertNotNull(generatorDescriptorIn);
103 Assertions.assertNotNull(generatorDescriptorIn);
104104
105105 Collection<String> expectedGenerators = new ArrayList<>();
106106 expectedGenerators.add(OutcomePriorFeatureGenerator.class.getName());
109109 (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null);
110110
111111
112
113112 for (AdaptiveFeatureGenerator generator : aggregatedGenerator.getGenerators()) {
114113
115114 expectedGenerators.remove(generator.getClass().getName());
119118
120119 // If this fails not all expected generators were found and
121120 // removed from the expected generators collection
122 Assert.assertEquals(0, expectedGenerators.size());
121 Assertions.assertEquals(0, expectedGenerators.size());
123122 }
124123
125124 /**
126125 * Tests the creation from a descriptor which contains an unknown element.
127126 * The creation should fail with an {@link InvalidFormatException}
128127 */
129 @Test(expected = IOException.class)
130 public void testCreationWithUnkownElement() throws IOException {
131
132 try (InputStream descIn = getClass().getResourceAsStream(
133 "/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml")) {
134 GeneratorFactory.create(descIn, null);
135 }
136 }
137
138 @Test
139 public void testDictionaryArtifactToSerializerMappingExtraction() throws IOException {
128 @Test
129 void testCreationWithUnkownElement() {
130
131 Assertions.assertThrows(IOException.class, () -> {
132
133 try (InputStream descIn = getClass().getResourceAsStream(
134 "/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml")) {
135 GeneratorFactory.create(descIn, null);
136 }
137 });
138 }
139
140 @Test
141 void testDictionaryArtifactToSerializerMappingExtraction() throws IOException {
140142
141143 InputStream descIn = getClass().getResourceAsStream(
142144 "/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction.xml");
143145
144146 Map<String, ArtifactSerializer<?>> mapping =
145 GeneratorFactory.extractArtifactSerializerMappings(descIn);
146
147 Assert.assertTrue(mapping.get("test.dictionary") instanceof DictionarySerializer);
147 GeneratorFactory.extractArtifactSerializerMappings(descIn);
148
149 Assertions.assertTrue(mapping.get("test.dictionary") instanceof DictionarySerializer);
148150 // TODO: if the following assertion is enabled, the test fails.
149151 // this is strange because DictionaryFeatureGeneratorFactory casts dictResource to Dictionary...
150152 //Assert.assertTrue(mapping.get("test.dictionary") instanceof
152154 }
153155
154156 @Test
155 public void testParameters() throws Exception {
157 void testParameters() throws Exception {
156158 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
157159 "/opennlp/tools/util/featuregen/TestParametersConfig.xml");
158160
159161 // If this fails the generator descriptor could not be found
160162 // at the expected location
161 Assert.assertNotNull(generatorDescriptorIn);
163 Assertions.assertNotNull(generatorDescriptorIn);
162164
163165 AdaptiveFeatureGenerator generator = GeneratorFactory.create(generatorDescriptorIn, null);
164 Assert.assertTrue(generator instanceof TestParametersFeatureGenerator);
165
166 TestParametersFeatureGenerator featureGenerator = (TestParametersFeatureGenerator)generator;
167 Assert.assertEquals(123, featureGenerator.ip);
168 Assert.assertEquals(45, featureGenerator.fp, 0.1);
169 Assert.assertEquals(67890, featureGenerator.lp);
170 Assert.assertEquals(123456.789, featureGenerator.dp, 0.1);
171 Assert.assertTrue(featureGenerator.bp);
172 Assert.assertEquals("HELLO", featureGenerator.sp);
173 }
174
175 @Test
176 public void testNotAutomaticallyInsertAggregatedFeatureGenerator() throws Exception {
166 Assertions.assertTrue(generator instanceof TestParametersFeatureGenerator);
167
168 TestParametersFeatureGenerator featureGenerator = (TestParametersFeatureGenerator) generator;
169 Assertions.assertEquals(123, featureGenerator.ip);
170 Assertions.assertEquals(45, featureGenerator.fp, 0.1); // expected, actual, delta
171 Assertions.assertEquals(67890, featureGenerator.lp);
172 Assertions.assertEquals(123456.789, featureGenerator.dp, 0.1); // expected, actual, delta
173 Assertions.assertTrue(featureGenerator.bp);
174 Assertions.assertEquals("HELLO", featureGenerator.sp);
175 }
176
177 @Test
178 void testNotAutomaticallyInsertAggregatedFeatureGenerator() throws Exception {
177179 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
178180 "/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGenerator.xml");
179181
180182 // If this fails the generator descriptor could not be found
181183 // at the expected location
182 Assert.assertNotNull(generatorDescriptorIn);
183
184 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
185 Assert.assertTrue(featureGenerator instanceof OutcomePriorFeatureGenerator);
186 }
187
188 @Test
189 public void testAutomaticallyInsertAggregatedFeatureGenerator() throws Exception {
184 Assertions.assertNotNull(generatorDescriptorIn);
185
186 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
187 Assertions.assertTrue(featureGenerator instanceof OutcomePriorFeatureGenerator);
188 }
189
190 @Test
191 void testAutomaticallyInsertAggregatedFeatureGenerator() throws Exception {
190192 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
191193 "/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGenerator.xml");
192194
193195 // If this fails the generator descriptor could not be found
194196 // at the expected location
195 Assert.assertNotNull(generatorDescriptorIn);
196
197 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
198 Assert.assertTrue(featureGenerator instanceof AggregatedFeatureGenerator);
199
200 AggregatedFeatureGenerator aggregatedFeatureGenerator = (AggregatedFeatureGenerator)featureGenerator;
201 Assert.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
202 for (AdaptiveFeatureGenerator afg: aggregatedFeatureGenerator.getGenerators()) {
203 Assert.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
204 }
205 }
206
207 @Test
208 public void testNotAutomaticallyInsertAggregatedFeatureGeneratorChild() throws Exception {
197 Assertions.assertNotNull(generatorDescriptorIn);
198
199 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
200 Assertions.assertTrue(featureGenerator instanceof AggregatedFeatureGenerator);
201
202 AggregatedFeatureGenerator aggregatedFeatureGenerator = (AggregatedFeatureGenerator) featureGenerator;
203 Assertions.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
204 for (AdaptiveFeatureGenerator afg : aggregatedFeatureGenerator.getGenerators()) {
205 Assertions.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
206 }
207 }
208
209 @Test
210 void testNotAutomaticallyInsertAggregatedFeatureGeneratorChild() throws Exception {
209211 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
210212 "/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
211213
212214 // If this fails the generator descriptor could not be found
213215 // at the expected location
214 Assert.assertNotNull(generatorDescriptorIn);
215
216 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
217 Assert.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
218
219 CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator)featureGenerator;
220 Assert.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
216 Assertions.assertNotNull(generatorDescriptorIn);
217
218 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
219 Assertions.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
220
221 CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator) featureGenerator;
222 Assertions.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
221223 instanceof OutcomePriorFeatureGenerator);
222224 }
223225
224226 @Test
225 public void testAutomaticallyInsertAggregatedFeatureGeneratorChildren() throws Exception {
227 void testAutomaticallyInsertAggregatedFeatureGeneratorChildren() throws Exception {
226228 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
227229 "/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
228230
229231 // If this fails the generator descriptor could not be found
230232 // at the expected location
231 Assert.assertNotNull(generatorDescriptorIn);
232
233 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
234 Assert.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
235
236 CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator)featureGenerator;
233 Assertions.assertNotNull(generatorDescriptorIn);
234
235 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
236 Assertions.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
237
238 CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator) featureGenerator;
237239 AdaptiveFeatureGenerator afg = cachedFeatureGenerator.getCachedFeatureGenerator();
238 Assert.assertTrue(afg instanceof AggregatedFeatureGenerator);
239
240 AggregatedFeatureGenerator aggregatedFeatureGenerator = (AggregatedFeatureGenerator)afg;
241 Assert.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
242 for (AdaptiveFeatureGenerator afgen: aggregatedFeatureGenerator.getGenerators()) {
243 Assert.assertTrue(afgen instanceof OutcomePriorFeatureGenerator);
244 }
245 }
246
247 @Test
248 public void testInsertCachedFeatureGenerator() throws Exception {
240 Assertions.assertTrue(afg instanceof AggregatedFeatureGenerator);
241
242 AggregatedFeatureGenerator aggregatedFeatureGenerator = (AggregatedFeatureGenerator) afg;
243 Assertions.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
244 for (AdaptiveFeatureGenerator afgen : aggregatedFeatureGenerator.getGenerators()) {
245 Assertions.assertTrue(afgen instanceof OutcomePriorFeatureGenerator);
246 }
247 }
248
249 @Test
250 void testInsertCachedFeatureGenerator() throws Exception {
249251 InputStream generatorDescriptorIn = getClass().getResourceAsStream(
250252 "/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml");
251253
252254 // If this fails the generator descriptor could not be found
253255 // at the expected location
254 Assert.assertNotNull(generatorDescriptorIn);
255
256 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
257 Assert.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
258 CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator)featureGenerator;
259
260 Assert.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
256 Assertions.assertNotNull(generatorDescriptorIn);
257
258 AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
259 Assertions.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
260 CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator) featureGenerator;
261
262 Assertions.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
261263 instanceof AggregatedFeatureGenerator);
262264 AggregatedFeatureGenerator aggregatedFeatureGenerator =
263 (AggregatedFeatureGenerator)cachedFeatureGenerator.getCachedFeatureGenerator();
264 Assert.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
265 for (AdaptiveFeatureGenerator afg: aggregatedFeatureGenerator.getGenerators()) {
266 Assert.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
265 (AggregatedFeatureGenerator) cachedFeatureGenerator.getCachedFeatureGenerator();
266 Assertions.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
267 for (AdaptiveFeatureGenerator afg : aggregatedFeatureGenerator.getGenerators()) {
268 Assertions.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
267269 }
268270 }
269271 }
1818
1919 import java.util.List;
2020
21 class IdentityFeatureGenerator implements AdaptiveFeatureGenerator {
21 public class IdentityFeatureGenerator implements AdaptiveFeatureGenerator {
2222
2323 public void createFeatures(List<String> features, String[] tokens, int index,
2424 String[] previousOutcomes) {
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 import opennlp.tools.namefind.TokenNameFinder;
2626 import opennlp.tools.util.Span;
3939 public Span[] find(String[] tokens) {
4040 for (int i = 0; i < tokens.length; i++) {
4141 if (theName.equals(tokens[i])) {
42 return new Span[]{ new Span(i, i + 1, "person") };
42 return new Span[] {new Span(i, i + 1, "person")};
4343 }
4444 }
4545
46 return new Span[]{};
46 return new Span[] {};
4747 }
4848
4949 @Override
5252 }
5353
5454 @Test
55 public void test() {
55 void test() {
5656
5757 List<String> features = new ArrayList<>();
5858
59 String[] testSentence = new String[]{ "Every", "John", "has", "its", "day", "." };
59 String[] testSentence = new String[] {"Every", "John", "has", "its", "day", "."};
6060
6161 AdaptiveFeatureGenerator generator = new InSpanGenerator("john", new SimpleSpecificPersonFinder("John"));
6262
6363 generator.createFeatures(features, testSentence, 0, null);
64 Assert.assertEquals(0, features.size());
64 Assertions.assertEquals(0, features.size());
6565
6666 features.clear();
6767 generator.createFeatures(features, testSentence, 1, null);
68 Assert.assertEquals(2, features.size());
69 Assert.assertEquals("john:w=dic", features.get(0));
70 Assert.assertEquals("john:w=dic=John", features.get(1));
68 Assertions.assertEquals(2, features.size());
69 Assertions.assertEquals("john:w=dic", features.get(0));
70 Assertions.assertEquals("john:w=dic=John", features.get(1));
7171 }
7272 }
2020 import java.util.ArrayList;
2121 import java.util.List;
2222
23 import org.junit.Assert;
24 import org.junit.Test;
23 import org.junit.jupiter.api.Assertions;
24 import org.junit.jupiter.api.Test;
2525
2626 import opennlp.tools.postag.POSTaggerMETest;
2727 import opennlp.tools.util.model.ModelType;
3030
3131
3232 @Test
33 public void testFeatureGeneration() throws IOException {
33 void testFeatureGeneration() throws IOException {
3434 POSTaggerNameFeatureGenerator fg = new POSTaggerNameFeatureGenerator(
3535 POSTaggerMETest.trainPOSModel(ModelType.MAXENT));
3636
3838 for (int i = 0; i < tokens.length; i++) {
3939 List<String> feats = new ArrayList<>();
4040 fg.createFeatures(feats, tokens, i, null);
41 Assert.assertTrue(feats.get(0).startsWith("pos="));
41 Assertions.assertTrue(feats.get(0).startsWith("pos="));
4242 }
4343 }
4444 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class PosTaggerFeatureGeneratorTest {
2727
2929 static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3030 static String[] testTags = new String[] {"DT", "VBZ", "DT", "NN", "NN"};
3131
32 @Before
33 public void setUp() throws Exception {
32 @BeforeEach
33 void setUp() {
3434 features = new ArrayList<>();
3535 }
3636
3737 @Test
38 public void testBegin() {
38 void testBegin() {
3939
4040 final int testTokenIndex = 0;
4141
4343
4444 generator.createFeatures(features, testSentence, testTokenIndex, testTags);
4545
46 Assert.assertEquals(0, features.size());
46 Assertions.assertEquals(0, features.size());
4747 }
4848
4949 @Test
50 public void testNext() {
50 void testNext() {
5151
5252 final int testTokenIndex = 1;
5353
5555
5656 generator.createFeatures(features, testSentence, testTokenIndex, testTags);
5757
58 Assert.assertEquals(1, features.size());
59 Assert.assertEquals("t=DT", features.get(0));
58 Assertions.assertEquals(1, features.size());
59 Assertions.assertEquals("t=DT", features.get(0));
6060 }
6161
6262 @Test
63 public void testMiddle() {
63 void testMiddle() {
6464
6565 final int testTokenIndex = 3;
6666
6868
6969 generator.createFeatures(features, testSentence, testTokenIndex, testTags);
7070
71 Assert.assertEquals(2, features.size());
72 Assert.assertEquals("t=DT", features.get(0));
73 Assert.assertEquals("t2=VBZ,DT", features.get(1));
71 Assertions.assertEquals(2, features.size());
72 Assertions.assertEquals("t=DT", features.get(0));
73 Assertions.assertEquals("t2=VBZ,DT", features.get(1));
7474 }
7575 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class PrefixFeatureGeneratorTest {
2727
2828 private List<String> features;
2929
30 @Before
31 public void setUp() throws Exception {
30 @BeforeEach
31 void setUp() {
3232 features = new ArrayList<>();
3333 }
3434
3535 @Test
36 public void lengthTest1() {
37
36 void lengthTest1() {
37
3838 String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3939
4040 int testTokenIndex = 0;
4141 int suffixLength = 2;
42
43 AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
42
43 AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
4444
4545 generator.createFeatures(features, testSentence, testTokenIndex, null);
46
47 Assert.assertEquals(2, features.size());
48 Assert.assertEquals("pre=T", features.get(0));
49 Assert.assertEquals("pre=Th", features.get(1));
50
46
47 Assertions.assertEquals(2, features.size());
48 Assertions.assertEquals("pre=T", features.get(0));
49 Assertions.assertEquals("pre=Th", features.get(1));
50
5151 }
52
52
5353 @Test
54 public void lengthTest2() {
55
54 void lengthTest2() {
55
5656 String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
5757
5858 int testTokenIndex = 3;
5959 int suffixLength = 5;
60
61 AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
60
61 AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
6262
6363 generator.createFeatures(features, testSentence, testTokenIndex, null);
64
65 Assert.assertEquals(5, features.size());
66 Assert.assertEquals("pre=e", features.get(0));
67 Assert.assertEquals("pre=ex", features.get(1));
68 Assert.assertEquals("pre=exa", features.get(2));
69 Assert.assertEquals("pre=exam", features.get(3));
70 Assert.assertEquals("pre=examp", features.get(4));
71
64
65 Assertions.assertEquals(5, features.size());
66 Assertions.assertEquals("pre=e", features.get(0));
67 Assertions.assertEquals("pre=ex", features.get(1));
68 Assertions.assertEquals("pre=exa", features.get(2));
69 Assertions.assertEquals("pre=exam", features.get(3));
70 Assertions.assertEquals("pre=examp", features.get(4));
71
7272 }
73
73
7474 @Test
75 public void lengthTest3() {
76
75 void lengthTest3() {
76
7777 String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
7878
7979 int testTokenIndex = 1;
8080 int suffixLength = 5;
81
82 AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
81
82 AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
8383
8484 generator.createFeatures(features, testSentence, testTokenIndex, null);
85
86 Assert.assertEquals(2, features.size());
87 Assert.assertEquals("pre=i", features.get(0));
88 Assert.assertEquals("pre=is", features.get(1));
89
85
86 Assertions.assertEquals(2, features.size());
87 Assertions.assertEquals("pre=i", features.get(0));
88 Assertions.assertEquals("pre=is", features.get(1));
89
9090 }
9191 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 /**
2626 * Test for the {@link PreviousMapFeatureGenerator} class.
2828 public class PreviousMapFeatureGeneratorTest {
2929
3030 @Test
31 public void testFeatureGeneration() {
31 void testFeatureGeneration() {
3232
3333 AdaptiveFeatureGenerator fg = new PreviousMapFeatureGenerator();
3434
3838
3939 // this should generate the pd=null feature
4040 fg.createFeatures(features, sentence, 0, null);
41 Assert.assertEquals(1, features.size());
42 Assert.assertEquals("pd=null", features.get(0));
41 Assertions.assertEquals(1, features.size());
42 Assertions.assertEquals("pd=null", features.get(0));
4343
4444 features.clear();
4545
4646 // this should generate the pd=1 feature
4747 fg.updateAdaptiveData(sentence, new String[] {"1", "2", "3"});
4848 fg.createFeatures(features, sentence, 0, null);
49 Assert.assertEquals(1, features.size());
50 Assert.assertEquals("pd=1", features.get(0));
49 Assertions.assertEquals(1, features.size());
50 Assertions.assertEquals("pd=1", features.get(0));
5151
5252 features.clear();
5353
5555 // the adaptive data was cleared
5656 fg.clearAdaptiveData();
5757 fg.createFeatures(features, sentence, 0, null);
58 Assert.assertEquals(1, features.size());
59 Assert.assertEquals("pd=null", features.get(0));
58 Assertions.assertEquals(1, features.size());
59 Assertions.assertEquals("pd=null", features.get(0));
6060 }
6161 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.Test;
2424
2525 public class PreviousTwoMapFeatureGeneratorTest {
2626
2727 @Test
28 public void testFeatureGeneration() {
28 void testFeatureGeneration() {
2929
3030 AdaptiveFeatureGenerator fg = new PreviousTwoMapFeatureGenerator();
3131
3535
3636 // this should generate no features
3737 fg.createFeatures(features, sentence, 0, null);
38 Assert.assertEquals(0, features.size());
38 Assertions.assertEquals(0, features.size());
3939
4040 // this should generate the ppd=null,null feature
4141 fg.createFeatures(features, sentence, 1, null);
42 Assert.assertEquals(1, features.size());
43 Assert.assertEquals("ppd=null,null", features.get(0));
42 Assertions.assertEquals(1, features.size());
43 Assertions.assertEquals("ppd=null,null", features.get(0));
4444
4545 features.clear();
4646
4747 // this should generate the ppd=2,1 feature
4848 fg.updateAdaptiveData(sentence, new String[] {"1", "2", "3"});
4949 fg.createFeatures(features, sentence, 1, null);
50 Assert.assertEquals(1, features.size());
51 Assert.assertEquals("ppd=2,1", features.get(0));
50 Assertions.assertEquals(1, features.size());
51 Assertions.assertEquals("ppd=2,1", features.get(0));
5252
5353 features.clear();
5454
5656 // the adaptive data was cleared
5757 fg.clearAdaptiveData();
5858 fg.createFeatures(features, sentence, 1, null);
59 Assert.assertEquals(1, features.size());
60 Assert.assertEquals("ppd=null,null", features.get(0));
59 Assertions.assertEquals(1, features.size());
60 Assertions.assertEquals("ppd=null,null", features.get(0));
6161 }
6262 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class SentenceFeatureGeneratorTest {
2727
2929 static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3030 static String[] testShort = new String[] {"word"};
3131
32 @Before
33 public void setUp() throws Exception {
32 @BeforeEach
33 void setUp() {
3434 features = new ArrayList<>();
3535 }
3636
3737 @Test
38 public void testTT() {
38 void testTT() {
3939 AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, true);
4040
4141 generator.createFeatures(features, testSentence, 2, null);
42 Assert.assertEquals(0, features.size());
42 Assertions.assertEquals(0, features.size());
4343
4444 generator.createFeatures(features, testSentence, 0, null);
45 Assert.assertEquals(1, features.size());
46 Assert.assertEquals("S=begin", features.get(0));
45 Assertions.assertEquals(1, features.size());
46 Assertions.assertEquals("S=begin", features.get(0));
4747
4848 features.clear();
4949
5050 generator.createFeatures(features, testSentence, testSentence.length - 1, null);
51 Assert.assertEquals(1, features.size());
52 Assert.assertEquals("S=end", features.get(0));
51 Assertions.assertEquals(1, features.size());
52 Assertions.assertEquals("S=end", features.get(0));
5353
5454 features.clear();
5555
5656 generator.createFeatures(features, testShort, 0, null);
57 Assert.assertEquals(2, features.size());
58 Assert.assertEquals("S=begin", features.get(0));
59 Assert.assertEquals("S=end", features.get(1));
57 Assertions.assertEquals(2, features.size());
58 Assertions.assertEquals("S=begin", features.get(0));
59 Assertions.assertEquals("S=end", features.get(1));
6060 }
6161
6262 @Test
63 public void testTF() {
63 void testTF() {
6464 AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, false);
6565
6666 generator.createFeatures(features, testSentence, 2, null);
67 Assert.assertEquals(0, features.size());
67 Assertions.assertEquals(0, features.size());
6868
6969 generator.createFeatures(features, testSentence, 0, null);
70 Assert.assertEquals(1, features.size());
71 Assert.assertEquals("S=begin", features.get(0));
70 Assertions.assertEquals(1, features.size());
71 Assertions.assertEquals("S=begin", features.get(0));
7272
7373 features.clear();
7474
7575 generator.createFeatures(features, testSentence, testSentence.length - 1, null);
76 Assert.assertEquals(0, features.size());
76 Assertions.assertEquals(0, features.size());
7777
7878 features.clear();
7979
8080 generator.createFeatures(features, testShort, 0, null);
81 Assert.assertEquals(1, features.size());
82 Assert.assertEquals("S=begin", features.get(0));
81 Assertions.assertEquals(1, features.size());
82 Assertions.assertEquals("S=begin", features.get(0));
8383 }
8484
8585 @Test
86 public void testFT() {
86 void testFT() {
8787 AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, true);
8888
8989 generator.createFeatures(features, testSentence, 2, null);
90 Assert.assertEquals(0, features.size());
90 Assertions.assertEquals(0, features.size());
9191
9292 generator.createFeatures(features, testSentence, 0, null);
93 Assert.assertEquals(0, features.size());
93 Assertions.assertEquals(0, features.size());
9494
9595 generator.createFeatures(features, testSentence, testSentence.length - 1, null);
96 Assert.assertEquals(1, features.size());
97 Assert.assertEquals("S=end", features.get(0));
96 Assertions.assertEquals(1, features.size());
97 Assertions.assertEquals("S=end", features.get(0));
9898
9999 features.clear();
100100
101101 generator.createFeatures(features, testShort, 0, null);
102 Assert.assertEquals(1, features.size());
103 Assert.assertEquals("S=end", features.get(0));
102 Assertions.assertEquals(1, features.size());
103 Assertions.assertEquals("S=end", features.get(0));
104104 }
105105
106106 @Test
107 public void testFF() {
107 void testFF() {
108108 AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, false);
109109
110110 generator.createFeatures(features, testSentence, 2, null);
111 Assert.assertEquals(0, features.size());
111 Assertions.assertEquals(0, features.size());
112112
113113 generator.createFeatures(features, testSentence, 0, null);
114 Assert.assertEquals(0, features.size());
114 Assertions.assertEquals(0, features.size());
115115
116116 generator.createFeatures(features, testSentence, testSentence.length - 1, null);
117 Assert.assertEquals(0, features.size());
117 Assertions.assertEquals(0, features.size());
118118
119119 generator.createFeatures(features, testShort, 0, null);
120 Assert.assertEquals(0, features.size());
120 Assertions.assertEquals(0, features.size());
121121 }
122122 }
1616
1717 package opennlp.tools.util.featuregen;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222 public class StringPatternTest {
2323
2424 @Test
25 public void testIsAllLetters() {
26 Assert.assertTrue(StringPattern.recognize("test").isAllLetter());
27 Assert.assertTrue(StringPattern.recognize("TEST").isAllLetter());
28 Assert.assertTrue(StringPattern.recognize("TesT").isAllLetter());
29 Assert.assertTrue(StringPattern.recognize("grün").isAllLetter());
30 Assert.assertTrue(StringPattern.recognize("üäöæß").isAllLetter());
31 Assert.assertTrue(StringPattern.recognize("あア亜Aa").isAllLetter());
25 void testIsAllLetters() {
26 Assertions.assertTrue(StringPattern.recognize("test").isAllLetter());
27 Assertions.assertTrue(StringPattern.recognize("TEST").isAllLetter());
28 Assertions.assertTrue(StringPattern.recognize("TesT").isAllLetter());
29 Assertions.assertTrue(StringPattern.recognize("grün").isAllLetter());
30 Assertions.assertTrue(StringPattern.recognize("üäöæß").isAllLetter());
31 Assertions.assertTrue(StringPattern.recognize("あア亜Aa").isAllLetter());
3232 }
3333
3434 @Test
35 public void testIsInitialCapitalLetter() {
36 Assert.assertTrue(StringPattern.recognize("Test").isInitialCapitalLetter());
37 Assert.assertFalse(StringPattern.recognize("tEST").isInitialCapitalLetter());
38 Assert.assertTrue(StringPattern.recognize("TesT").isInitialCapitalLetter());
39 Assert.assertTrue(StringPattern.recognize("Üäöæß").isInitialCapitalLetter());
40 Assert.assertFalse(StringPattern.recognize("いイ井").isInitialCapitalLetter());
41 Assert.assertTrue(StringPattern.recognize("Iいイ井").isInitialCapitalLetter());
42 Assert.assertTrue(StringPattern.recognize("Iいイ井").isInitialCapitalLetter());
35 void testIsInitialCapitalLetter() {
36 Assertions.assertTrue(StringPattern.recognize("Test").isInitialCapitalLetter());
37 Assertions.assertFalse(StringPattern.recognize("tEST").isInitialCapitalLetter());
38 Assertions.assertTrue(StringPattern.recognize("TesT").isInitialCapitalLetter());
39 Assertions.assertTrue(StringPattern.recognize("Üäöæß").isInitialCapitalLetter());
40 Assertions.assertFalse(StringPattern.recognize("いイ井").isInitialCapitalLetter());
41 Assertions.assertTrue(StringPattern.recognize("Iいイ井").isInitialCapitalLetter());
42 Assertions.assertTrue(StringPattern.recognize("Iいイ井").isInitialCapitalLetter());
4343 }
4444
4545 @Test
46 public void testIsAllCapitalLetter() {
47 Assert.assertTrue(StringPattern.recognize("TEST").isAllCapitalLetter());
48 Assert.assertTrue(StringPattern.recognize("ÄÄÄÜÜÜÖÖÖÖ").isAllCapitalLetter());
49 Assert.assertFalse(StringPattern.recognize("ÄÄÄÜÜÜÖÖä").isAllCapitalLetter());
50 Assert.assertFalse(StringPattern.recognize("ÄÄÄÜÜdÜÖÖ").isAllCapitalLetter());
51 Assert.assertTrue(StringPattern.recognize("ABC").isAllCapitalLetter());
52 Assert.assertFalse(StringPattern.recognize("うウ宇").isAllCapitalLetter());
46 void testIsAllCapitalLetter() {
47 Assertions.assertTrue(StringPattern.recognize("TEST").isAllCapitalLetter());
48 Assertions.assertTrue(StringPattern.recognize("ÄÄÄÜÜÜÖÖÖÖ").isAllCapitalLetter());
49 Assertions.assertFalse(StringPattern.recognize("ÄÄÄÜÜÜÖÖä").isAllCapitalLetter());
50 Assertions.assertFalse(StringPattern.recognize("ÄÄÄÜÜdÜÖÖ").isAllCapitalLetter());
51 Assertions.assertTrue(StringPattern.recognize("ABC").isAllCapitalLetter());
52 Assertions.assertFalse(StringPattern.recognize("うウ宇").isAllCapitalLetter());
5353 }
5454
5555 @Test
56 public void testIsAllLowerCaseLetter() {
57 Assert.assertTrue(StringPattern.recognize("test").isAllLowerCaseLetter());
58 Assert.assertTrue(StringPattern.recognize("öäü").isAllLowerCaseLetter());
59 Assert.assertTrue(StringPattern.recognize("öäüßßß").isAllLowerCaseLetter());
60 Assert.assertFalse(StringPattern.recognize("Test").isAllLowerCaseLetter());
61 Assert.assertFalse(StringPattern.recognize("TEST").isAllLowerCaseLetter());
62 Assert.assertFalse(StringPattern.recognize("testT").isAllLowerCaseLetter());
63 Assert.assertFalse(StringPattern.recognize("tesÖt").isAllLowerCaseLetter());
64 Assert.assertTrue(StringPattern.recognize("abc").isAllLowerCaseLetter());
65 Assert.assertFalse(StringPattern.recognize("えエ絵").isAllLowerCaseLetter());
56 void testIsAllLowerCaseLetter() {
57 Assertions.assertTrue(StringPattern.recognize("test").isAllLowerCaseLetter());
58 Assertions.assertTrue(StringPattern.recognize("öäü").isAllLowerCaseLetter());
59 Assertions.assertTrue(StringPattern.recognize("öäüßßß").isAllLowerCaseLetter());
60 Assertions.assertFalse(StringPattern.recognize("Test").isAllLowerCaseLetter());
61 Assertions.assertFalse(StringPattern.recognize("TEST").isAllLowerCaseLetter());
62 Assertions.assertFalse(StringPattern.recognize("testT").isAllLowerCaseLetter());
63 Assertions.assertFalse(StringPattern.recognize("tesÖt").isAllLowerCaseLetter());
64 Assertions.assertTrue(StringPattern.recognize("abc").isAllLowerCaseLetter());
65 Assertions.assertFalse(StringPattern.recognize("えエ絵").isAllLowerCaseLetter());
6666 }
6767
6868 @Test
69 public void testIsAllDigit() {
70 Assert.assertTrue(StringPattern.recognize("123456").isAllDigit());
71 Assert.assertFalse(StringPattern.recognize("123,56").isAllDigit());
72 Assert.assertFalse(StringPattern.recognize("12356f").isAllDigit());
73 Assert.assertTrue(StringPattern.recognize("123456").isAllDigit());
69 void testIsAllDigit() {
70 Assertions.assertTrue(StringPattern.recognize("123456").isAllDigit());
71 Assertions.assertFalse(StringPattern.recognize("123,56").isAllDigit());
72 Assertions.assertFalse(StringPattern.recognize("12356f").isAllDigit());
73 Assertions.assertTrue(StringPattern.recognize("123456").isAllDigit());
7474 }
7575
7676 @Test
77 public void testIsAllHiragana() {
78 Assert.assertTrue(StringPattern.recognize("あぱっち・るしーん").isAllHiragana());
79 Assert.assertFalse(StringPattern.recognize("あぱっち・そふとうぇあ財団").isAllHiragana());
80 Assert.assertFalse(StringPattern.recognize("あぱっち・るしーんV1.0").isAllHiragana());
77 void testIsAllHiragana() {
78 Assertions.assertTrue(StringPattern.recognize("あぱっち・るしーん").isAllHiragana());
79 Assertions.assertFalse(StringPattern.recognize("あぱっち・そふとうぇあ財団").isAllHiragana());
80 Assertions.assertFalse(StringPattern.recognize("あぱっち・るしーんV1.0").isAllHiragana());
8181 }
8282
8383 @Test
84 public void testIsAllKatakana() {
85 Assert.assertTrue(StringPattern.recognize("アパッチ・ルシーン").isAllKatakana());
86 Assert.assertFalse(StringPattern.recognize("アパッチ・ソフトウェア財団").isAllKatakana());
87 Assert.assertFalse(StringPattern.recognize("アパッチ・ルシーンV1.0").isAllKatakana());
84 void testIsAllKatakana() {
85 Assertions.assertTrue(StringPattern.recognize("アパッチ・ルシーン").isAllKatakana());
86 Assertions.assertFalse(StringPattern.recognize("アパッチ・ソフトウェア財団").isAllKatakana());
87 Assertions.assertFalse(StringPattern.recognize("アパッチ・ルシーンV1.0").isAllKatakana());
8888 }
8989
9090 @Test
91 public void testDigits() {
92 Assert.assertEquals(6, StringPattern.recognize("123456").digits());
93 Assert.assertEquals(3, StringPattern.recognize("123fff").digits());
94 Assert.assertEquals(0, StringPattern.recognize("test").digits());
95 Assert.assertEquals(3, StringPattern.recognize("123fff").digits());
91 void testDigits() {
92 Assertions.assertEquals(6, StringPattern.recognize("123456").digits());
93 Assertions.assertEquals(3, StringPattern.recognize("123fff").digits());
94 Assertions.assertEquals(0, StringPattern.recognize("test").digits());
95 Assertions.assertEquals(3, StringPattern.recognize("123fff").digits());
9696 }
9797
9898 @Test
99 public void testContainsPeriod() {
100 Assert.assertTrue(StringPattern.recognize("test.").containsPeriod());
101 Assert.assertTrue(StringPattern.recognize("23.5").containsPeriod());
102 Assert.assertFalse(StringPattern.recognize("test,/-1").containsPeriod());
99 void testContainsPeriod() {
100 Assertions.assertTrue(StringPattern.recognize("test.").containsPeriod());
101 Assertions.assertTrue(StringPattern.recognize("23.5").containsPeriod());
102 Assertions.assertFalse(StringPattern.recognize("test,/-1").containsPeriod());
103103 }
104104
105105 @Test
106 public void testContainsComma() {
107 Assert.assertTrue(StringPattern.recognize("test,").containsComma());
108 Assert.assertTrue(StringPattern.recognize("23,5").containsComma());
109 Assert.assertFalse(StringPattern.recognize("test./-1").containsComma());
106 void testContainsComma() {
107 Assertions.assertTrue(StringPattern.recognize("test,").containsComma());
108 Assertions.assertTrue(StringPattern.recognize("23,5").containsComma());
109 Assertions.assertFalse(StringPattern.recognize("test./-1").containsComma());
110110 }
111111
112112 @Test
113 public void testContainsSlash() {
114 Assert.assertTrue(StringPattern.recognize("test/").containsSlash());
115 Assert.assertTrue(StringPattern.recognize("23/5").containsSlash());
116 Assert.assertFalse(StringPattern.recognize("test.1-,").containsSlash());
113 void testContainsSlash() {
114 Assertions.assertTrue(StringPattern.recognize("test/").containsSlash());
115 Assertions.assertTrue(StringPattern.recognize("23/5").containsSlash());
116 Assertions.assertFalse(StringPattern.recognize("test.1-,").containsSlash());
117117 }
118118
119119 @Test
120 public void testContainsDigit() {
121 Assert.assertTrue(StringPattern.recognize("test1").containsDigit());
122 Assert.assertTrue(StringPattern.recognize("23,5").containsDigit());
123 Assert.assertFalse(StringPattern.recognize("test./-,").containsDigit());
124 Assert.assertTrue(StringPattern.recognize("テスト1").containsDigit());
125 Assert.assertFalse(StringPattern.recognize("テストTEST").containsDigit());
120 void testContainsDigit() {
121 Assertions.assertTrue(StringPattern.recognize("test1").containsDigit());
122 Assertions.assertTrue(StringPattern.recognize("23,5").containsDigit());
123 Assertions.assertFalse(StringPattern.recognize("test./-,").containsDigit());
124 Assertions.assertTrue(StringPattern.recognize("テスト1").containsDigit());
125 Assertions.assertFalse(StringPattern.recognize("テストTEST").containsDigit());
126126 }
127127
128128 @Test
129 public void testContainsHyphen() {
130 Assert.assertTrue(StringPattern.recognize("test--").containsHyphen());
131 Assert.assertTrue(StringPattern.recognize("23-5").containsHyphen());
132 Assert.assertFalse(StringPattern.recognize("test.1/,").containsHyphen());
129 void testContainsHyphen() {
130 Assertions.assertTrue(StringPattern.recognize("test--").containsHyphen());
131 Assertions.assertTrue(StringPattern.recognize("23-5").containsHyphen());
132 Assertions.assertFalse(StringPattern.recognize("test.1/,").containsHyphen());
133133 }
134134
135135 @Test
136 public void testContainsLetters() {
137 Assert.assertTrue(StringPattern.recognize("test--").containsLetters());
138 Assert.assertTrue(StringPattern.recognize("23h5ßm").containsLetters());
139 Assert.assertFalse(StringPattern.recognize("---.1/,").containsLetters());
136 void testContainsLetters() {
137 Assertions.assertTrue(StringPattern.recognize("test--").containsLetters());
138 Assertions.assertTrue(StringPattern.recognize("23h5ßm").containsLetters());
139 Assertions.assertFalse(StringPattern.recognize("---.1/,").containsLetters());
140140 }
141141
142142 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class SuffixFeatureGeneratorTest {
2727
2828 private List<String> features;
2929
30 @Before
31 public void setUp() throws Exception {
30 @BeforeEach
31 void setUp() {
3232 features = new ArrayList<>();
3333 }
3434
3535 @Test
36 public void lengthTest1() {
37
36 void lengthTest1() {
37
3838 String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3939
4040 int testTokenIndex = 0;
4141 int suffixLength = 2;
42
43 AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
42
43 AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
4444
4545 generator.createFeatures(features, testSentence, testTokenIndex, null);
4646
47 Assert.assertEquals(2, features.size());
48 Assert.assertEquals("suf=s", features.get(0));
49 Assert.assertEquals("suf=is", features.get(1));
50
47 Assertions.assertEquals(2, features.size());
48 Assertions.assertEquals("suf=s", features.get(0));
49 Assertions.assertEquals("suf=is", features.get(1));
50
5151 }
52
52
5353 @Test
54 public void lengthTest2() {
55
54 void lengthTest2() {
55
5656 String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
5757
5858 int testTokenIndex = 3;
5959 int suffixLength = 5;
60
61 AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
60
61 AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
6262
6363 generator.createFeatures(features, testSentence, testTokenIndex, null);
6464
65 Assert.assertEquals(5, features.size());
66 Assert.assertEquals("suf=e", features.get(0));
67 Assert.assertEquals("suf=le", features.get(1));
68 Assert.assertEquals("suf=ple", features.get(2));
69 Assert.assertEquals("suf=mple", features.get(3));
70 Assert.assertEquals("suf=ample", features.get(4));
71
65 Assertions.assertEquals(5, features.size());
66 Assertions.assertEquals("suf=e", features.get(0));
67 Assertions.assertEquals("suf=le", features.get(1));
68 Assertions.assertEquals("suf=ple", features.get(2));
69 Assertions.assertEquals("suf=mple", features.get(3));
70 Assertions.assertEquals("suf=ample", features.get(4));
71
7272 }
73
73
7474 @Test
75 public void lengthTest3() {
76
75 void lengthTest3() {
76
7777 String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
7878
7979 int testTokenIndex = 1;
8080 int suffixLength = 5;
81
82 AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
81
82 AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
8383
8484 generator.createFeatures(features, testSentence, testTokenIndex, null);
85
86 Assert.assertEquals(2, features.size());
87 Assert.assertEquals("suf=s", features.get(0));
88 Assert.assertEquals("suf=is", features.get(1));
89
85
86 Assertions.assertEquals(2, features.size());
87 Assertions.assertEquals("suf=s", features.get(0));
88 Assertions.assertEquals("suf=is", features.get(1));
89
9090 }
9191 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class TokenClassFeatureGeneratorTest {
2727
2828 private List<String> features;
2929 static String[] testSentence = new String[] {"This", "is", "an", "Example", "sentence"};
3030
31 @Before
32 public void setUp() throws Exception {
31 @BeforeEach
32 void setUp() {
3333 features = new ArrayList<>();
3434 }
3535
3636 @Test
37 public void testGenWAC() {
37 void testGenWAC() {
3838
3939 final int testTokenIndex = 3;
4040
4242
4343 generator.createFeatures(features, testSentence, testTokenIndex, null);
4444
45 Assert.assertEquals(2, features.size());
46 Assert.assertEquals("wc=ic", features.get(0));
47 Assert.assertEquals("w&c=example,ic", features.get(1));
45 Assertions.assertEquals(2, features.size());
46 Assertions.assertEquals("wc=ic", features.get(0));
47 Assertions.assertEquals("w&c=example,ic", features.get(1));
4848 }
4949
5050 @Test
51 public void testNoWAC() {
51 void testNoWAC() {
5252
5353 final int testTokenIndex = 3;
5454
5656
5757 generator.createFeatures(features, testSentence, testTokenIndex, null);
5858
59 Assert.assertEquals(1, features.size());
60 Assert.assertEquals("wc=ic", features.get(0));
59 Assertions.assertEquals(1, features.size());
60 Assertions.assertEquals("wc=ic", features.get(0));
6161 }
6262 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class TokenFeatureGeneratorTest {
2727
2828 private List<String> features;
2929 static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3030
31 @Before
32 public void setUp() throws Exception {
31 @BeforeEach
32 void setUp() {
3333 features = new ArrayList<>();
3434 }
3535
3636 @Test
37 public void test() {
37 void test() {
3838
3939 final int testTokenIndex = 0;
4040
4242
4343 generator.createFeatures(features, testSentence, testTokenIndex, null);
4444
45 Assert.assertEquals(1, features.size());
46 Assert.assertEquals("w=This", features.get(0));
45 Assertions.assertEquals(1, features.size());
46 Assertions.assertEquals("w=This", features.get(0));
4747 }
4848
4949 @Test
50 public void testLowerCase() {
50 void testLowerCase() {
5151
5252 final int testTokenIndex = 0;
5353
5555
5656 generator.createFeatures(features, testSentence, testTokenIndex, null);
5757
58 Assert.assertEquals(1, features.size());
59 Assert.assertEquals("w=this", features.get(0));
58 Assertions.assertEquals(1, features.size());
59 Assertions.assertEquals("w=this", features.get(0));
6060 }
6161 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class TokenPatternFeatureGeneratorTest {
2727
2828 private List<String> features;
2929
30 @Before
31 public void setUp() throws Exception {
30 @BeforeEach
31 void setUp() {
3232 features = new ArrayList<>();
3333 }
3434
3535 @Test
36 public void testSingleToken() {
36 void testSingleToken() {
3737
3838 String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3939 final int testTokenIndex = 3;
4141 AdaptiveFeatureGenerator generator = new TokenPatternFeatureGenerator();
4242
4343 generator.createFeatures(features, testSentence, testTokenIndex, null);
44 Assert.assertEquals(1, features.size());
45 Assert.assertEquals("st=example", features.get(0));
44 Assertions.assertEquals(1, features.size());
45 Assertions.assertEquals("st=example", features.get(0));
4646 }
4747
4848 @Test
49 public void testSentence() {
49 void testSentence() {
5050
5151 String[] testSentence = new String[] {"This is an example sentence"};
5252 final int testTokenIndex = 0;
5454 AdaptiveFeatureGenerator generator = new TokenPatternFeatureGenerator();
5555
5656 generator.createFeatures(features, testSentence, testTokenIndex, null);
57 Assert.assertEquals(14, features.size());
58 Assert.assertEquals("stn=5", features.get(0));
59 Assert.assertEquals("pt2=iclc", features.get(1));
60 Assert.assertEquals("pt3=iclclc", features.get(2));
61 Assert.assertEquals("st=this", features.get(3));
62 Assert.assertEquals("pt2=lclc", features.get(4));
63 Assert.assertEquals("pt3=lclclc", features.get(5));
64 Assert.assertEquals("st=is", features.get(6));
65 Assert.assertEquals("pt2=lclc", features.get(7));
66 Assert.assertEquals("pt3=lclclc", features.get(8));
67 Assert.assertEquals("st=an", features.get(9));
68 Assert.assertEquals("pt2=lclc", features.get(10));
69 Assert.assertEquals("st=example", features.get(11));
70 Assert.assertEquals("st=sentence", features.get(12));
71 Assert.assertEquals("pta=iclclclclc", features.get(13));
57 Assertions.assertEquals(14, features.size());
58 Assertions.assertEquals("stn=5", features.get(0));
59 Assertions.assertEquals("pt2=iclc", features.get(1));
60 Assertions.assertEquals("pt3=iclclc", features.get(2));
61 Assertions.assertEquals("st=this", features.get(3));
62 Assertions.assertEquals("pt2=lclc", features.get(4));
63 Assertions.assertEquals("pt3=lclclc", features.get(5));
64 Assertions.assertEquals("st=is", features.get(6));
65 Assertions.assertEquals("pt2=lclc", features.get(7));
66 Assertions.assertEquals("pt3=lclclc", features.get(8));
67 Assertions.assertEquals("st=an", features.get(9));
68 Assertions.assertEquals("pt2=lclc", features.get(10));
69 Assertions.assertEquals("st=example", features.get(11));
70 Assertions.assertEquals("st=sentence", features.get(12));
71 Assertions.assertEquals("pta=iclclclclc", features.get(13));
7272 }
7373 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 public class TrigramNameFeatureGeneratorTest {
2727
2828 private List<String> features;
2929 static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
3030
31 @Before
32 public void setUp() throws Exception {
31 @BeforeEach
32 void setUp() {
3333 features = new ArrayList<>();
3434 }
3535
3636 @Test
37 public void testBegin() {
37 void testBegin() {
3838
3939 final int testTokenIndex = 0;
4040
4242
4343 generator.createFeatures(features, testSentence, testTokenIndex, null);
4444
45 Assert.assertEquals(2, features.size());
46 Assert.assertEquals("w,nw,nnw=This,is,an", features.get(0));
47 Assert.assertEquals("wc,nwc,nnwc=ic,lc,lc", features.get(1));
45 Assertions.assertEquals(2, features.size());
46 Assertions.assertEquals("w,nw,nnw=This,is,an", features.get(0));
47 Assertions.assertEquals("wc,nwc,nnwc=ic,lc,lc", features.get(1));
4848 }
4949
5050 @Test
51 public void testNextOfBegin() {
51 void testNextOfBegin() {
5252
5353 final int testTokenIndex = 1;
5454
5656
5757 generator.createFeatures(features, testSentence, testTokenIndex, null);
5858
59 Assert.assertEquals(2, features.size());
60 Assert.assertEquals("w,nw,nnw=is,an,example", features.get(0));
61 Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(1));
59 Assertions.assertEquals(2, features.size());
60 Assertions.assertEquals("w,nw,nnw=is,an,example", features.get(0));
61 Assertions.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(1));
6262 }
6363
6464 @Test
65 public void testMiddle() {
65 void testMiddle() {
6666
6767 final int testTokenIndex = 2;
6868
7070
7171 generator.createFeatures(features, testSentence, testTokenIndex, null);
7272
73 Assert.assertEquals(4, features.size());
74 Assert.assertEquals("ppw,pw,w=This,is,an", features.get(0));
75 Assert.assertEquals("ppwc,pwc,wc=ic,lc,lc", features.get(1));
76 Assert.assertEquals("w,nw,nnw=an,example,sentence", features.get(2));
77 Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(3));
73 Assertions.assertEquals(4, features.size());
74 Assertions.assertEquals("ppw,pw,w=This,is,an", features.get(0));
75 Assertions.assertEquals("ppwc,pwc,wc=ic,lc,lc", features.get(1));
76 Assertions.assertEquals("w,nw,nnw=an,example,sentence", features.get(2));
77 Assertions.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(3));
7878 }
7979
8080 @Test
81 public void testEnd() {
81 void testEnd() {
8282
8383 final int testTokenIndex = 4;
8484
8686
8787 generator.createFeatures(features, testSentence, testTokenIndex, null);
8888
89 Assert.assertEquals(2, features.size());
90 Assert.assertEquals("ppw,pw,w=an,example,sentence", features.get(0));
91 Assert.assertEquals("ppwc,pwc,wc=lc,lc,lc", features.get(1));
89 Assertions.assertEquals(2, features.size());
90 Assertions.assertEquals("ppw,pw,w=an,example,sentence", features.get(0));
91 Assertions.assertEquals("ppwc,pwc,wc=lc,lc,lc", features.get(1));
9292 }
9393
9494 @Test
95 public void testShort() {
95 void testShort() {
9696
9797 String[] shortSentence = new String[] {"I", "know", "it"};
9898
102102
103103 generator.createFeatures(features, shortSentence, testTokenIndex, null);
104104
105 Assert.assertEquals(0, features.size());
105 Assertions.assertEquals(0, features.size());
106106 }
107107 }
1919 import java.util.ArrayList;
2020 import java.util.List;
2121
22 import org.junit.Assert;
23 import org.junit.Before;
24 import org.junit.Test;
22 import org.junit.jupiter.api.Assertions;
23 import org.junit.jupiter.api.BeforeEach;
24 import org.junit.jupiter.api.Test;
2525
2626 /**
2727 * Test for the {@link WindowFeatureGenerator} class.
3333
3434 private List<String> features;
3535
36 @Before
37 public void setUp() throws Exception {
36 @BeforeEach
37 void setUp() {
3838 features = new ArrayList<>();
3939 }
4040
4343 * and next window size of zero.
4444 */
4545 @Test
46 public void testWithoutWindow() {
46 void testWithoutWindow() {
4747
4848 AdaptiveFeatureGenerator windowFeatureGenerator = new WindowFeatureGenerator(
49 new IdentityFeatureGenerator(), 0, 0);
49 new IdentityFeatureGenerator(), 0, 0);
5050
5151 int testTokenIndex = 2;
5252
5353 windowFeatureGenerator.createFeatures(features, testSentence, testTokenIndex, null);
5454
55 Assert.assertEquals(1, features.size());
55 Assertions.assertEquals(1, features.size());
5656
57 Assert.assertEquals("c", features.get(0));
57 Assertions.assertEquals("c", features.get(0));
5858 }
5959
6060 @Test
61 public void testWindowSizeOne() {
61 void testWindowSizeOne() {
6262 AdaptiveFeatureGenerator windowFeatureGenerator = new WindowFeatureGenerator(
6363 new IdentityFeatureGenerator(), 1, 1);
6464
6666
6767 windowFeatureGenerator.createFeatures(features, testSentence, testTokenIndex, null);
6868
69 Assert.assertEquals(3, features.size());
69 Assertions.assertEquals(3, features.size());
7070
71 Assert.assertEquals("c", features.get(0));
72 Assert.assertEquals("p1b", features.get(1));
73 Assert.assertEquals("n1d", features.get(2));
71 Assertions.assertEquals("c", features.get(0));
72 Assertions.assertEquals("p1b", features.get(1));
73 Assertions.assertEquals("n1d", features.get(2));
7474 }
7575
7676 @Test
77 public void testWindowAtBeginOfSentence() {
77 void testWindowAtBeginOfSentence() {
7878 AdaptiveFeatureGenerator windowFeatureGenerator = new WindowFeatureGenerator(
7979 new IdentityFeatureGenerator(), 1, 0);
8080
8181 int testTokenIndex = 0;
8282 windowFeatureGenerator.createFeatures(features, testSentence, testTokenIndex, null);
83 Assert.assertEquals(1, features.size());
84 Assert.assertEquals("a", features.get(0));
83 Assertions.assertEquals(1, features.size());
84 Assertions.assertEquals("a", features.get(0));
8585 }
8686
8787 @Test
88 public void testWindowAtEndOfSentence() {
88 void testWindowAtEndOfSentence() {
8989 AdaptiveFeatureGenerator windowFeatureGenerator = new WindowFeatureGenerator(
9090 new IdentityFeatureGenerator(), 0, 1);
9191
9292 int testTokenIndex = testSentence.length - 1;
9393 windowFeatureGenerator.createFeatures(features, testSentence, testTokenIndex, null);
94 Assert.assertEquals(1, features.size());
95 Assert.assertEquals("h", features.get(0));
94 Assertions.assertEquals(1, features.size());
95 Assertions.assertEquals("h", features.get(0));
9696 }
9797
9898 /**
9999 * Tests for a window size of previous and next 2 if the features are correct.
100100 */
101101 @Test
102 public void testForCorrectFeatures() {
102 void testForCorrectFeatures() {
103103 AdaptiveFeatureGenerator windowFeatureGenerator = new WindowFeatureGenerator(
104104 new IdentityFeatureGenerator(), 2, 2);
105105
106106 int testTokenIndex = 3;
107107 windowFeatureGenerator.createFeatures(features, testSentence, testTokenIndex, null);
108 Assert.assertEquals(5, features.size());
108 Assertions.assertEquals(5, features.size());
109109
110 Assert.assertEquals("d", features.get(0));
111 Assert.assertEquals("p1c", features.get(1));
112 Assert.assertEquals("p2b", features.get(2));
113 Assert.assertEquals("n1e", features.get(3));
114 Assert.assertEquals("n2f", features.get(4));
110 Assertions.assertEquals("d", features.get(0));
111 Assertions.assertEquals("p1c", features.get(1));
112 Assertions.assertEquals("p2b", features.get(2));
113 Assertions.assertEquals("n1e", features.get(3));
114 Assertions.assertEquals("n2f", features.get(4));
115115 }
116116 }
2222 import java.util.Arrays;
2323 import java.util.Random;
2424
25 import org.junit.Assert;
26 import org.junit.Test;
25 import org.junit.jupiter.api.Assertions;
26 import org.junit.jupiter.api.Test;
2727
2828 public class ByteArraySerializerTest {
2929
3030 @Test
31 public void testSerialization() throws IOException {
31 void testSerialization() throws IOException {
3232
3333 byte[] b = new byte[1024];
3434 new Random(23).nextBytes(b);
3636 ByteArraySerializer serializer = new ByteArraySerializer();
3737
3838 ByteArrayOutputStream bOut = new ByteArrayOutputStream();
39 serializer.serialize(Arrays.copyOf(b, b.length), bOut) ;
39 serializer.serialize(Arrays.copyOf(b, b.length), bOut);
4040
41 Assert.assertArrayEquals(b, bOut.toByteArray());
42 Assert.assertArrayEquals(b, serializer.create(new ByteArrayInputStream(b)));
41 Assertions.assertArrayEquals(b, bOut.toByteArray());
42 Assertions.assertArrayEquals(b, serializer.create(new ByteArrayInputStream(b)));
4343 }
4444 }
1616
1717 package opennlp.tools.util.normalizer;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222
2323 public class EmojiCharSequenceNormalizerTest {
2525 public EmojiCharSequenceNormalizer normalizer = EmojiCharSequenceNormalizer.getInstance();
2626
2727 @Test
28 public void normalizeEmoji() throws Exception {
28 void normalizeEmoji() {
2929
3030 String s = new StringBuilder()
3131 .append("Any funny text goes here ")
3535 .append(" ")
3636 .appendCodePoint(0x1F61B)
3737 .toString();
38 Assert.assertEquals(
38 Assertions.assertEquals(
3939 "Any funny text goes here ", normalizer.normalize(s));
4040 }
4141
1515 */
1616 package opennlp.tools.util.normalizer;
1717
18 import org.junit.Assert;
19 import org.junit.Test;
18 import org.junit.jupiter.api.Assertions;
19 import org.junit.jupiter.api.Test;
2020
2121
2222 public class NumberCharSequenceNormalizerTest {
2525
2626
2727 @Test
28 public void normalize() throws Exception {
29 Assert.assertEquals("absc , abcd", normalizer.normalize("absc 123,0123 abcd"));
28 void normalize() {
29 Assertions.assertEquals("absc , abcd", normalizer.normalize("absc 123,0123 abcd"));
3030 }
3131 }
1616
1717 package opennlp.tools.util.normalizer;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222
2323 public class ShrinkCharSequenceNormalizerTest {
2525 public ShrinkCharSequenceNormalizer normalizer = ShrinkCharSequenceNormalizer.getInstance();
2626
2727 @Test
28 public void normalizeSpace() throws Exception {
29 Assert.assertEquals(
28 void normalizeSpace() {
29 Assertions.assertEquals(
3030 "a text extra space", normalizer.normalize("a text extra space"));
3131 }
3232
3333 @Test
34 public void normalizeChar() throws Exception {
35 Assert.assertEquals("Helloo", normalizer.normalize("Helllllloooooo"));
36 Assert.assertEquals("Hello", normalizer.normalize("Hello"));
37 Assert.assertEquals("HHello", normalizer.normalize("HHello"));
34 void normalizeChar() {
35 Assertions.assertEquals("Helloo", normalizer.normalize("Helllllloooooo"));
36 Assertions.assertEquals("Hello", normalizer.normalize("Hello"));
37 Assertions.assertEquals("HHello", normalizer.normalize("HHello"));
3838 }
3939
4040 }
1616
1717 package opennlp.tools.util.normalizer;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222
2323 public class TwitterCharSequenceNormalizerTest {
2525 public TwitterCharSequenceNormalizer normalizer = TwitterCharSequenceNormalizer.getInstance();
2626
2727 @Test
28 public void normalizeHashtag() throws Exception {
29 Assert.assertEquals("asdf 2nnfdf", normalizer.normalize("asdf #hasdk23 2nnfdf"));
28 void normalizeHashtag() {
29 Assertions.assertEquals("asdf 2nnfdf", normalizer.normalize("asdf #hasdk23 2nnfdf"));
3030 }
3131
3232 @Test
33 public void normalizeUser() throws Exception {
34 Assert.assertEquals("asdf 2nnfdf", normalizer.normalize("asdf @hasdk23 2nnfdf"));
33 void normalizeUser() {
34 Assertions.assertEquals("asdf 2nnfdf", normalizer.normalize("asdf @hasdk23 2nnfdf"));
3535 }
3636
3737 @Test
38 public void normalizeRT() throws Exception {
39 Assert.assertEquals(" 2nnfdf", normalizer.normalize("RT RT RT 2nnfdf"));
38 void normalizeRT() {
39 Assertions.assertEquals(" 2nnfdf", normalizer.normalize("RT RT RT 2nnfdf"));
4040 }
4141
4242 @Test
43 public void normalizeLaugh() throws Exception {
44 Assert.assertEquals("ahahah", normalizer.normalize("ahahahah"));
45 Assert.assertEquals("haha", normalizer.normalize("hahha"));
46 Assert.assertEquals("haha", normalizer.normalize("hahaa"));
47 Assert.assertEquals("ahaha", normalizer.normalize("ahahahahhahahhahahaaaa"));
48 Assert.assertEquals("jaja", normalizer.normalize("jajjajajaja"));
43 void normalizeLaugh() {
44 Assertions.assertEquals("ahahah", normalizer.normalize("ahahahah"));
45 Assertions.assertEquals("haha", normalizer.normalize("hahha"));
46 Assertions.assertEquals("haha", normalizer.normalize("hahaa"));
47 Assertions.assertEquals("ahaha", normalizer.normalize("ahahahahhahahhahahaaaa"));
48 Assertions.assertEquals("jaja", normalizer.normalize("jajjajajaja"));
4949 }
5050
5151
52
5352 @Test
54 public void normalizeFace() throws Exception {
55 Assert.assertEquals("hello hello", normalizer.normalize("hello :-) hello"));
56 Assert.assertEquals("hello hello", normalizer.normalize("hello ;) hello"));
57 Assert.assertEquals(" hello", normalizer.normalize(":) hello"));
58 Assert.assertEquals("hello ", normalizer.normalize("hello :P"));
53 void normalizeFace() {
54 Assertions.assertEquals("hello hello", normalizer.normalize("hello :-) hello"));
55 Assertions.assertEquals("hello hello", normalizer.normalize("hello ;) hello"));
56 Assertions.assertEquals(" hello", normalizer.normalize(":) hello"));
57 Assertions.assertEquals("hello ", normalizer.normalize("hello :P"));
5958 }
6059
6160 }
1616
1717 package opennlp.tools.util.normalizer;
1818
19 import org.junit.Assert;
20 import org.junit.Test;
19 import org.junit.jupiter.api.Assertions;
20 import org.junit.jupiter.api.Test;
2121
2222
2323 public class UrlCharSequenceNormalizerTest {
2525 public UrlCharSequenceNormalizer normalizer = UrlCharSequenceNormalizer.getInstance();
2626
2727 @Test
28 public void normalizeUrl() throws Exception {
29 Assert.assertEquals(
28 void normalizeUrl() {
29 Assertions.assertEquals(
3030 "asdf 2nnfdf", normalizer.normalize("asdf http://asdf.com/dfa/cxs 2nnfdf"));
3131
3232
33 Assert.assertEquals(
33 Assertions.assertEquals(
3434 "asdf 2nnfdf ", normalizer.normalize("asdf http://asdf.com/dfa/cx" +
3535 "s 2nnfdf http://asdf.com/dfa/cxs"));
3636 }
3737
3838 @Test
39 public void normalizeEmail() throws Exception {
40 Assert.assertEquals(
39 void normalizeEmail() {
40 Assertions.assertEquals(
4141 "asdf 2nnfdf", normalizer.normalize("asdf asd.fdfa@hasdk23.com.br 2nnfdf"));
42 Assert.assertEquals(
42 Assertions.assertEquals(
4343 "asdf 2nnfdf ", normalizer.normalize("asdf asd.fdfa@hasdk23.com.br" +
4444 " 2nnfdf asd.fdfa@hasdk23.com.br"));
45 Assert.assertEquals(
45 Assertions.assertEquals(
4646 "asdf 2nnfdf", normalizer.normalize("asdf asd+fdfa@hasdk23.com.br 2nnfdf"));
47 Assert.assertEquals(
47 Assertions.assertEquals(
4848 "asdf _br 2nnfdf", normalizer.normalize("asdf asd.fdfa@hasdk23.com_br 2nnfdf"));
4949 }
5050 }
2424 <parent>
2525 <groupId>org.apache.opennlp</groupId>
2626 <artifactId>opennlp</artifactId>
27 <version>2.0.0</version>
27 <version>2.1.0</version>
2828 <relativePath>../pom.xml</relativePath>
2929 </parent>
3030
6262 </dependency>
6363
6464 <dependency>
65 <groupId>junit</groupId>
66 <artifactId>junit</artifactId>
65 <groupId>org.junit.jupiter</groupId>
66 <artifactId>junit-jupiter-api</artifactId>
67 <scope>test</scope>
68 </dependency>
69
70 <dependency>
71 <groupId>org.junit.jupiter</groupId>
72 <artifactId>junit-jupiter-engine</artifactId>
6773 <scope>test</scope>
6874 </dependency>
6975 </dependencies>
2727 import org.apache.uima.util.InvalidXMLException;
2828 import org.apache.uima.util.XMLInputSource;
2929
30 import org.junit.Assert;
30 import org.junit.jupiter.api.Assertions;
3131
3232 /**
3333 * Test for initialization of the opennlp.uima Annotators
5050 ae.process(cas);
5151 ae.reconfigure();
5252 } catch (Exception e) {
53 Assert.fail(e.getLocalizedMessage() + " for desc " + descName);
53 Assertions.fail(e.getLocalizedMessage() + " for desc " + descName);
5454 }
5555 }
5656 }
3333 import org.apache.uima.util.InvalidXMLException;
3434 import org.apache.uima.util.XMLInputSource;
3535
36 import org.junit.AfterClass;
37 import org.junit.Assert;
38 import org.junit.BeforeClass;
39 import org.junit.Test;
36 import org.junit.jupiter.api.AfterAll;
37 import org.junit.jupiter.api.Assertions;
38 import org.junit.jupiter.api.BeforeAll;
39 import org.junit.jupiter.api.Test;
4040
4141 import opennlp.tools.util.StringList;
4242 import opennlp.uima.util.CasUtil;
4747
4848 private static AnalysisEngine AE;
4949
50 @BeforeClass
50 @BeforeAll
5151 public static void beforeClass() throws Exception {
5252 AE = produceAE("DictionaryNameFinder.xml");
5353 }
5454
55 @AfterClass
55 @AfterAll
5656 public static void afterClass() {
5757 AE.destroy(); // is this necessary?
5858 }
7373 DictionaryResource dic = (DictionaryResource) AE.getResourceManager()
7474 .getResource("/opennlp.uima.Dictionary");
7575 // simple check if ordering always is the same...
76 Assert.assertEquals(
76 Assertions.assertEquals(
7777 "[[Berlin], [Stockholm], [New,York], [London], [Copenhagen], [Paris]]",
7878 dic.getDictionary().toString());
7979 // else we can do a simple test like this
80 Assert.assertEquals("There should be six entries in the dictionary", 6,
81 dic.getDictionary().asStringSet().size());
82 Assert.assertTrue("London should be in the dictionary",
83 dic.getDictionary().contains(new StringList("London")));
80 Assertions.assertEquals(6,
81 dic.getDictionary().asStringSet().size(), "There should be six entries in the dictionary");
82 Assertions.assertTrue(dic.getDictionary().contains(new StringList("London")),
83 "London should be in the dictionary");
8484 } catch (Exception e) {
85 Assert.fail("Dictionary was not loaded.");
85 Assertions.fail("Dictionary was not loaded.");
8686 }
8787
8888 }
105105
106106 while (locationIterator.isValid()) {
107107 AnnotationFS annotationFS = locationIterator.get();
108 Assert.assertTrue(expectedLocations.contains(annotationFS.getCoveredText()));
108 Assertions.assertTrue(expectedLocations.contains(annotationFS.getCoveredText()));
109109 expectedLocations.remove(annotationFS.getCoveredText());
110110 locationIterator.moveToNext();
111111 }
112 Assert.assertEquals(0, expectedLocations.size());
112 Assertions.assertEquals(0, expectedLocations.size());
113113 } catch (Exception e) {
114114 e.printStackTrace();
115 Assert.fail(e.getLocalizedMessage());
115 Assertions.fail(e.getLocalizedMessage());
116116 }
117117
118118 }
2626 import org.apache.uima.cas.text.AnnotationFS;
2727 import org.apache.uima.resource.metadata.TypeSystemDescription;
2828
29 import org.junit.Assert;
30 import org.junit.Test;
29 import org.junit.jupiter.api.Assertions;
30 import org.junit.jupiter.api.Test;
3131
3232 public class AnnotationComboIteratorTest {
3333
7171 tokensBySentence.add(tokens);
7272 }
7373
74 Assert.assertEquals(Collections.singletonList("A"), tokensBySentence.get(0));
75 Assert.assertEquals(Arrays.asList("H", "I"), tokensBySentence.get(1));
74 Assertions.assertEquals(Collections.singletonList("A"), tokensBySentence.get(0));
75 Assertions.assertEquals(Arrays.asList("H", "I"), tokensBySentence.get(1));
7676 }
7777
7878 }
3030
3131 <groupId>org.apache.opennlp</groupId>
3232 <artifactId>opennlp</artifactId>
33 <version>2.0.0</version>
33 <version>2.1.0</version>
3434 <packaging>pom</packaging>
3535
3636 <name>Apache OpenNLP Reactor</name>
3939 <connection>scm:git:https://github.com/apache/opennlp.git</connection>
4040 <developerConnection>scm:git:git@github.com:apache/opennlp.git</developerConnection>
4141 <url>https://github.com/apache/opennlp.git</url>
42 <tag>2.0.0</tag>
42 <tag>opennlp-2.1.0</tag>
4343 </scm>
4444
4545 <repositories>
9292
9393 <dependencyManagement>
9494 <dependencies>
95
9596 <dependency>
96 <groupId>junit</groupId>
97 <artifactId>junit</artifactId>
97 <groupId>org.junit.jupiter</groupId>
98 <artifactId>junit-jupiter-api</artifactId>
99 <version>${junit.version}</version>
100 <scope>test</scope>
101 </dependency>
102
103 <dependency>
104 <groupId>org.junit.jupiter</groupId>
105 <artifactId>junit-jupiter-engine</artifactId>
98106 <version>${junit.version}</version>
99107 <scope>test</scope>
100108 </dependency>
140148 <commons.io.version>2.6</commons.io.version>
141149 <enforcer.plugin.version>3.0.0-M2</enforcer.plugin.version>
142150 <glassfish.version>2.30.1</glassfish.version>
143 <junit.version>4.13.1</junit.version>
151 <junit.version>5.9.1</junit.version>
144152 <morfologik.version>2.1.7</morfologik.version>
145153 <osgi.version>4.2.0</osgi.version>
146154 <checkstyle.plugin.version>2.17</checkstyle.plugin.version>
406414 <configuration>
407415 <rules>
408416 <requireJavaVersion>
409 <message>Java 8 or higher is required to compile this module</message>
417 <message>Java 11 or higher is required to compile this module</message>
410418 <version>[${java.version},)</version>
411419 </requireJavaVersion>
412420 <requireMavenVersion>