Import upstream version 0.0~git20190408.0.01ec452, md5 eaf728835f9afeb014fa23f4db014449
Debian Janitor
4 years ago
0 | # Compiled Object files, Static and Dynamic libs (Shared Objects) | |
1 | *.o | |
2 | *.a | |
3 | *.so | |
4 | ||
5 | # Folders | |
6 | _obj | |
7 | _test | |
8 | ||
9 | # Architecture specific extensions/prefixes | |
10 | *.[568vq] | |
11 | [568vq].out | |
12 | ||
13 | *.cgo1.go | |
14 | *.cgo2.c | |
15 | _cgo_defun.c | |
16 | _cgo_gotypes.go | |
17 | _cgo_export.* | |
18 | ||
19 | _testmain.go | |
20 | ||
21 | *.exe | |
22 | *.test | |
23 | *.prof |
3 | 3 | [![Build Status](https://travis-ci.org/jaytaylor/html2text.svg?branch=master)](https://travis-ci.org/jaytaylor/html2text) |
4 | 4 | [![Report Card](https://goreportcard.com/badge/github.com/jaytaylor/html2text)](https://goreportcard.com/report/github.com/jaytaylor/html2text) |
5 | 5 | |
6 | ### Converts HTML into text | |
6 | ### Converts HTML into text of the markdown-flavored variety | |
7 | 7 | |
8 | 8 | |
9 | 9 | ## Introduction |
10 | 10 | |
11 | 11 | Ensure your emails are readable by all! |
12 | 12 | |
13 | Turns HTML into raw text, useful for sending fancy HTML emails with a equivalently nicely formatted TXT document as a fallback (e.g. for people who don't allow HTML emails or have other display issues). | |
13 | Turns HTML into raw text, useful for sending fancy HTML emails with an equivalently nicely formatted TXT document as a fallback (e.g. for people who don't allow HTML emails or have other display issues). | |
14 | 14 | |
15 | 15 | html2text is a simple golang package for rendering HTML into plaintext. |
16 | 16 | |
22 | 22 | ## Download the package |
23 | 23 | |
24 | 24 | ```bash |
25 | go get github.com/jaytaylor/html2text | |
25 | go get jaytaylor.com/html2text | |
26 | 26 | ``` |
27 | 27 | |
28 | 28 | ## Example usage |
33 | 33 | import ( |
34 | 34 | "fmt" |
35 | 35 | |
36 | "github.com/jaytaylor/html2text" | |
36 | "jaytaylor.com/html2text" | |
37 | 37 | ) |
38 | 38 | |
39 | 39 | func main() { |
77 | 77 | </body> |
78 | 78 | </html>` |
79 | 79 | |
80 | text, err := FromString(inputHTML, Options{PrettyTables: true}) | |
80 | text, err := html2text.FromString(inputHTML, html2text.Options{PrettyTables: true}) | |
81 | 81 | if err != nil { |
82 | 82 | panic(err) |
83 | 83 | } |
14 | 14 | |
15 | 15 | // Options provide toggles and overrides to control specific rendering behaviors. |
16 | 16 | type Options struct { |
17 | PrettyTables bool // Turns on pretty ASCII rendering for table elements. | |
17 | PrettyTables bool // Turns on pretty ASCII rendering for table elements. | |
18 | PrettyTablesOptions *PrettyTablesOptions // Configures pretty ASCII rendering for table elements. | |
19 | OmitLinks bool // Turns on omitting link URLs (hrefs) from the output. | |
20 | } | |
21 | ||
22 | // PrettyTablesOptions overrides tablewriter behaviors used for pretty ASCII table rendering. | |
23 | type PrettyTablesOptions struct { | |
24 | AutoFormatHeader bool | |
25 | AutoWrapText bool | |
26 | ReflowDuringAutoWrap bool | |
27 | ColWidth int | |
28 | ColumnSeparator string | |
29 | RowSeparator string | |
30 | CenterSeparator string | |
31 | HeaderAlignment int | |
32 | FooterAlignment int | |
33 | Alignment int | |
34 | ColumnAlignment []int | |
35 | NewLine string | |
36 | HeaderLine bool | |
37 | RowLine bool | |
38 | AutoMergeCells bool | |
39 | Borders tablewriter.Border | |
40 | } | |
41 | ||
42 | // NewPrettyTablesOptions returns a PrettyTablesOptions populated with default settings. | |
43 | func NewPrettyTablesOptions() *PrettyTablesOptions { | |
44 | return &PrettyTablesOptions{ | |
45 | AutoFormatHeader: true, | |
46 | AutoWrapText: true, | |
47 | ReflowDuringAutoWrap: true, | |
48 | ColWidth: tablewriter.MAX_ROW_WIDTH, | |
49 | ColumnSeparator: tablewriter.COLUMN, | |
50 | RowSeparator: tablewriter.ROW, | |
51 | CenterSeparator: tablewriter.CENTER, | |
52 | HeaderAlignment: tablewriter.ALIGN_DEFAULT, | |
53 | FooterAlignment: tablewriter.ALIGN_DEFAULT, | |
54 | Alignment: tablewriter.ALIGN_DEFAULT, | |
55 | ColumnAlignment: []int{}, | |
56 | NewLine: tablewriter.NEWLINE, | |
57 | HeaderLine: true, | |
58 | RowLine: false, | |
59 | AutoMergeCells: false, | |
60 | Borders: tablewriter.Border{Left: true, Right: true, Bottom: true, Top: true}, | |
61 | } | |
18 | 62 | } |
19 | 63 | |
20 | 64 | // FromHTMLNode renders text output from a pre-parsed HTML document. |
78 | 122 | justClosedDiv bool |
79 | 123 | blockquoteLevel int |
80 | 124 | lineLength int |
125 | isPre bool | |
81 | 126 | } |
82 | 127 | |
83 | 128 | // tableTraverseContext holds table ASCII-form related context. |
208 | 253 | if attrVal := getAttrVal(node, "href"); attrVal != "" { |
209 | 254 | attrVal = ctx.normalizeHrefLink(attrVal) |
210 | 255 | // Don't print link href if links are omitted, if it matches link element content, or if the link is empty. |
211 | if attrVal != "" && linkText != attrVal { | |
256 | if !ctx.options.OmitLinks && attrVal != "" && linkText != attrVal { | |
212 | 257 | hrefLink = "( " + attrVal + " )" |
213 | 258 | } |
214 | 259 | } |
226 | 271 | } |
227 | 272 | return ctx.traverseChildren(node) |
228 | 273 | |
274 | case atom.Pre: | |
275 | ctx.isPre = true | |
276 | err := ctx.traverseChildren(node) | |
277 | ctx.isPre = false | |
278 | return err | |
279 | ||
229 | 280 | case atom.Style, atom.Script, atom.Head: |
230 | 281 | // Ignore the subtree. |
231 | 282 | return nil |
268 | 319 | |
269 | 320 | buf := &bytes.Buffer{} |
270 | 321 | table := tablewriter.NewWriter(buf) |
322 | if ctx.options.PrettyTablesOptions != nil { | |
323 | options := ctx.options.PrettyTablesOptions | |
324 | table.SetAutoFormatHeaders(options.AutoFormatHeader) | |
325 | table.SetAutoWrapText(options.AutoWrapText) | |
326 | table.SetReflowDuringAutoWrap(options.ReflowDuringAutoWrap) | |
327 | table.SetColWidth(options.ColWidth) | |
328 | table.SetColumnSeparator(options.ColumnSeparator) | |
329 | table.SetRowSeparator(options.RowSeparator) | |
330 | table.SetCenterSeparator(options.CenterSeparator) | |
331 | table.SetHeaderAlignment(options.HeaderAlignment) | |
332 | table.SetFooterAlignment(options.FooterAlignment) | |
333 | table.SetAlignment(options.Alignment) | |
334 | table.SetColumnAlignment(options.ColumnAlignment) | |
335 | table.SetNewLine(options.NewLine) | |
336 | table.SetHeaderLine(options.HeaderLine) | |
337 | table.SetRowLine(options.RowLine) | |
338 | table.SetAutoMergeCells(options.AutoMergeCells) | |
339 | table.SetBorders(options.Borders) | |
340 | } | |
271 | 341 | table.SetHeader(ctx.tableCtx.header) |
272 | 342 | table.SetFooter(ctx.tableCtx.footer) |
273 | 343 | table.AppendBulk(ctx.tableCtx.body) |
324 | 394 | return ctx.traverseChildren(node) |
325 | 395 | |
326 | 396 | case html.TextNode: |
327 | data := strings.Trim(spacingRe.ReplaceAllString(node.Data, " "), " ") | |
397 | var data string | |
398 | if ctx.isPre { | |
399 | data = node.Data | |
400 | } else { | |
401 | data = strings.TrimSpace(spacingRe.ReplaceAllString(node.Data, " ")) | |
402 | } | |
328 | 403 | return ctx.emit(data) |
329 | 404 | |
330 | 405 | case html.ElementNode: |
138 | 138 | "Test text<br><BR />Test text", |
139 | 139 | "Test text\n\nTest text", |
140 | 140 | }, |
141 | { | |
142 | "<pre>test1\ntest 2\n\ntest 3</pre>", | |
143 | "test1\ntest 2\n\ntest 3", | |
144 | }, | |
141 | 145 | } |
142 | 146 | |
143 | 147 | for _, testCase := range testCases { |
184 | 188 | { |
185 | 189 | `<table> |
186 | 190 | <tbody> |
187 | <tr><td><p>Row-1-Col-1-Msg1</p><p>Row-1-Col-1-Msg2</p></td><td>Row-1-Col-2</td></tr> | |
191 | <tr><td><p>Row-1-Col-1-Msg123456789012345</p><p>Row-1-Col-1-Msg2</p></td><td>Row-1-Col-2</td></tr> | |
188 | 192 | <tr><td>Row-2-Col-1</td><td>Row-2-Col-2</td></tr> |
189 | 193 | </tbody> |
190 | 194 | </table>`, |
191 | 195 | // +--------------------------------+-------------+ |
192 | // | Row-1-Col-1-Msg1 | Row-1-Col-2 | | |
196 | // | Row-1-Col-1-Msg123456789012345 | Row-1-Col-2 | | |
193 | 197 | // | Row-1-Col-1-Msg2 | | |
194 | 198 | // | Row-2-Col-1 | Row-2-Col-2 | |
195 | 199 | // +--------------------------------+-------------+ |
196 | 200 | `+--------------------------------+-------------+ |
197 | | Row-1-Col-1-Msg1 | Row-1-Col-2 | | |
201 | | Row-1-Col-1-Msg123456789012345 | Row-1-Col-2 | | |
198 | 202 | | Row-1-Col-1-Msg2 | | |
199 | 203 | | Row-2-Col-1 | Row-2-Col-2 | |
200 | 204 | +--------------------------------+-------------+`, |
201 | `Row-1-Col-1-Msg1 | |
205 | `Row-1-Col-1-Msg123456789012345 | |
202 | 206 | |
203 | 207 | Row-1-Col-1-Msg2 |
204 | 208 | |
240 | 244 | "Header 1 Header 2 Footer 1 Footer 2 Row 1 Col 1 Row 1 Col 2 Row 2 Col 1 Row 2 Col 2", |
241 | 245 | }, |
242 | 246 | // Two tables in same HTML (goal is to test that context is |
243 | // reinitalized correctly). | |
247 | // reinitialized correctly). | |
244 | 248 | { |
245 | 249 | `<p> |
246 | 250 | <table> |
329 | 333 | |
330 | 334 | for _, testCase := range testCases { |
331 | 335 | options := Options{ |
332 | PrettyTables: true, | |
336 | PrettyTables: true, | |
337 | PrettyTablesOptions: NewPrettyTablesOptions(), | |
333 | 338 | } |
334 | 339 | // Check pretty tabular ASCII version. |
335 | 340 | if msg, err := wantString(testCase.input, testCase.tabularOutput, options); err != nil { |
467 | 472 | } |
468 | 473 | } |
469 | 474 | |
470 | func TestImageAltTags(t *testing.T) { | |
475 | func TestOmitLinks(t *testing.T) { | |
471 | 476 | testCases := []struct { |
472 | 477 | input string |
473 | 478 | output string |
474 | 479 | }{ |
475 | 480 | { |
481 | `<a></a>`, | |
482 | ``, | |
483 | }, | |
484 | { | |
485 | `<a href=""></a>`, | |
486 | ``, | |
487 | }, | |
488 | { | |
489 | `<a href="http://example.com/"></a>`, | |
490 | ``, | |
491 | }, | |
492 | { | |
493 | `<a href="">Link</a>`, | |
494 | `Link`, | |
495 | }, | |
496 | { | |
497 | `<a href="http://example.com/">Link</a>`, | |
498 | `Link`, | |
499 | }, | |
500 | { | |
501 | `<a href="http://example.com/"><span class="a">Link</span></a>`, | |
502 | `Link`, | |
503 | }, | |
504 | { | |
505 | "<a href='http://example.com/'>\n\t<span class='a'>Link</span>\n\t</a>", | |
506 | `Link`, | |
507 | }, | |
508 | { | |
509 | `<a href="http://example.com/"><img src="http://example.ru/hello.jpg" alt="Example"></a>`, | |
510 | `Example`, | |
511 | }, | |
512 | } | |
513 | ||
514 | for _, testCase := range testCases { | |
515 | if msg, err := wantString(testCase.input, testCase.output, Options{OmitLinks: true}); err != nil { | |
516 | t.Error(err) | |
517 | } else if len(msg) > 0 { | |
518 | t.Log(msg) | |
519 | } | |
520 | } | |
521 | } | |
522 | ||
523 | func TestImageAltTags(t *testing.T) { | |
524 | testCases := []struct { | |
525 | input string | |
526 | output string | |
527 | }{ | |
528 | { | |
476 | 529 | `<img />`, |
477 | 530 | ``, |
478 | 531 | }, |
614 | 667 | { |
615 | 668 | "Test 1<div>Test 2</div> <div>Test 3</div>Test 4", |
616 | 669 | "Test 1\nTest 2\nTest 3\nTest 4", |
670 | }, | |
671 | { | |
672 | "Test 1<div> Test 2 </div>", | |
673 | "Test 1\nTest 2", | |
617 | 674 | }, |
618 | 675 | } |
619 | 676 | |
743 | 800 | `hi |
744 | 801 | |
745 | 802 | <br> |
746 | ||
803 | ||
747 | 804 | hello <a href="https://google.com">google</a> |
748 | 805 | <br><br> |
749 | 806 | test<p>List:</p> |