New upstream version 2.8.0
Pirate Praveen
6 years ago
0 | 0 | language: ruby |
1 | sudo: false | |
2 | cache: bundler | |
3 | bundler_args: --path ../../vendor/bundle | |
1 | 4 | |
2 | before_install: | |
3 | - sudo apt-get update -qq | |
4 | - sudo apt-get install -qq libicu-dev | |
5 | addons: | |
6 | apt: | |
7 | sources: | |
8 | - libicu-dev | |
9 | - kalakris-cmake | |
10 | packages: | |
11 | - cmake | |
5 | 12 | |
6 | script: "bundle exec rake" | |
13 | script: bundle exec rake | |
14 | ||
15 | gemfile: | |
16 | - gemfiles/rails_5.gemfile | |
17 | - gemfiles/rails_4.gemfile | |
18 | - gemfiles/rails_3.gemfile | |
7 | 19 | |
8 | 20 | rvm: |
9 | - 1.9.2 | |
10 | - 1.9.3 | |
11 | - 2.0.0 | |
12 | - 2.1.1 | |
13 | - ree | |
21 | - 2.4.0 | |
22 | - 2.3.1 | |
23 | - 2.2.5 | |
24 | - ruby-head | |
14 | 25 | |
15 | 26 | matrix: |
16 | 27 | fast_finish: true |
17 | 28 | allow_failures: |
18 | - rvm: ree | |
29 | - rvm: ruby-head | |
30 | exclude: | |
31 | - gemfile: gemfiles/rails_4.gemfile | |
32 | rvm: 2.4.0 | |
33 | - gemfile: gemfiles/rails_3.gemfile | |
34 | rvm: 2.4.0 |
0 | appraise 'rails-3' do | |
1 | gem 'rack', '< 2' | |
2 | gem 'rails', '3.2.22.2' | |
3 | end | |
4 | ||
5 | appraise 'rails-4' do | |
6 | gem 'rack', '< 2' | |
7 | gem 'rails', '~> 4.2.6' | |
8 | end | |
9 | ||
10 | appraise 'rails-5' do | |
11 | gem 'rails', '~> 5.0.0' | |
12 | end |
0 | 0 | # CHANGELOG |
1 | 1 | |
2 | ## 2.6.0 | |
3 | * Switch from github-markdown to CommonMark #274 | |
4 | * Fixed a few warnings | |
5 | ||
6 | ## 2.5.0 | |
7 | ||
8 | * Ruby 2.4 support. Backwards compatible, but bumped minor version so projects can choose to lock at older version [#268](https://github.com/jch/html-pipeline/pull/268) | |
9 | ||
10 | ## 2.4.2 | |
11 | ||
12 | * Make EmojiFilter generated img tag HTML attributes configurable [#258](https://github.com/jch/html-pipeline/pull/258) | |
13 | ||
14 | ## 2.4.1 | |
15 | ||
16 | * Regression in EmailReplyPipeline: unfiltered content is being omitted [#253](https://github.com/jch/html-pipeline/pull/253) | |
17 | ||
18 | ## 2.4.0 | |
19 | ||
20 | * Optionally filter email addresses [#247](https://github.com/jch/html-pipeline/pull/247) | |
21 | ||
22 | ## 2.3.0 | |
23 | ||
24 | * Add option to pass in an anchor icon, instead of using octicons [#244](https://github.com/jch/html-pipeline/pull/244) | |
25 | ||
26 | ## 2.2.4 | |
27 | ||
28 | * Use entire namespace so MissingDependencyError constant is resolved [#243](https://github.com/jch/html-pipeline/pull/243) | |
29 | ||
30 | ## 2.2.3 | |
31 | ||
32 | * raise MissingDependencyError instead of aborting on missing dependency [#241](https://github.com/jch/html-pipeline/pull/241) | |
33 | * Fix typo [#239](https://github.com/jch/html-pipeline/pull/239) | |
34 | * Test against Ruby 2.3.0 on Travis CI [#238](https://github.com/jch/html-pipeline/pull/238) | |
35 | * use travis containers [#237](https://github.com/jch/html-pipeline/pull/237) | |
36 | ||
37 | ## 2.2.2 | |
38 | ||
39 | * Fix for calling mention_link_filter with only one argument [#230](https://github.com/jch/html-pipeline/pull/230) | |
40 | * Add html-pipeline-linkify_github to 3rd Party Extensions in README [#228](https://github.com/jch/html-pipeline/pull/228) | |
41 | ||
42 | ## 2.2.1 | |
43 | ||
44 | * Soften Nokogiri dependency to versions ">= 1.4" [#208](https://github.com/jch/html-pipeline/pull/208) | |
45 | ||
46 | ## 2.2.0 | |
47 | ||
48 | * Only allow cite attribute on blockquote and restrict schemes [#223](https://github.com/jch/html-pipeline/pull/223) | |
49 | ||
50 | ## 2.1.0 | |
51 | ||
52 | * Whitelist schemes for longdesc [#221](https://github.com/jch/html-pipeline/pull/221) | |
53 | * Extract emoji image tag generation to own method [#195](https://github.com/jch/html-pipeline/pull/195) | |
54 | * Update README.md [#211](https://github.com/jch/html-pipeline/pull/211) | |
55 | * Add ImageFilter for image url to img tag conversion [#207](https://github.com/jch/html-pipeline/pull/207) | |
56 | ||
57 | ## 2.0 | |
58 | ||
59 | **New** | |
60 | ||
61 | * Implement new EmojiFilter context option: ignored_ancestor_tags to accept more ignored tags. [#170](https://github.com/jch/html-pipeline/pull/170) @JuanitoFatas | |
62 | * Add GitHub flavor Markdown Task List extension [#162](https://github.com/jch/html-pipeline/pull/162) @simeonwillbanks | |
63 | * @mention allow for custom regex to identify usernames. [#157](https://github.com/jch/html-pipeline/pull/157) @brittballard | |
64 | * EmojiFilter now requires gemoji ~> 2. [#159](https://github.com/jch/html-pipeline/pull/159) @jch | |
65 | ||
66 | **Changes** | |
67 | ||
68 | * Restrict nokogiri to >= 1.4, <= 1.6.5 [#176](https://github.com/jch/html-pipeline/pull/176) @simeonwillbanks | |
69 | * MentionFilter#link_to_mentioned_user: Replace String introspection with Regexp match [#172](https://github.com/jch/html-pipeline/pull/172) @simeonwillbanks | |
70 | * Whitelist summary and details element. [#171](https://github.com/jch/html-pipeline/pull/171) @JuanitoFatas | |
71 | * Support ~login for MentionFilter. [#167](https://github.com/jch/html-pipeline/pull/167) @JuanitoFatas | |
72 | * Revert "Search for text nodes on DocumentFragments without root tags" [#158](https://github.com/jch/html-pipeline/pull/158) @jch | |
73 | * Drop support for ruby ree, 1.9.2, 1.9.3 [#156](https://github.com/jch/html-pipeline/pull/156) @jch | |
74 | * Skip EmojiFilter in `<tt>` tags [#147](https://github.com/jch/html-pipeline/pull/147) @moskvax | |
75 | * Use Linguist lexers [#153](https://github.com/jch/html-pipeline/pull/153) @pchaigno | |
76 | * Constrain Active Support >= 2, < 5 [#180](https://github.com/jch/html-pipeline/pull/180) @jch | |
77 | ||
2 | 78 | ## 1.11.0 |
3 | 79 | |
4 | 80 | * Search for text nodes on DocumentFragments without root tags #146 Razer6 |
5 | * Don't filter @mentions in <style> tags #145 jch | |
81 | * Don't filter @mentions in `<style>` tags #145 jch | |
6 | 82 | * Prefer `http_url` in HttpsFilter. `base_url` still works. #142 bkeepers |
7 | 83 | * Remove duplicate check in EmojiFilter #141 Razer6 |
8 | 84 |
0 | source "https://rubygems.org" | |
0 | source 'https://rubygems.org' | |
1 | 1 | |
2 | 2 | # Specify your gem's dependencies in html-pipeline.gemspec |
3 | 3 | gemspec |
4 | 4 | |
5 | 5 | group :development do |
6 | gem "bundler" | |
7 | gem "rake" | |
6 | gem 'appraisal' | |
7 | gem 'bundler' | |
8 | gem 'rake' | |
8 | 9 | end |
9 | 10 | |
10 | 11 | group :test do |
11 | gem "minitest", "~> 5.3" | |
12 | gem "rinku", "~> 1.7", :require => false | |
13 | gem "gemoji", "~> 1.0", :require => false | |
14 | gem "RedCloth", "~> 4.2.9", :require => false | |
15 | gem "github-markdown", "~> 0.5", :require => false | |
16 | gem "email_reply_parser", "~> 0.5", :require => false | |
12 | gem 'commonmarker', '~> 0.16', require: false | |
13 | gem 'email_reply_parser', '~> 0.5', require: false | |
14 | gem 'gemoji', '~> 2.0', require: false | |
15 | gem 'minitest' | |
16 | gem 'RedCloth', '~> 4.2.9', require: false | |
17 | gem 'rinku', '~> 1.7', require: false | |
18 | gem 'sanitize', '~> 2.0', require: false | |
17 | 19 | |
18 | if RUBY_VERSION < "2.1.0" | |
19 | gem "escape_utils", "~> 0.3", :require => false | |
20 | gem "github-linguist", "~> 2.6.2", :require => false | |
21 | else | |
22 | gem "escape_utils", "~> 1.0", :require => false | |
23 | gem "github-linguist", "~> 2.10", :require => false | |
24 | end | |
25 | ||
26 | if RUBY_VERSION < "1.9.2" | |
27 | gem "sanitize", ">= 2", "< 2.0.4", :require => false | |
28 | gem "nokogiri", ">= 1.4", "< 1.6" | |
29 | else | |
30 | gem "sanitize", "~> 2.0", :require => false | |
31 | end | |
32 | ||
33 | if RUBY_VERSION < "1.9.3" | |
34 | gem "activesupport", ">= 2", "< 4" | |
35 | end | |
20 | gem 'escape_utils', '~> 1.0', require: false | |
21 | gem 'rouge', '~> 3.1', require: false | |
36 | 22 | end |
0 | # HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline) | |
0 | # HTML::Pipeline [![Build Status](https://travis-ci.org/jch/html-pipeline.svg?branch=master)](https://travis-ci.org/jch/html-pipeline) | |
1 | 1 | |
2 | 2 | GitHub HTML processing filters and utilities. This module includes a small |
3 | 3 | framework for defining DOM based content filters and applying them to user |
77 | 77 | ```html |
78 | 78 | <p>This is <em>great</em>:</p> |
79 | 79 | |
80 | <div class="highlight"> | |
81 | <pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span> | |
82 | </pre> | |
83 | </div> | |
84 | ``` | |
85 | ||
86 | To generate CSS for HTML formatted code, use the [pygments.rb](https://github.com/tmm1/pygments.rb#usage) `#css` method. `pygments.rb` is a dependency of the `SyntaxHighlightFilter`. | |
80 | <pre><code>some_code(:first) | |
81 | </code></pre> | |
82 | ``` | |
83 | ||
84 | To generate CSS for HTML formatted code, use the [Rouge CSS Theme](https://github.com/jneen/rouge#css-theme-options) `#css` method. `rouge` is a dependency of the `SyntaxHighlightFilter`. | |
87 | 85 | |
88 | 86 | Some filters take an optional **context** and/or **result** hash. These are |
89 | 87 | used to pass around arguments and metadata between filters in a pipeline. For |
140 | 138 | # Pipelines aren't limited to the web. You can use them for email |
141 | 139 | # processing also. |
142 | 140 | HtmlEmailPipeline = Pipeline.new [ |
141 | PlainTextInputFilter, | |
143 | 142 | ImageMaxWidthFilter |
144 | 143 | ], {} |
145 | 144 | |
163 | 162 | * `MarkdownFilter` - convert markdown to html |
164 | 163 | * `PlainTextInputFilter` - html escape text and wrap the result in a div |
165 | 164 | * `SanitizationFilter` - whitelist sanitize user markup |
166 | * `SyntaxHighlightFilter` - [code syntax highlighter](#syntax-highlighting) | |
165 | * `SyntaxHighlightFilter` - code syntax highlighter | |
167 | 166 | * `TextileFilter` - convert textile to html |
168 | 167 | * `TableOfContentsFilter` - anchor headings with name attributes and generate Table of Contents html unordered list linking headings |
169 | 168 | |
171 | 170 | |
172 | 171 | Filter gem dependencies are not bundled; you must bundle the filter's gem |
173 | 172 | dependencies. The below list details filters with dependencies. For example, |
174 | `SyntaxHighlightFilter` uses [github-linguist](https://github.com/github/linguist) | |
173 | `SyntaxHighlightFilter` uses [rouge](https://github.com/jneen/rouge) | |
175 | 174 | to detect and highlight languages. For example, to use the `SyntaxHighlightFilter`, |
176 | 175 | add the following to your Gemfile: |
177 | 176 | |
178 | 177 | ```ruby |
179 | gem 'github-linguist' | |
178 | gem 'rouge' | |
180 | 179 | ``` |
181 | 180 | |
182 | 181 | * `AutolinkFilter` - `rinku` |
183 | 182 | * `EmailReplyFilter` - `escape_utils`, `email_reply_parser` |
184 | 183 | * `EmojiFilter` - `gemoji` |
185 | * `MarkdownFilter` - `github-markdown` | |
184 | * `MarkdownFilter` - `commonmarker` | |
186 | 185 | * `PlainTextInputFilter` - `escape_utils` |
187 | 186 | * `SanitizationFilter` - `sanitize` |
188 | * `SyntaxHighlightFilter` - `github-linguist` | |
187 | * `SyntaxHighlightFilter` - `rouge` | |
189 | 188 | * `TextileFilter` - `RedCloth` |
190 | 189 | |
191 | 190 | _Note:_ See [Gemfile](/Gemfile) `:test` block for version requirements. |
232 | 231 | whether the filter is a common enough use case to belong in this gem, or should be |
233 | 232 | built as an external gem. |
234 | 233 | |
235 | * [html-pipeline-asciidoc_filter](https://github.com/asciidoctor/html-pipeline-asciidoc_filter) - asciidoc support | |
234 | Here are some extensions people have built: | |
235 | ||
236 | * [html-pipeline-asciidoc_filter](https://github.com/asciidoctor/html-pipeline-asciidoc_filter) | |
237 | * [jekyll-html-pipeline](https://github.com/gjtorikian/jekyll-html-pipeline) | |
238 | * [nanoc-html-pipeline](https://github.com/burnto/nanoc-html-pipeline) | |
239 | * [html-pipeline-bitly](https://github.com/dewski/html-pipeline-bitly) | |
240 | * [html-pipeline-cite](https://github.com/lifted-studios/html-pipeline-cite) | |
241 | * [tilt-html-pipeline](https://github.com/bradgessler/tilt-html-pipeline) | |
242 | * [html-pipeline-wiki-link](https://github.com/lifted-studios/html-pipeline-wiki-link) - WikiMedia-style wiki links | |
243 | * [task_list](https://github.com/github/task_list) - GitHub flavor Markdown Task List | |
244 | * [html-pipeline-nico_link](https://github.com/rutan/html-pipeline-nico_link) - An HTML::Pipeline filter for [niconico](http://www.nicovideo.jp) description links | |
245 | * [html-pipeline-gitlab](https://gitlab.com/gitlab-org/html-pipeline-gitlab) - This gem implements various filters for html-pipeline used by GitLab | |
246 | * [html-pipeline-youtube](https://github.com/st0012/html-pipeline-youtube) - An HTML::Pipeline filter for YouTube links | |
247 | * [html-pipeline-flickr](https://github.com/st0012/html-pipeline-flickr) - An HTML::Pipeline filter for Flickr links | |
248 | * [html-pipeline-vimeo](https://github.com/dlackty/html-pipeline-vimeo) - An HTML::Pipeline filter for Vimeo links | |
249 | * [html-pipeline-hashtag](https://github.com/mr-dxdy/html-pipeline-hashtag) - An HTML::Pipeline filter for hashtags | |
250 | * [html-pipeline-linkify_github](https://github.com/jollygoodcode/html-pipeline-linkify_github) - An HTML::Pipeline filter to autolink GitHub urls | |
251 | * [html-pipeline-redcarpet_filter](https://github.com/bmikol/html-pipeline-redcarpet_filter) - Render Markdown source text into Markdown HTML using Redcarpet | |
252 | * [html-pipeline-typogruby_filter](https://github.com/bmikol/html-pipeline-typogruby_filter) - Add Typogruby text filters to your HTML::Pipeline | |
253 | * [korgi](https://github.com/jodeci/korgi) - HTML::Pipeline filters for links to Rails resources | |
254 | ||
236 | 255 | |
237 | 256 | ## Instrumenting |
238 | 257 | |
239 | 258 | Filters and Pipelines can be set up to be instrumented when called. The pipeline |
240 | must be setup with an [ActiveSupport::Notifications] | |
241 | (http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html) | |
259 | must be set up with an | |
260 | [ActiveSupport::Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html) | |
242 | 261 | compatible service object and a name. New pipeline objects will default to the |
243 | 262 | `HTML::Pipeline.default_instrumentation_service` object. |
244 | 263 | |
284 | 303 | end |
285 | 304 | ``` |
286 | 305 | |
306 | ## FAQ | |
307 | ||
308 | ### 1. Why doesn't my pipeline work when there's no root element in the document? | |
309 | ||
310 | To make a pipeline work on a plain text document, put the `PlainTextInputFilter` | |
311 | at the beginning of your pipeline. This will wrap the content in a `div` so the | |
312 | filters have a root element to work with. If you're passing in an HTML fragment, | |
313 | but it doesn't have a root element, you can wrap the content in a `div` | |
314 | yourself. For example: | |
315 | ||
316 | ```ruby | |
317 | EmojiPipeline = Pipeline.new [ | |
318 | PlainTextInputFilter, # <- Wraps input in a div and escapes html tags | |
319 | EmojiFilter | |
320 | ], context | |
321 | ||
322 | plain_text = "Gutentag! :wave:" | |
323 | EmojiPipeline.call(plain_text) | |
324 | ||
325 | html_fragment = "This is outside of an html element, but <strong>this isn't. :+1:</strong>" | |
326 | EmojiPipeline.call("<div>#{html_fragment}</div>") # <- Wrap your own html fragments to avoid escaping | |
327 | ``` | |
328 | ||
329 | ### 2. How do I customize a whitelist for `SanitizationFilter`s? | |
330 | ||
331 | `SanitizationFilter::WHITELIST` is the default whitelist used if no `:whitelist` | |
332 | argument is given in the context. The default is a good starting template for | |
333 | you to add additional elements. You can either modify the constant's value, or | |
334 | re-define your own constant and pass that in via the context. | |
335 | ||
287 | 336 | ## Contributing |
288 | 337 | |
289 | 338 | Please review the [Contributing Guide](https://github.com/jch/html-pipeline/blob/master/CONTRIBUTING.md). |
306 | 355 | |
307 | 356 | This section is for gem maintainers to cut a new version of the gem. |
308 | 357 | |
309 | * update lib/html/pipeline/version.rb to next version number X.X.X following [semver](http://semver.org). | |
310 | * update CHANGELOG.md. Get latest changes with `git log --oneline vLAST_RELEASE..HEAD | grep Merge` | |
358 | * create a new branch named `release-x.y.z` where `x.y.z` follows [semver](http://semver.org) | |
359 | * update lib/html/pipeline/version.rb to next version number X.X.X | |
360 | * update CHANGELOG.md. Prepare a draft with `script/changelog` | |
361 | * push branch and create a new pull request | |
362 | * after tests are green, merge to master | |
311 | 363 | * on the master branch, run `script/release` |
0 | 0 | #!/usr/bin/env rake |
1 | require "bundler/gem_tasks" | |
1 | require 'rubygems' | |
2 | require 'bundler/setup' | |
3 | ||
4 | require 'bundler/gem_tasks' | |
2 | 5 | require 'rake/testtask' |
3 | 6 | |
4 | 7 | Rake::TestTask.new do |t| |
5 | t.libs << "test" | |
8 | t.libs << 'test' | |
6 | 9 | t.test_files = FileList['test/**/*_test.rb'] |
7 | 10 | t.verbose = true |
11 | t.warning = false | |
8 | 12 | end |
9 | 13 | |
10 | task :default => :test⏎ | |
14 | task default: :test |
3 | 3 | require 'optparse' |
4 | 4 | |
5 | 5 | # Accept "help", too |
6 | ARGV.map!{|a| a == "help" ? "--help" : a } | |
6 | ARGV.map! { |a| a == 'help' ? '--help' : a } | |
7 | 7 | |
8 | OptionParser.new do |opts| | |
8 | OptionParser.new do |opts| | |
9 | 9 | opts.banner = <<-HELP.gsub(/^ /, '') |
10 | 10 | Usage: html-pipeline [-h] [-f] |
11 | 11 | html-pipeline [FILTER [FILTER [...]]] < file.md |
12 | cat file.md | html-pipeline [FILTER [FILTER [...]]] | |
12 | cat file.md | html-pipeline [FILTER [FILTER [...]]] | |
13 | 13 | HELP |
14 | 14 | |
15 | opts.separator "Options:" | |
15 | opts.separator 'Options:' | |
16 | 16 | |
17 | opts.on("-f", "--filters", "List the available filters") do | |
18 | filters = HTML::Pipeline.constants.grep(/\w+Filter$/). | |
19 | map{|f| f.to_s.gsub(/Filter$/,'') } | |
17 | opts.on('-f', '--filters', 'List the available filters') do | |
18 | filters = HTML::Pipeline.constants.grep(/\w+Filter$/) | |
19 | .map { |f| f.to_s.gsub(/Filter$/, '') } | |
20 | 20 | |
21 | 21 | # Text filter doesn't work, no call method |
22 | filters -= ["Text"] | |
22 | filters -= ['Text'] | |
23 | 23 | |
24 | 24 | abort <<-HELP.gsub(/^ /, '') |
25 | 25 | Available filters: |
37 | 37 | HTML::Pipeline::ImageMaxWidthFilter, |
38 | 38 | HTML::Pipeline::EmojiFilter, |
39 | 39 | HTML::Pipeline::AutolinkFilter, |
40 | HTML::Pipeline::TableOfContentsFilter, | |
40 | HTML::Pipeline::TableOfContentsFilter | |
41 | 41 | ] |
42 | 42 | |
43 | # Add syntax highlighting if linguist is present | |
43 | # Add syntax highlighting if rouge is present | |
44 | 44 | begin |
45 | require 'linguist' | |
45 | require 'rouge' | |
46 | 46 | filters << HTML::Pipeline::SyntaxHighlightFilter |
47 | 47 | rescue LoadError |
48 | 48 | end |
51 | 51 | |
52 | 52 | def filter_named(name) |
53 | 53 | case name |
54 | when "Text" | |
54 | when 'Text' | |
55 | 55 | raise NameError # Text filter doesn't work, no call method |
56 | 56 | end |
57 | 57 | |
69 | 69 | end |
70 | 70 | |
71 | 71 | context = { |
72 | :asset_root => "/assets", | |
73 | :base_url => "/", | |
74 | :gfm => true | |
72 | asset_root: '/assets', | |
73 | base_url: '/', | |
74 | gfm: true | |
75 | 75 | } |
76 | 76 | |
77 | 77 | puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output] |
0 | # -*- encoding: utf-8 -*- | |
1 | require File.expand_path("../lib/html/pipeline/version", __FILE__) | |
0 | ||
1 | require File.expand_path('../lib/html/pipeline/version', __FILE__) | |
2 | 2 | |
3 | 3 | Gem::Specification.new do |gem| |
4 | gem.name = "html-pipeline" | |
4 | gem.name = 'html-pipeline' | |
5 | 5 | gem.version = HTML::Pipeline::VERSION |
6 | gem.license = "MIT" | |
7 | gem.authors = ["Ryan Tomayko", "Jerry Cheung"] | |
8 | gem.email = ["ryan@github.com", "jerry@github.com"] | |
9 | gem.description = %q{GitHub HTML processing filters and utilities} | |
10 | gem.summary = %q{Helpers for processing content through a chain of filters} | |
11 | gem.homepage = "https://github.com/jch/html-pipeline" | |
6 | gem.license = 'MIT' | |
7 | gem.authors = ['Ryan Tomayko', 'Jerry Cheung'] | |
8 | gem.email = ['ryan@github.com', 'jerry@github.com'] | |
9 | gem.description = 'GitHub HTML processing filters and utilities' | |
10 | gem.summary = 'Helpers for processing content through a chain of filters' | |
11 | gem.homepage = 'https://github.com/jch/html-pipeline' | |
12 | 12 | |
13 | gem.files = `git ls-files`.split $/ | |
14 | gem.test_files = gem.files.grep(%r{^test}) | |
15 | gem.require_paths = ["lib"] | |
13 | gem.files = `git ls-files -z`.split("\x0").reject { |f| f =~ %r{^(test|gemfiles|script)/} } | |
14 | gem.require_paths = ['lib'] | |
16 | 15 | |
17 | gem.add_dependency "nokogiri", "~> 1.4" | |
18 | gem.add_dependency "activesupport", ">= 2" | |
16 | gem.add_dependency 'activesupport', '>= 2' | |
17 | gem.add_dependency 'nokogiri', '>= 1.4' | |
19 | 18 | |
20 | 19 | gem.post_install_message = <<msg |
21 | 20 | ------------------------------------------------- |
10 | 10 | # mention. |
11 | 11 | # :info_url - Used to link to "more info" when someone mentions @mention |
12 | 12 | # or @mentioned. |
13 | # :username_pattern - Used to provide a custom regular expression to | |
14 | # identify usernames | |
13 | 15 | # |
14 | 16 | class MentionFilter < Filter |
15 | 17 | # Public: Find user @mentions in text. See |
26 | 28 | # the original text. |
27 | 29 | # |
28 | 30 | # Returns a String replaced with the return of the block. |
29 | def self.mentioned_logins_in(text) | |
30 | text.gsub MentionPattern do |match| | |
31 | login = $1 | |
31 | def self.mentioned_logins_in(text, username_pattern = UsernamePattern) | |
32 | text.gsub MentionPatterns[username_pattern] do |match| | |
33 | login = Regexp.last_match(1) | |
32 | 34 | yield match, login, MentionLogins.include?(login.downcase) |
33 | 35 | end |
34 | 36 | end |
35 | 37 | |
36 | # Pattern used to extract @mentions from text. | |
37 | MentionPattern = / | |
38 | (?:^|\W) # beginning of string or non-word char | |
39 | @((?>[a-z0-9][a-z0-9-]*)) # @username | |
40 | (?!\/) # without a trailing slash | |
41 | (?= | |
42 | \.+[ \t\W]| # dots followed by space or non-word character | |
43 | \.+$| # dots at end of line | |
44 | [^0-9a-zA-Z_.]| # non-word character except dot | |
45 | $ # end of line | |
46 | ) | |
47 | /ix | |
38 | # Hash that contains all of the mention patterns used by the pipeline | |
39 | MentionPatterns = Hash.new do |hash, key| | |
40 | hash[key] = / | |
41 | (?:^|\W) # beginning of string or non-word char | |
42 | @((?>#{key})) # @username | |
43 | (?!\/) # without a trailing slash | |
44 | (?= | |
45 | \.+[ \t\W]| # dots followed by space or non-word character | |
46 | \.+$| # dots at end of line | |
47 | [^0-9a-zA-Z_.]| # non-word character except dot | |
48 | $ # end of line | |
49 | ) | |
50 | /ix | |
51 | end | |
52 | ||
53 | # Default pattern used to extract usernames from text. The value can be | |
54 | # overridden by providing the username_pattern variable in the context. | |
55 | UsernamePattern = /[a-z0-9][a-z0-9-]*/ | |
48 | 56 | |
49 | 57 | # List of username logins that, when mentioned, link to the blog post |
50 | 58 | # about @mentions instead of triggering a real mention. |
51 | MentionLogins = %w( | |
59 | MentionLogins = %w[ | |
52 | 60 | mention |
53 | 61 | mentions |
54 | 62 | mentioned |
55 | 63 | mentioning |
56 | ) | |
64 | ].freeze | |
57 | 65 | |
58 | 66 | # Don't look for mentions in text nodes that are children of these elements |
59 | IGNORE_PARENTS = %w(pre code a style).to_set | |
67 | IGNORE_PARENTS = %w(pre code a style script).to_set | |
60 | 68 | |
61 | 69 | def call |
62 | 70 | result[:mentioned_usernames] ||= [] |
63 | 71 | |
64 | search_text_nodes(doc).each do |node| | |
72 | doc.search('.//text()').each do |node| | |
65 | 73 | content = node.to_html |
66 | next if !content.include?('@') | |
74 | next unless content.include?('@') | |
67 | 75 | next if has_ancestor?(node, IGNORE_PARENTS) |
68 | html = mention_link_filter(content, base_url, info_url) | |
76 | html = mention_link_filter(content, base_url, info_url, username_pattern) | |
69 | 77 | next if html == content |
70 | 78 | node.replace(html) |
71 | 79 | end |
78 | 86 | context[:info_url] || nil |
79 | 87 | end |
80 | 88 | |
89 | def username_pattern | |
90 | context[:username_pattern] || UsernamePattern | |
91 | end | |
92 | ||
81 | 93 | # Replace user @mentions in text with links to the mentioned user's |
82 | 94 | # profile page. |
83 | 95 | # |
85 | 97 | # base_url - The base URL used to construct user profile URLs. |
86 | 98 | # info_url - The "more info" URL used to link to more info on @mentions. |
87 | 99 | # If nil we don't link @mention or @mentioned. |
100 | # username_pattern - Regular expression used to identify usernames in | |
101 | # text | |
88 | 102 | # |
89 | 103 | # Returns a string with @mentions replaced with links. All links have a |
90 | 104 | # 'user-mention' class name attached for styling. |
91 | def mention_link_filter(text, base_url='/', info_url=nil) | |
92 | self.class.mentioned_logins_in(text) do |match, login, is_mentioned| | |
105 | def mention_link_filter(text, _base_url = '/', info_url = nil, username_pattern = UsernamePattern) | |
106 | self.class.mentioned_logins_in(text, username_pattern) do |match, login, is_mentioned| | |
93 | 107 | link = |
94 | 108 | if is_mentioned |
95 | 109 | link_to_mention_info(login, info_url) |
101 | 115 | end |
102 | 116 | end |
103 | 117 | |
104 | def link_to_mention_info(text, info_url=nil) | |
118 | def link_to_mention_info(text, info_url = nil) | |
105 | 119 | return "@#{text}" if info_url.nil? |
106 | "<a href='#{info_url}' class='user-mention'>" + | |
107 | "@#{text}" + | |
108 | "</a>" | |
120 | "<a href='#{info_url}' class='user-mention'>" \ | |
121 | "@#{text}" \ | |
122 | '</a>' | |
109 | 123 | end |
110 | 124 | |
111 | 125 | def link_to_mentioned_user(login) |
112 | 126 | result[:mentioned_usernames] |= [login] |
113 | url = File.join(base_url, login) | |
114 | "<a href='#{url}' class='user-mention'>" + | |
115 | "@#{login}" + | |
116 | "</a>" | |
127 | ||
128 | url = base_url.dup | |
129 | url << '/' unless url =~ /[\/~]\z/ | |
130 | ||
131 | "<a href='#{url << login}' class='user-mention'>" \ | |
132 | "@#{login}" \ | |
133 | '</a>' | |
117 | 134 | end |
118 | 135 | end |
119 | 136 | end |
1 | 1 | |
2 | 2 | module HTML |
3 | 3 | class Pipeline |
4 | ||
5 | 4 | class AbsoluteSourceFilter < Filter |
6 | 5 | # HTML Filter for replacing relative and root relative image URLs with |
7 | 6 | # fully qualified URLs |
17 | 16 | # This filter does not write additional information to the context. |
18 | 17 | # This filter would need to be run before CamoFilter. |
19 | 18 | def call |
20 | doc.search("img").each do |element| | |
19 | doc.search('img').each do |element| | |
21 | 20 | next if element['src'].nil? || element['src'].empty? |
22 | 21 | src = element['src'].strip |
23 | unless src.start_with? 'http' | |
24 | if src.start_with? '/' | |
25 | base = image_base_url | |
26 | else | |
27 | base = image_subpage_url | |
28 | end | |
29 | element["src"] = URI.join(base, src).to_s | |
30 | end | |
22 | next if src.start_with? 'http' | |
23 | base = if src.start_with? '/' | |
24 | image_base_url | |
25 | else | |
26 | image_subpage_url | |
27 | end | |
28 | element['src'] = URI.join(base, src).to_s | |
31 | 29 | end |
32 | 30 | doc |
33 | 31 | end |
34 | ||
32 | ||
35 | 33 | # Private: the base url you want to use |
36 | 34 | def image_base_url |
37 | context[:image_base_url] or raise "Missing context :image_base_url for #{self.class.name}" | |
35 | context[:image_base_url] || raise("Missing context :image_base_url for #{self.class.name}") | |
38 | 36 | end |
39 | 37 | |
40 | 38 | # Private: the relative url you want to use |
41 | 39 | def image_subpage_url |
42 | context[:image_subpage_url] or raise "Missing context :image_subpage_url for #{self.class.name}" | |
40 | context[:image_subpage_url] || raise("Missing context :image_subpage_url for #{self.class.name}") | |
43 | 41 | end |
44 | ||
45 | 42 | end |
46 | 43 | end |
47 | end⏎ | |
44 | end |
0 | begin | |
1 | require "rinku" | |
2 | rescue LoadError => _ | |
3 | abort "Missing dependency 'rinku' for AutolinkFilter. See README.md for details." | |
4 | end | |
0 | HTML::Pipeline.require_dependency('rinku', 'AutolinkFilter') | |
5 | 1 | |
6 | 2 | module HTML |
7 | 3 | class Pipeline |
23 | 23 | def call |
24 | 24 | return doc unless asset_proxy_enabled? |
25 | 25 | |
26 | doc.search("img").each do |element| | |
26 | doc.search('img').each do |element| | |
27 | 27 | original_src = element['src'] |
28 | 28 | next unless original_src |
29 | 29 | |
85 | 85 | # Private: helper to hexencode a string. Each byte ends up encoded into |
86 | 86 | # two characters, zero padded value in the range [0-9a-f]. |
87 | 87 | def hexencode(str) |
88 | str.to_enum(:each_byte).map { |byte| "%02x" % byte }.join | |
88 | str.to_enum(:each_byte).map { |byte| format('%02x', byte) }.join | |
89 | 89 | end |
90 | 90 | end |
91 | 91 | end |
0 | begin | |
1 | require "escape_utils" | |
2 | rescue LoadError => _ | |
3 | abort "Missing dependency 'escape_utils' for EmailReplyFilter. See README.md for details." | |
4 | end | |
5 | ||
6 | begin | |
7 | require "email_reply_parser" | |
8 | rescue LoadError => _ | |
9 | abort "Missing dependency 'email_reply_parser' for EmailReplyFilter. See README.md for details." | |
10 | end | |
0 | HTML::Pipeline.require_dependency('escape_utils', 'EmailReplyFilter') | |
1 | HTML::Pipeline.require_dependency('email_reply_parser', 'EmailReplyFilter') | |
11 | 2 | |
12 | 3 | module HTML |
13 | 4 | class Pipeline |
25 | 16 | EMAIL_QUOTED_HEADER = %(<div class="email-quoted-reply">).freeze |
26 | 17 | EMAIL_SIGNATURE_HEADER = %(<div class="email-signature-reply">).freeze |
27 | 18 | EMAIL_FRAGMENT_HEADER = %(<div class="email-fragment">).freeze |
28 | EMAIL_HEADER_END = "</div>".freeze | |
19 | EMAIL_HEADER_END = '</div>'.freeze | |
20 | EMAIL_REGEX = /[^@\s.][^@\s]*@\[?[a-z0-9.-]+\]?/ | |
21 | HIDDEN_EMAIL_PATTERN = '***@***.***'.freeze | |
29 | 22 | |
30 | 23 | # Scans an email body to determine which bits are quoted and which should |
31 | 24 | # be hidden. EmailReplyParser is used to split the comment into an Array |
44 | 37 | paragraphs = EmailReplyParser.read(text.dup).fragments.map do |fragment| |
45 | 38 | pieces = [escape_html(fragment.to_s.strip).gsub(/^\s*(>|&gt;)/, '')]
46 | 39 | if fragment.quoted? |
40 | if context[:hide_quoted_email_addresses] | |
41 | pieces.map! do |piece| | |
42 | piece.gsub(EMAIL_REGEX, HIDDEN_EMAIL_PATTERN) | |
43 | end | |
44 | end | |
47 | 45 | pieces.unshift EMAIL_QUOTED_HEADER |
48 | 46 | pieces << EMAIL_HEADER_END |
49 | 47 | elsif fragment.signature? |
0 | require "cgi" | |
1 | ||
2 | begin | |
3 | require "gemoji" | |
4 | rescue LoadError => _ | |
5 | abort "Missing dependency 'gemoji' for EmojiFilter. See README.md for details." | |
6 | end | |
0 | require 'cgi' | |
1 | HTML::Pipeline.require_dependency('gemoji', 'EmojiFilter') | |
7 | 2 | |
8 | 3 | module HTML |
9 | 4 | class Pipeline |
12 | 7 | # Context: |
13 | 8 | # :asset_root (required) - base url to link to emoji sprite |
14 | 9 | # :asset_path (optional) - url path to link to emoji sprite. :file_name can be used as a placeholder for the sprite file name. If no asset_path is set "emoji/:file_name" is used. |
10 | # :ignored_ancestor_tags (optional) - Tags to stop the emojification. Nodes with a matching ancestor HTML tag will not be emojified. Defaults to pre, code, and tt tags. Pass extra tags as an array, e.g., %w(blockquote summary). | |
11 | # :img_attrs (optional) - Attributes for the generated img tag. E.g. pass { "draggable" => true, "height" => nil } to set the draggable attribute to "true" and remove the height attribute from the generated img tag. | |
15 | 12 | class EmojiFilter < Filter |
13 | DEFAULT_IGNORED_ANCESTOR_TAGS = %w[pre code tt].freeze | |
14 | ||
16 | 15 | def call |
17 | search_text_nodes(doc).each do |node| | |
18 | content = node.to_html | |
16 | doc.search('.//text()').each do |node| | |
17 | content = node.text | |
19 | 18 | next unless content.include?(':') |
20 | next if has_ancestor?(node, %w(pre code)) | |
19 | next if has_ancestor?(node, ignored_ancestor_tags) | |
21 | 20 | html = emoji_image_filter(content) |
22 | 21 | next if html == content |
23 | 22 | node.replace(html) |
24 | 23 | end |
25 | 24 | doc |
26 | 25 | end |
27 | ||
26 | ||
28 | 27 | # Implementation of validate hook. |
29 | 28 | # Errors should raise exceptions or use an existing validator. |
30 | 29 | def validate |
37 | 36 | # |
38 | 37 | # Returns a String with :emoji: replaced with images. |
39 | 38 | def emoji_image_filter(text) |
40 | text.gsub(emoji_pattern) do |match| | |
41 | name = $1 | |
42 | "<img class='emoji' title=':#{name}:' alt=':#{name}:' src='#{emoji_url(name)}' height='20' width='20' align='absmiddle' />" | |
39 | text.gsub(emoji_pattern) do |_match| | |
40 | emoji_image_tag(Regexp.last_match(1)) | |
43 | 41 | end |
44 | 42 | end |
45 | 43 | |
57 | 55 | # Returns the context's asset_path or the default path if no context asset_path is given. |
58 | 56 | def asset_path(name) |
59 | 57 | if context[:asset_path] |
60 | context[:asset_path].gsub(":file_name", emoji_filename(name)) | |
58 | context[:asset_path].gsub(':file_name', emoji_filename(name)) | |
61 | 59 | else |
62 | File.join("emoji", emoji_filename(name)) | |
60 | File.join('emoji', emoji_filename(name)) | |
63 | 61 | end |
64 | 62 | end |
65 | 63 | |
66 | 64 | private |
65 | ||
66 | # Build an emoji image tag | |
67 | def emoji_image_tag(name) | |
68 | require 'active_support/core_ext/hash/indifferent_access' | |
69 | html_attrs = | |
70 | default_img_attrs(name) | |
71 | .merge!((context[:img_attrs] || {}).with_indifferent_access) | |
72 | .map { |attr, value| !value.nil? && %(#{attr}="#{value.respond_to?(:call) && value.call(name) || value}") } | |
73 | .reject(&:blank?).join(' '.freeze) | |
74 | ||
75 | "<img #{html_attrs}>" | |
76 | end | |
77 | ||
78 | # Default attributes for img tag | |
79 | def default_img_attrs(name) | |
80 | { | |
81 | 'class' => 'emoji'.freeze, | |
82 | 'title' => ":#{name}:", | |
83 | 'alt' => ":#{name}:", | |
84 | 'src' => emoji_url(name).to_s, | |
85 | 'height' => '20'.freeze, | |
86 | 'width' => '20'.freeze, | |
87 | 'align' => 'absmiddle'.freeze | |
88 | } | |
89 | end | |
67 | 90 | |
68 | 91 | def emoji_url(name) |
69 | 92 | File.join(asset_root, asset_path(name)) |
78 | 101 | self.class.emoji_pattern |
79 | 102 | end |
80 | 103 | |
81 | # Detect gemoji v2 which has a new API | |
82 | # https://github.com/jch/html-pipeline/pull/129 | |
83 | if Emoji.respond_to?(:all) | |
84 | def self.emoji_names | |
85 | Emoji.all.map(&:aliases).flatten.sort | |
86 | end | |
104 | def self.emoji_names | |
105 | Emoji.all.map(&:aliases).flatten.sort | |
106 | end | |
87 | 107 | |
88 | def emoji_filename(name) | |
89 | Emoji.find_by_alias(name).image_filename | |
90 | end | |
91 | else | |
92 | def self.emoji_names | |
93 | Emoji.names | |
94 | end | |
108 | def emoji_filename(name) | |
109 | Emoji.find_by_alias(name).image_filename | |
110 | end | |
95 | 111 | |
96 | def emoji_filename(name) | |
97 | "#{::CGI.escape(name)}.png" | |
112 | # Return ancestor tags to stop the emojification. | |
113 | # | |
114 | # @return [Array<String>] Ancestor tags. | |
115 | def ignored_ancestor_tags | |
116 | if context[:ignored_ancestor_tags] | |
117 | DEFAULT_IGNORED_ANCESTOR_TAGS | context[:ignored_ancestor_tags] | |
118 | else | |
119 | DEFAULT_IGNORED_ANCESTOR_TAGS | |
98 | 120 | end |
99 | 121 | end |
100 | 122 | end |
29 | 29 | class InvalidDocumentException < StandardError; end |
30 | 30 | |
31 | 31 | def initialize(doc, context = nil, result = nil) |
32 | if doc.kind_of?(String) | |
32 | if doc.is_a?(String) | |
33 | 33 | @html = doc.to_str |
34 | 34 | @doc = nil |
35 | 35 | else |
58 | 58 | @doc ||= parse_html(html) |
59 | 59 | end |
60 | 60 | |
61 | # Searches a Nokogiri::HTML::DocumentFragment for text nodes. If no elements | |
62 | # are found, a second search without root tags is invoked. | |
63 | def search_text_nodes(doc) | |
64 | nodes = doc.xpath('.//text()') | |
65 | nodes.empty? ? doc.xpath('text()') : nodes | |
66 | end | |
67 | ||
68 | 61 | # The String representation of the document. If a DocumentFragment was |
69 | 62 | # provided to the Filter, it is serialized into a String when this method is |
70 | 63 | # called. |
82 | 75 | end |
83 | 76 | |
84 | 77 | # Make sure the context has everything we need. Noop: Subclasses can override. |
85 | def validate | |
86 | end | |
78 | def validate; end | |
87 | 79 | |
88 | 80 | # The Repository object provided in the context hash, or nil when no |
89 | 81 | # :repository was specified. |
122 | 114 | # Returns true when the node has a matching ancestor. |
123 | 115 | def has_ancestor?(node, tags) |
124 | 116 | while node = node.parent |
125 | if tags.include?(node.name.downcase) | |
126 | break true | |
127 | end | |
117 | break true if tags.include?(node.name.downcase) | |
128 | 118 | end |
129 | 119 | end |
130 | 120 | |
140 | 130 | # the last filter returns a String. |
141 | 131 | def self.to_document(input, context = nil) |
142 | 132 | html = call(input, context) |
143 | HTML::Pipeline::parse(html) | |
133 | HTML::Pipeline.parse(html) | |
144 | 134 | end |
145 | 135 | |
146 | 136 | # Like call but guarantees that a string of HTML markup is returned. |
164 | 154 | |
165 | 155 | if missing.any? |
166 | 156 | raise ArgumentError, |
167 | "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}" | |
157 | "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}" | |
168 | 158 | end |
169 | 159 | end |
170 | 160 | end |
6 | 6 | # :http_url - The HTTP url to force HTTPS. Falls back to :base_url |
7 | 7 | class HttpsFilter < Filter |
8 | 8 | def call |
9 | doc.css(%Q(a[href^="#{http_url}"])).each do |element| | |
10 | element['href'] = element['href'].sub(/^http:/,'https:') | |
9 | doc.css(%(a[href^="#{http_url}"])).each do |element| | |
10 | element['href'] = element['href'].sub(/^http:/, 'https:') | |
11 | 11 | end |
12 | 12 | doc |
13 | 13 | end |
0 | module HTML | |
1 | class Pipeline | |
2 | # HTML Filter that converts image's url into <img> tag. | |
3 | # For example, it will convert | |
4 | # http://example.com/test.jpg | |
5 | # into | |
6 | # <img src="http://example.com/test.jpg" alt=""/>. | |
7 | ||
8 | class ImageFilter < TextFilter | |
9 | def call | |
10 | @text.gsub(/(https|http)?:\/\/.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?/i) do |match| | |
11 | %(<img src="#{match}" alt=""/>) | |
12 | end | |
13 | end | |
14 | end | |
15 | end | |
16 | end |
16 | 16 | # js injection via javascript: urls. |
17 | 17 | next if element['src'].to_s.strip =~ /\Ajavascript/i |
18 | 18 | |
19 | element['style'] = "max-width:100%;" | |
19 | element['style'] = 'max-width:100%;' | |
20 | 20 | |
21 | if !has_ancestor?(element, %w(a)) | |
22 | link_image element | |
23 | end | |
21 | link_image element unless has_ancestor?(element, %w[a]) | |
24 | 22 | end |
25 | 23 | |
26 | 24 | doc |
27 | 25 | end |
28 | 26 | |
29 | 27 | def link_image(element) |
30 | link = doc.document.create_element('a', :href => element['src'], :target => '_blank') | |
28 | link = doc.document.create_element('a', href: element['src'], target: '_blank') | |
31 | 29 | link.add_child(element.dup) |
32 | 30 | element.replace(link) |
33 | 31 | end |
34 | 32 | end |
35 | 33 | end |
36 | end⏎ | |
34 | end |
0 | begin | |
1 | require "github/markdown" | |
2 | rescue LoadError => _ | |
3 | abort "Missing dependency 'github-markdown' for MarkdownFilter. See README.md for details." | |
4 | end | |
0 | HTML::Pipeline.require_dependency('commonmarker', 'MarkdownFilter') | |
5 | 1 | |
6 | 2 | module HTML |
7 | 3 | class Pipeline |
11 | 7 | # |
12 | 8 | # Context options: |
13 | 9 | # :gfm => false Disable GFM line-end processing |
10 | # :commonmarker_extensions => [ :table, :strikethrough, | |
11 | # :tagfilter, :autolink ] CommonMarker extensions to enable | |
14 | 12 | # |
15 | 13 | # This filter does not write any additional information to the context hash. |
16 | 14 | class MarkdownFilter < TextFilter |
17 | 15 | def initialize(text, context = nil, result = nil) |
18 | 16 | super text, context, result |
19 | @text = @text.gsub "\r", '' | |
17 | @text = @text.delete "\r" | |
20 | 18 | end |
21 | 19 | |
22 | 20 | # Convert Markdown to HTML using the best available implementation |
23 | 21 | # and convert into a DocumentFragment. |
24 | 22 | def call |
25 | mode = (context[:gfm] != false) ? :gfm : :markdown | |
26 | html = GitHub::Markdown.to_html(@text, mode) | |
23 | options = [:GITHUB_PRE_LANG] | |
24 | options << :HARDBREAKS if context[:gfm] != false | |
25 | extensions = context.fetch( | |
26 | :commonmarker_extensions, | |
27 | %i[table strikethrough tagfilter autolink] | |
28 | ) | |
29 | html = CommonMarker.render_html(@text, options, extensions) | |
27 | 30 | html.rstrip! |
28 | 31 | html |
29 | 32 | end |
0 | begin | |
1 | require "escape_utils" | |
2 | rescue LoadError => _ | |
3 | abort "Missing dependency 'escape_utils' for PlainTextInputFilter. See README.md for details." | |
4 | end | |
0 | HTML::Pipeline.require_dependency('escape_utils', 'PlainTextInputFilter') | |
5 | 1 | |
6 | 2 | module HTML |
7 | 3 | class Pipeline |
0 | begin | |
1 | require "sanitize" | |
2 | rescue LoadError => _ | |
3 | abort "Missing dependency 'sanitize' for SanitizationFilter. See README.md for details." | |
4 | end | |
0 | HTML::Pipeline.require_dependency('sanitize', 'SanitizationFilter') | |
5 | 1 | |
6 | 2 | module HTML |
7 | 3 | class Pipeline |
24 | 20 | # |
25 | 21 | # This filter does not write additional information to the context. |
26 | 22 | class SanitizationFilter < Filter |
27 | LISTS = Set.new(%w(ul ol).freeze) | |
23 | LISTS = Set.new(%w[ul ol].freeze) | |
28 | 24 | LIST_ITEM = 'li'.freeze |
29 | 25 | |
30 | 26 | # List of table child elements. These must be contained by a <table> element |
31 | 27 | # or they are not allowed through. Otherwise they can be used to break out |
32 | 28 | # of places we're using tables to contain formatted user content (like pull |
33 | 29 | # request review comments). |
34 | TABLE_ITEMS = Set.new(%w(tr td th).freeze) | |
30 | TABLE_ITEMS = Set.new(%w[tr td th].freeze) | |
35 | 31 | TABLE = 'table'.freeze |
36 | TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze) | |
32 | TABLE_SECTIONS = Set.new(%w[thead tbody tfoot].freeze) | |
37 | 33 | |
38 | 34 | # These schemes are the only ones allowed in <a href> attributes by default. |
39 | 35 | ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze |
41 | 37 | # The main sanitization whitelist. Only these elements and attributes are |
42 | 38 | # allowed through by default. |
43 | 39 | WHITELIST = { |
44 | :elements => %w( | |
40 | elements: %w[ | |
45 | 41 | h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt |
46 | 42 | div ins del sup sub p ol ul table thead tbody tfoot blockquote |
47 | dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike | |
48 | ), | |
49 | :remove_contents => ['script'], | |
50 | :attributes => { | |
51 | 'a' => ['href'], | |
52 | 'img' => ['src'], | |
53 | 'div' => ['itemscope', 'itemtype'], | |
54 | :all => ['abbr', 'accept', 'accept-charset', | |
55 | 'accesskey', 'action', 'align', 'alt', 'axis', | |
56 | 'border', 'cellpadding', 'cellspacing', 'char', | |
57 | 'charoff', 'charset', 'checked', 'cite', | |
58 | 'clear', 'cols', 'colspan', 'color', | |
59 | 'compact', 'coords', 'datetime', 'details', 'dir', | |
60 | 'disabled', 'enctype', 'for', 'frame', | |
61 | 'headers', 'height', 'hreflang', | |
62 | 'hspace', 'ismap', 'label', 'lang', | |
63 | 'longdesc', 'maxlength', 'media', 'method', | |
64 | 'multiple', 'name', 'nohref', 'noshade', | |
65 | 'nowrap', 'prompt', 'readonly', 'rel', 'rev', | |
66 | 'rows', 'rowspan', 'rules', 'scope', | |
67 | 'selected', 'shape', 'size', 'span', | |
68 | 'start', 'summary', 'tabindex', 'target', | |
69 | 'title', 'type', 'usemap', 'valign', 'value', | |
70 | 'vspace', 'width', 'itemprop'] | |
43 | dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary details | |
44 | ], | |
45 | remove_contents: ['script'], | |
46 | attributes: { | |
47 | 'a' => ['href'], | |
48 | 'img' => %w[src longdesc], | |
49 | 'div' => %w[itemscope itemtype], | |
50 | 'blockquote' => ['cite'], | |
51 | 'del' => ['cite'], | |
52 | 'ins' => ['cite'], | |
53 | 'q' => ['cite'], | |
54 | :all => ['abbr', 'accept', 'accept-charset', | |
55 | 'accesskey', 'action', 'align', 'alt', 'axis', | |
56 | 'border', 'cellpadding', 'cellspacing', 'char', | |
57 | 'charoff', 'charset', 'checked', | |
58 | 'clear', 'cols', 'colspan', 'color', | |
59 | 'compact', 'coords', 'datetime', 'dir', | |
60 | 'disabled', 'enctype', 'for', 'frame', | |
61 | 'headers', 'height', 'hreflang', | |
62 | 'hspace', 'ismap', 'label', 'lang', | |
63 | 'maxlength', 'media', 'method', | |
64 | 'multiple', 'name', 'nohref', 'noshade', | |
65 | 'nowrap', 'open', 'prompt', 'readonly', 'rel', 'rev', | |
66 | 'rows', 'rowspan', 'rules', 'scope', | |
67 | 'selected', 'shape', 'size', 'span', | |
68 | 'start', 'summary', 'tabindex', 'target', | |
69 | 'title', 'type', 'usemap', 'valign', 'value', | |
70 | 'vspace', 'width', 'itemprop'] | |
71 | 71 | }, |
72 | :protocols => { | |
73 | 'a' => {'href' => ANCHOR_SCHEMES}, | |
74 | 'img' => {'src' => ['http', 'https', :relative]} | |
72 | protocols: { | |
73 | 'a' => { 'href' => ANCHOR_SCHEMES }, | |
74 | 'blockquote' => { 'cite' => ['http', 'https', :relative] }, | |
75 | 'del' => { 'cite' => ['http', 'https', :relative] }, | |
76 | 'ins' => { 'cite' => ['http', 'https', :relative] }, | |
77 | 'q' => { 'cite' => ['http', 'https', :relative] }, | |
78 | 'img' => { | |
79 | 'src' => ['http', 'https', :relative], | |
80 | 'longdesc' => ['http', 'https', :relative] | |
81 | } | |
75 | 82 | }, |
76 | :transformers => [ | |
83 | transformers: [ | |
77 | 84 | # Top-level <li> elements are removed because they can break out of |
78 | 85 | # containing markup. |
79 | 86 | lambda { |env| |
80 | name, node = env[:node_name], env[:node] | |
81 | if name == LIST_ITEM && !node.ancestors.any?{ |n| LISTS.include?(n.name) } | |
87 | name = env[:node_name] | |
88 | node = env[:node] | |
89 | if name == LIST_ITEM && node.ancestors.none? { |n| LISTS.include?(n.name) } | |
82 | 90 | node.replace(node.children) |
83 | 91 | end |
84 | 92 | }, |
85 | 93 | |
86 | 94 | # Table child elements that are not contained by a <table> are removed. |
87 | 95 | lambda { |env| |
88 | name, node = env[:node_name], env[:node] | |
89 | if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && !node.ancestors.any? { |n| n.name == TABLE } | |
96 | name = env[:node_name] | |
97 | node = env[:node] | |
98 | if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && node.ancestors.none? { |n| n.name == TABLE } | |
90 | 99 | node.replace(node.children) |
91 | 100 | end |
92 | 101 | } |
93 | 102 | ] |
94 | } | |
103 | }.freeze | |
95 | 104 | |
96 | 105 | # A more limited sanitization whitelist. This includes all attributes, |
97 | 106 | # protocols, and transformers from WHITELIST but with a more locked down |
98 | 107 | # set of allowed elements. |
99 | 108 | LIMITED = WHITELIST.merge( |
100 | :elements => %w(b i strong em a pre code img ins del sup sub p ol ul li)) | |
109 | elements: %w[b i strong em a pre code img ins del sup sub p ol ul li] | |
110 | ) | |
101 | 111 | |
102 | 112 | # Strip all HTML tags from the document. |
103 | FULL = { :elements => [] } | |
113 | FULL = { elements: [] }.freeze | |
104 | 114 | |
105 | 115 | # Sanitize markup using the Sanitize library. |
106 | 116 | def call |
0 | begin | |
1 | require "linguist" | |
2 | rescue LoadError => _ | |
3 | abort "Missing dependency 'github-linguist' for SyntaxHighlightFilter. See README.md for details." | |
4 | end | |
0 | HTML::Pipeline.require_dependency('rouge', 'SyntaxHighlightFilter') | |
5 | 1 | |
6 | 2 | module HTML |
7 | 3 | class Pipeline |
8 | 4 | # HTML Filter that syntax highlights code blocks wrapped |
9 | 5 | # in <pre lang="...">. |
10 | 6 | class SyntaxHighlightFilter < Filter |
7 | def initialize(*args) | |
8 | super(*args) | |
9 | @formatter = Rouge::Formatters::HTML.new | |
10 | end | |
11 | ||
11 | 12 | def call |
12 | 13 | doc.search('pre').each do |node| |
13 | 14 | default = context[:highlight] && context[:highlight].to_s |
14 | 15 | next unless lang = node['lang'] || default |
15 | next unless lexer = Pygments::Lexer[lang] | |
16 | next unless lexer = lexer_for(lang) | |
16 | 17 | text = node.inner_text |
17 | 18 | |
18 | html = highlight_with_timeout_handling(lexer, text) | |
19 | html = highlight_with_timeout_handling(text, lang) | |
19 | 20 | next if html.nil? |
20 | 21 | |
21 | if (node = node.replace(html).first) | |
22 | klass = node["class"] | |
23 | klass = [klass, "highlight-#{lang}"].compact.join " " | |
22 | node.inner_html = html | |
23 | klass = node['class'] | |
24 | scope = context[:scope] || "highlight-#{lang}" | |
25 | klass = [klass, scope].compact.join ' ' | |
24 | 26 | |
25 | node["class"] = klass | |
26 | end | |
27 | node['class'] = klass | |
27 | 28 | end |
28 | 29 | doc |
29 | 30 | end |
30 | 31 | |
31 | def highlight_with_timeout_handling(lexer, text) | |
32 | lexer.highlight(text) | |
33 | rescue Timeout::Error => boom | |
32 | def highlight_with_timeout_handling(text, lang) | |
33 | Rouge.highlight(text, lang, @formatter) | |
34 | rescue Timeout::Error => _ | |
34 | 35 | nil |
36 | end | |
37 | ||
38 | def lexer_for(lang) | |
39 | Rouge::Lexer.find(lang) | |
35 | 40 | end |
36 | 41 | end |
37 | 42 | end |
3 | 3 | attr_reader :text |
4 | 4 | |
5 | 5 | def initialize(text, context = nil, result = nil) |
6 | raise TypeError, "text cannot be HTML" if text.is_a?(DocumentFragment) | |
6 | raise TypeError, 'text cannot be HTML' if text.is_a?(DocumentFragment) | |
7 | 7 | # Ensure that this is always a string |
8 | 8 | @text = text.respond_to?(:to_str) ? text.to_str : text.to_s |
9 | 9 | super nil, context, result |
10 | 10 | end |
11 | 11 | end |
12 | 12 | end |
13 | end⏎ | |
13 | end |
0 | begin | |
1 | require "redcloth" | |
2 | rescue LoadError => _ | |
3 | abort "Missing dependency 'RedCloth' for TextileFilter. See README.md for details." | |
4 | end | |
0 | HTML::Pipeline.require_dependency('redcloth', 'RedCloth') | |
5 | 1 | |
6 | 2 | module HTML |
7 | 3 | class Pipeline |
22 | 22 | # result[:output].to_s |
23 | 23 | # # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..." |
24 | 24 | class TableOfContentsFilter < Filter |
25 | PUNCTUATION_REGEXP = RUBY_VERSION > "1.9" ? /[^\p{Word}\- ]/u : /[^\w\- ]/ | |
25 | PUNCTUATION_REGEXP = RUBY_VERSION > '1.9' ? /[^\p{Word}\- ]/u : /[^\w\- ]/ | |
26 | ||
27 | # The icon that will be placed next to an anchored rendered markdown header | |
28 | def anchor_icon | |
29 | context[:anchor_icon] || '<span aria-hidden="true" class="octicon octicon-link"></span>' | |
30 | end | |
26 | 31 | |
27 | 32 | def call |
28 | result[:toc] = "" | |
33 | result[:toc] = '' | |
29 | 34 | |
30 | 35 | headers = Hash.new(0) |
31 | 36 | doc.css('h1, h2, h3, h4, h5, h6').each do |node| |
32 | 37 | text = node.text |
33 | id = text.downcase | |
38 | id = ascii_downcase(text) | |
34 | 39 | id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation |
35 | id.gsub!(' ', '-') # replace spaces with dash | |
40 | id.tr!(' ', '-') # replace spaces with dash | |
36 | 41 | |
37 | uniq = (headers[id] > 0) ? "-#{headers[id]}" : '' | |
42 | uniq = headers[id] > 0 ? "-#{headers[id]}" : '' | |
38 | 43 | headers[id] += 1 |
39 | 44 | if header_content = node.children.first |
40 | result[:toc] << %Q{<li><a href="##{id}#{uniq}">#{text}</a></li>\n} | |
41 | header_content.add_previous_sibling(%Q{<a id="#{id}#{uniq}" class="anchor" href="##{id}#{uniq}" aria-hidden="true"><span class="octicon octicon-link"></span></a>}) | |
45 | result[:toc] << %(<li><a href="##{id}#{uniq}">#{text}</a></li>\n) | |
46 | header_content.add_previous_sibling(%(<a id="#{id}#{uniq}" class="anchor" href="##{id}#{uniq}" aria-hidden="true">#{anchor_icon}</a>)) | |
42 | 47 | end |
43 | 48 | end |
44 | result[:toc] = %Q{<ul class="section-nav">\n#{result[:toc]}</ul>} unless result[:toc].empty? | |
49 | result[:toc] = %(<ul class="section-nav">\n#{result[:toc]}</ul>) unless result[:toc].empty? | |
45 | 50 | doc |
51 | end | |
52 | ||
53 | if RUBY_VERSION >= '2.4' | |
54 | def ascii_downcase(str) | |
55 | str.downcase(:ascii) | |
56 | end | |
57 | else | |
58 | def ascii_downcase(str) | |
59 | str.downcase | |
60 | end | |
46 | 61 | end |
47 | 62 | end |
48 | 63 | end |
0 | require "nokogiri" | |
1 | require "active_support/xml_mini/nokogiri" # convert Documents to hashes | |
0 | require 'nokogiri' | |
1 | require 'active_support/xml_mini/nokogiri' # convert Documents to hashes | |
2 | 2 | |
3 | 3 | module HTML |
4 | 4 | # GitHub HTML processing filters and utilities. This module includes a small |
31 | 31 | autoload :EmailReplyFilter, 'html/pipeline/email_reply_filter' |
32 | 32 | autoload :EmojiFilter, 'html/pipeline/emoji_filter' |
33 | 33 | autoload :HttpsFilter, 'html/pipeline/https_filter' |
34 | autoload :ImageFilter, 'html/pipeline/image_filter' | |
34 | 35 | autoload :ImageMaxWidthFilter, 'html/pipeline/image_max_width_filter' |
35 | 36 | autoload :MarkdownFilter, 'html/pipeline/markdown_filter' |
36 | 37 | autoload :MentionFilter, 'html/pipeline/@mention_filter' |
41 | 42 | autoload :TableOfContentsFilter, 'html/pipeline/toc_filter' |
42 | 43 | autoload :TextFilter, 'html/pipeline/text_filter' |
43 | 44 | |
45 | class MissingDependencyError < RuntimeError; end | |
46 | def self.require_dependency(name, requirer) | |
47 | require name | |
48 | rescue LoadError => e | |
49 | raise MissingDependencyError, | |
50 | "Missing dependency '#{name}' for #{requirer}. See README.md for details.\n#{e.class.name}: #{e}" | |
51 | end | |
52 | ||
44 | 53 | # Our DOM implementation. |
45 | 54 | DocumentFragment = Nokogiri::HTML::DocumentFragment |
46 | 55 | |
65 | 74 | # Public: String name for this Pipeline. Defaults to Class name. |
66 | 75 | attr_writer :instrumentation_name |
67 | 76 | def instrumentation_name |
68 | @instrumentation_name || self.class.name | |
77 | return @instrumentation_name if defined?(@instrumentation_name) | |
78 | @instrumentation_name = self.class.name | |
69 | 79 | end |
70 | 80 | |
71 | 81 | class << self |
74 | 84 | end |
75 | 85 | |
76 | 86 | def initialize(filters, default_context = {}, result_class = nil) |
77 | raise ArgumentError, "default_context cannot be nil" if default_context.nil? | |
87 | raise ArgumentError, 'default_context cannot be nil' if default_context.nil? | |
78 | 88 | @filters = filters.flatten.freeze |
79 | 89 | @default_context = default_context.freeze |
80 | 90 | @result_class = result_class || Hash |
97 | 107 | context = @default_context.merge(context) |
98 | 108 | context = context.freeze |
99 | 109 | result ||= @result_class.new |
100 | payload = default_payload :filters => @filters.map(&:name), | |
101 | :context => context, :result => result | |
102 | instrument "call_pipeline.html_pipeline", payload do | |
110 | payload = default_payload filters: @filters.map(&:name), | |
111 | context: context, result: result | |
112 | instrument 'call_pipeline.html_pipeline', payload do | |
103 | 113 | result[:output] = |
104 | 114 | @filters.inject(html) do |doc, filter| |
105 | 115 | perform_filter(filter, doc, context, result) |
114 | 124 | # |
115 | 125 | # Returns the result of the filter. |
116 | 126 | def perform_filter(filter, doc, context, result) |
117 | payload = default_payload :filter => filter.name, | |
118 | :context => context, :result => result | |
119 | instrument "call_filter.html_pipeline", payload do | |
127 | payload = default_payload filter: filter.name, | |
128 | context: context, result: result | |
129 | instrument 'call_filter.html_pipeline', payload do | |
120 | 130 | filter.call(doc, context, result) |
121 | 131 | end |
122 | 132 | end |
167 | 177 | # |
168 | 178 | # Returns a Hash. |
169 | 179 | def default_payload(payload = {}) |
170 | {:pipeline => instrumentation_name}.merge(payload) | |
180 | { pipeline: instrumentation_name }.merge(payload) | |
171 | 181 | end |
172 | 182 | end |
173 | 183 | end |
174 | 184 | |
175 | 185 | # XXX nokogiri monkey patches for 1.8 |
176 | if not ''.respond_to?(:force_encoding) | |
186 | unless ''.respond_to?(:force_encoding) | |
177 | 187 | class Nokogiri::XML::Node |
178 | 188 | # Work around an issue with utf-8 encoded data being erroneously converted to |
179 | 189 | # ... some other shit when replacing text nodes. See 'utf-8 output 2' in |
185 | 195 | replace_without_encoding_fix(replacement) |
186 | 196 | end |
187 | 197 | |
188 | alias_method :replace_without_encoding_fix, :replace | |
189 | alias_method :replace, :replace_with_encoding_fix | |
198 | alias replace_without_encoding_fix replace | |
199 | alias replace replace_with_encoding_fix | |
190 | 200 | |
191 | 201 | def swap(replacement) |
192 | 202 | replace(replacement) |
0 | --- !ruby/object:Gem::Specification | |
1 | name: html-pipeline | |
2 | version: !ruby/object:Gem::Version | |
3 | version: 1.11.0 | |
4 | platform: ruby | |
5 | authors: | |
6 | - Ryan Tomayko | |
7 | - Jerry Cheung | |
8 | autorequire: | |
9 | bindir: bin | |
10 | cert_chain: [] | |
11 | date: 2014-09-15 00:00:00.000000000 Z | |
12 | dependencies: | |
13 | - !ruby/object:Gem::Dependency | |
14 | name: nokogiri | |
15 | requirement: !ruby/object:Gem::Requirement | |
16 | requirements: | |
17 | - - ~> | |
18 | - !ruby/object:Gem::Version | |
19 | version: '1.4' | |
20 | type: :runtime | |
21 | prerelease: false | |
22 | version_requirements: !ruby/object:Gem::Requirement | |
23 | requirements: | |
24 | - - ~> | |
25 | - !ruby/object:Gem::Version | |
26 | version: '1.4' | |
27 | - !ruby/object:Gem::Dependency | |
28 | name: activesupport | |
29 | requirement: !ruby/object:Gem::Requirement | |
30 | requirements: | |
31 | - - '>=' | |
32 | - !ruby/object:Gem::Version | |
33 | version: '2' | |
34 | type: :runtime | |
35 | prerelease: false | |
36 | version_requirements: !ruby/object:Gem::Requirement | |
37 | requirements: | |
38 | - - '>=' | |
39 | - !ruby/object:Gem::Version | |
40 | version: '2' | |
41 | description: GitHub HTML processing filters and utilities | |
42 | email: | |
43 | - ryan@github.com | |
44 | - jerry@github.com | |
45 | executables: [] | |
46 | extensions: [] | |
47 | extra_rdoc_files: [] | |
48 | files: | |
49 | - .gitignore | |
50 | - .travis.yml | |
51 | - CHANGELOG.md | |
52 | - CONTRIBUTING.md | |
53 | - Gemfile | |
54 | - LICENSE | |
55 | - README.md | |
56 | - Rakefile | |
57 | - bin/html-pipeline | |
58 | - html-pipeline.gemspec | |
59 | - lib/html/pipeline.rb | |
60 | - lib/html/pipeline/@mention_filter.rb | |
61 | - lib/html/pipeline/absolute_source_filter.rb | |
62 | - lib/html/pipeline/autolink_filter.rb | |
63 | - lib/html/pipeline/body_content.rb | |
64 | - lib/html/pipeline/camo_filter.rb | |
65 | - lib/html/pipeline/email_reply_filter.rb | |
66 | - lib/html/pipeline/emoji_filter.rb | |
67 | - lib/html/pipeline/filter.rb | |
68 | - lib/html/pipeline/https_filter.rb | |
69 | - lib/html/pipeline/image_max_width_filter.rb | |
70 | - lib/html/pipeline/markdown_filter.rb | |
71 | - lib/html/pipeline/plain_text_input_filter.rb | |
72 | - lib/html/pipeline/sanitization_filter.rb | |
73 | - lib/html/pipeline/syntax_highlight_filter.rb | |
74 | - lib/html/pipeline/text_filter.rb | |
75 | - lib/html/pipeline/textile_filter.rb | |
76 | - lib/html/pipeline/toc_filter.rb | |
77 | - lib/html/pipeline/version.rb | |
78 | - script/package | |
79 | - script/release | |
80 | - test/helpers/mocked_instrumentation_service.rb | |
81 | - test/html/pipeline/absolute_source_filter_test.rb | |
82 | - test/html/pipeline/autolink_filter_test.rb | |
83 | - test/html/pipeline/camo_filter_test.rb | |
84 | - test/html/pipeline/emoji_filter_test.rb | |
85 | - test/html/pipeline/https_filter_test.rb | |
86 | - test/html/pipeline/image_max_width_filter_test.rb | |
87 | - test/html/pipeline/markdown_filter_test.rb | |
88 | - test/html/pipeline/mention_filter_test.rb | |
89 | - test/html/pipeline/plain_text_input_filter_test.rb | |
90 | - test/html/pipeline/sanitization_filter_test.rb | |
91 | - test/html/pipeline/syntax_highlight_filter_test.rb | |
92 | - test/html/pipeline/toc_filter_test.rb | |
93 | - test/html/pipeline_test.rb | |
94 | - test/test_helper.rb | |
95 | homepage: https://github.com/jch/html-pipeline | |
96 | licenses: | |
97 | - MIT | |
98 | metadata: {} | |
99 | post_install_message: | | |
100 | ------------------------------------------------- | |
101 | Thank you for installing html-pipeline! | |
102 | You must bundle Filter gem dependencies. | |
103 | See html-pipeline README.md for more details. | |
104 | https://github.com/jch/html-pipeline#dependencies | |
105 | ------------------------------------------------- | |
106 | rdoc_options: [] | |
107 | require_paths: | |
108 | - lib | |
109 | required_ruby_version: !ruby/object:Gem::Requirement | |
110 | requirements: | |
111 | - - '>=' | |
112 | - !ruby/object:Gem::Version | |
113 | version: '0' | |
114 | required_rubygems_version: !ruby/object:Gem::Requirement | |
115 | requirements: | |
116 | - - '>=' | |
117 | - !ruby/object:Gem::Version | |
118 | version: '0' | |
119 | requirements: [] | |
120 | rubyforge_project: | |
121 | rubygems_version: 2.0.14 | |
122 | signing_key: | |
123 | specification_version: 4 | |
124 | summary: Helpers for processing content through a chain of filters | |
125 | test_files: | |
126 | - test/helpers/mocked_instrumentation_service.rb | |
127 | - test/html/pipeline/absolute_source_filter_test.rb | |
128 | - test/html/pipeline/autolink_filter_test.rb | |
129 | - test/html/pipeline/camo_filter_test.rb | |
130 | - test/html/pipeline/emoji_filter_test.rb | |
131 | - test/html/pipeline/https_filter_test.rb | |
132 | - test/html/pipeline/image_max_width_filter_test.rb | |
133 | - test/html/pipeline/markdown_filter_test.rb | |
134 | - test/html/pipeline/mention_filter_test.rb | |
135 | - test/html/pipeline/plain_text_input_filter_test.rb | |
136 | - test/html/pipeline/sanitization_filter_test.rb | |
137 | - test/html/pipeline/syntax_highlight_filter_test.rb | |
138 | - test/html/pipeline/toc_filter_test.rb | |
139 | - test/html/pipeline_test.rb | |
140 | - test/test_helper.rb |
0 | #!/usr/bin/env bash | |
1 | # Usage: script/package | |
2 | # Builds a new gem from the gemspec and moves it into the pkg directory. | |
3 | ||
4 | mkdir -p pkg | |
5 | gem build *.gemspec | |
6 | mv *.gem pkg |
0 | #!/usr/bin/env bash | |
1 | # Usage: script/release | |
2 | # Build the package, tag a commit, push it to origin, and then release the | |
3 | # package publicly. | |
4 | ||
5 | set -e | |
6 | ||
7 | version="$(script/package | grep Version: | awk '{print $2}')" | |
8 | [ -n "$version" ] || exit 1 | |
9 | ||
10 | echo $version | |
11 | git commit --allow-empty -a -m "Release $version" | |
12 | git tag "v$version" | |
13 | git push origin | |
14 | git push origin "v$version" | |
15 | gem push pkg/*-${version}.gem |
# Minimal instrumentation service double for tests.
#
# It records every instrumented call whose event name equals the currently
# subscribed event as an `[event, payload, result]` triple in #events.
class MockedInstrumentationService
  # Array of recorded `[event, payload, result]` triples.
  attr_reader :events

  # event  - event name to subscribe to right away (nil subscribes to nil).
  # events - Array that recorded triples are appended to.
  def initialize(event = nil, events = [])
    @events = events
    subscribe event
  end

  # Runs the given block with the payload and records the call when the event
  # is the subscribed one.
  #
  # event   - name of the event being instrumented.
  # payload - Hash handed to the block; a fresh Hash is used when nil.
  #
  # The block is optional (assumed to mirror ActiveSupport::Notifications);
  # without one nothing is yielded and the result is nil.
  #
  # Returns the block's return value, or nil when no block was given.
  def instrument(event, payload = nil)
    payload ||= {}
    res = yield payload if block_given?
    events << [event, payload, res] if @subscribe == event
    res
  end

  # Switches the recorded event name.
  #
  # Returns the Array that recorded triples are appended to.
  def subscribe(event)
    @subscribe = event
    @events
  end
end
0 | require "test_helper" | |
1 | ||
# Tests for HTML::Pipeline::AbsoluteSourceFilter: root-relative and relative
# image sources are expected to be rewritten against the configured URLs,
# absolute sources are expected to stay untouched, and a missing context is
# expected to raise a RuntimeError naming the filter.
class HTML::Pipeline::AbsoluteSourceFilterTest < Minitest::Test
  AbsoluteSourceFilter = HTML::Pipeline::AbsoluteSourceFilter

  def setup
    @image_base_url = 'http://assets.example.com'
    @image_subpage_url = 'http://blog.example.com/a/post'
    @options = {
      :image_base_url    => @image_base_url,
      :image_subpage_url => @image_subpage_url
    }
  end

  def test_rewrites_root_urls
    orig = %(<p><img src="/img.png"></p>)
    assert_equal "<p><img src=\"#{@image_base_url}/img.png\"></p>",
      AbsoluteSourceFilter.call(orig, @options).to_s
  end

  def test_rewrites_relative_urls
    orig = %(<p><img src="post/img.png"></p>)
    assert_equal "<p><img src=\"#{@image_subpage_url}/img.png\"></p>",
      AbsoluteSourceFilter.call(orig, @options).to_s
  end

  def test_does_not_rewrite_absolute_urls
    orig = %(<p><img src="http://other.example.com/img.png"></p>)
    result = AbsoluteSourceFilter.call(orig, @options).to_s
    # Check against the configured URL *values*. A /@image_base_url/ regexp
    # only matches that literal text, so it could never fail.
    refute_includes result, @image_base_url
    refute_includes result, @image_subpage_url
  end

  def test_fails_when_context_is_missing
    assert_raises RuntimeError do
      AbsoluteSourceFilter.call("<img src=\"img.png\">", {})
    end
    assert_raises RuntimeError do
      AbsoluteSourceFilter.call("<img src=\"/img.png\">", {})
    end
  end

  def test_tells_you_where_context_is_required
    exception = assert_raises(RuntimeError) {
      AbsoluteSourceFilter.call("<img src=\"img.png\">", {})
    }
    assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message

    exception = assert_raises(RuntimeError) {
      AbsoluteSourceFilter.call("<img src=\"/img.png\">", {})
    }
    assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message
  end
end
0 | require "test_helper" | |
1 | ||
AutolinkFilter = HTML::Pipeline::AutolinkFilter

# Tests for HTML::Pipeline::AutolinkFilter and the context options it accepts
# (:autolink, :link_attr, :flags, :skip_tags).
class HTML::Pipeline::AutolinkFilterTest < Minitest::Test
  def test_uses_rinku_for_autolinking
    # just try to parse a complicated piece of HTML
    # that Rails auto_link cannot handle
    source   = '<p>"http://www.github.com"</p>'
    expected = '<p>"<a href="http://www.github.com">http://www.github.com</a>"</p>'

    assert_equal expected, AutolinkFilter.to_html(source)
  end

  def test_autolink_option
    source = '<p>"http://www.github.com"</p>'

    # With :autolink => false the markup is expected to come back unchanged.
    assert_equal source, AutolinkFilter.to_html(source, :autolink => false)
  end

  def test_autolink_link_attr
    source   = '<p>"http://www.github.com"</p>'
    expected = '<p>"<a href="http://www.github.com" target="_blank">http://www.github.com</a>"</p>'

    assert_equal expected,
      AutolinkFilter.to_html(source, :link_attr => 'target="_blank"')
  end

  def test_autolink_flags
    source   = '<p>"http://github"</p>'
    expected = '<p>"<a href="http://github">http://github</a>"</p>'

    assert_equal expected,
      AutolinkFilter.to_html(source, :flags => Rinku::AUTOLINK_SHORT_DOMAINS)
  end

  def test_autolink_skip_tags
    source = '<code>"http://github.com"</code>'

    # Without :skip_tags the <code> content is expected to stay unlinked.
    assert_equal source, AutolinkFilter.to_html(source)

    # A custom :skip_tags list that omits "code" lets the URL be linked.
    linked = '<code>"<a href="http://github.com">http://github.com</a>"</code>'
    assert_equal linked,
      AutolinkFilter.to_html(source, :skip_tags => %w(kbd script))
  end
end
0 | require "test_helper" | |
1 | ||
# Tests for HTML::Pipeline::CamoFilter: which image sources are expected to be
# rewritten to the asset proxy and which are expected to stay untouched.
class HTML::Pipeline::CamoFilterTest < Minitest::Test
  CamoFilter = HTML::Pipeline::CamoFilter

  def setup
    # NOTE(review): this URL has no colon after "https". The expected values
    # below are built from the same string, so the spelling is kept verbatim.
    @asset_proxy_url = 'https//assets.example.org'
    @asset_proxy_secret_key = 'ssssh-secret'
    @options = {
      :asset_proxy            => @asset_proxy_url,
      :asset_proxy_secret_key => @asset_proxy_secret_key,
      :asset_proxy_whitelist  => [/(^|\.)github\.com$/]
    }
  end

  def test_asset_proxy_disabled
    assert_untouched %(<p><img src="http://twitter.com/img.png"></p>),
      :disable_asset_proxy => true
  end

  def test_camouflaging_http_image_urls
    expected = proxied(
      'a5ad43494e343b20d745586282be61ff530e6fa0',
      '687474703a2f2f747769747465722e636f6d2f696d672e706e67',
      'http://twitter.com/img.png'
    )
    assert_equal expected, camo(%(<p><img src="http://twitter.com/img.png"></p>))
  end

  def test_doesnt_rewrite_dotcom_image_urls
    assert_untouched %(<p><img src="https://github.com/img.png"></p>)
  end

  def test_doesnt_rewrite_dotcom_subdomain_image_urls
    assert_untouched %(<p><img src="https://raw.github.com/img.png"></p>)
  end

  def test_doesnt_rewrite_dotcom_subsubdomain_image_urls
    assert_untouched %(<p><img src="https://f.assets.github.com/img.png"></p>)
  end

  def test_camouflaging_github_prefixed_image_urls
    expected = proxied(
      '5d4a96c69713f850520538e04cb9661035cfb534',
      '68747470733a2f2f6e6f746769746875622e636f6d2f696d672e706e67',
      'https://notgithub.com/img.png'
    )
    assert_equal expected, camo(%(<p><img src="https://notgithub.com/img.png"></p>))
  end

  def test_doesnt_rewrite_absolute_image_urls
    assert_untouched %(<p><img src="/img.png"></p>)
  end

  def test_doesnt_rewrite_relative_image_urls
    assert_untouched %(<p><img src="img.png"></p>)
  end

  def test_camouflaging_https_image_urls
    expected = proxied(
      '3c5c6dc74fd6592d2596209dfcb8b7e5461383c8',
      '68747470733a2f2f666f6f2e636f6d2f696d672e706e67',
      'https://foo.com/img.png'
    )
    assert_equal expected, camo(%(<p><img src="https://foo.com/img.png"></p>))
  end

  def test_handling_images_with_no_src_attribute
    assert_untouched %(<p><img></p>)
  end

  def test_required_context_validation
    error = assert_raises(ArgumentError) { CamoFilter.call("", {}) }

    assert_match(/:asset_proxy[^_]/, error.message)
    assert_match(/:asset_proxy_secret_key/, error.message)
  end

  private

  # Runs the filter over +html+ with the default options plus +extra+ and
  # returns the serialized result.
  def camo(html, extra = {})
    CamoFilter.call(html, @options.merge(extra)).to_s
  end

  # Asserts that the filter returns +html+ exactly as it was given.
  def assert_untouched(html, extra = {})
    assert_equal html, camo(html, extra)
  end

  # Builds the expected markup for an image served through the proxy.
  def proxied(digest, hex_url, canonical_src)
    %(<p><img src="#{@asset_proxy_url}/#{digest}/#{hex_url}" data-canonical-src="#{canonical_src}"></p>)
  end
end
0 | require 'test_helper' | |
1 | ||
# Tests for HTML::Pipeline::EmojiFilter: the src of the generated <img> for
# various inputs, and validation of the required :asset_root context.
class HTML::Pipeline::EmojiFilterTest < Minitest::Test
  EmojiFilter = HTML::Pipeline::EmojiFilter

  def test_emojify
    assert_match "https://foo.com/emoji/shipit.png",
      emoji_src("<p>:shipit:</p>", :asset_root => 'https://foo.com')
  end

  def test_emojify_on_string
    assert_match "https://foo.com/emoji/shipit.png",
      emoji_src(":shipit:", :asset_root => 'https://foo.com')
  end

  def test_uri_encoding
    # The "+" in the emoji name is expected to be percent-encoded in the URL.
    assert_match "https://foo.com/emoji/%2B1.png",
      emoji_src("<p>:+1:</p>", :asset_root => 'https://foo.com')
  end

  def test_required_context_validation
    error = assert_raises(ArgumentError) { EmojiFilter.call("", {}) }

    assert_match(/:asset_root/, error.message)
  end

  def test_custom_asset_path
    context = { :asset_path => ':file_name', :asset_root => 'https://foo.com' }

    assert_match "https://foo.com/%2B1.png", emoji_src("<p>:+1:</p>", context)
  end

  private

  # Filters +html+ with +context+ and returns the src attribute value read
  # from the resulting img search.
  def emoji_src(html, context)
    EmojiFilter.new(html, context).call.search('img').attr('src').value
  end
end
0 | require "test_helper" | |
1 | ||
HttpsFilter = HTML::Pipeline::HttpsFilter

# Tests for HTML::Pipeline::HttpsFilter.
#
# The class is named after the filter under test. Declaring it as
# AutolinkFilterTest would reopen the autolink test class and add this file's
# `setup` and `filter` helpers to it.
class HTML::Pipeline::HttpsFilterTest < Minitest::Test
  # Runs HttpsFilter over +html+ with the options prepared for the test.
  def filter(html)
    HttpsFilter.to_html(html, @options)
  end

  def setup
    @options = {:base_url => "http://github.com"}
  end

  def test_http
    assert_equal %(<a href="https://github.com">github.com</a>),
      filter(%(<a href="http://github.com">github.com</a>))
  end

  def test_https
    assert_equal %(<a href="https://github.com">github.com</a>),
      filter(%(<a href="https://github.com">github.com</a>))
  end

  # Links to a subdomain of the base URL are expected to stay http.
  def test_subdomain
    assert_equal %(<a href="http://help.github.com">github.com</a>),
      filter(%(<a href="http://help.github.com">github.com</a>))
  end

  # Links to an unrelated host are expected to stay http.
  def test_other
    assert_equal %(<a href="http://github.io">github.io</a>),
      filter(%(<a href="http://github.io">github.io</a>))
  end

  def test_uses_http_url_over_base_url
    @options = {:http_url => "http://github.com", :base_url => "https://github.com"}

    assert_equal %(<a href="https://github.com">github.com</a>),
      filter(%(<a href="http://github.com">github.com</a>))
  end

  def test_only_http_url
    @options = {:http_url => "http://github.com"}

    assert_equal %(<a href="https://github.com">github.com</a>),
      filter(%(<a href="http://github.com">github.com</a>))
  end

  def test_validates_http_url
    @options.clear
    exception = assert_raises(ArgumentError) { filter("") }
    assert_match "HTML::Pipeline::HttpsFilter: :http_url", exception.message
  end
end
0 | require "test_helper" | |
1 | ||
# Tests for HTML::Pipeline::ImageMaxWidthFilter: the max-width style and the
# wrapping link it is expected to add around images.
class HTML::Pipeline::ImageMaxWidthFilterTest < Minitest::Test
  # Runs the filter under test over +html+ (a string or a parsed fragment).
  def filter(html)
    HTML::Pipeline::ImageMaxWidthFilter.call(html)
  end

  def test_rewrites_image_style_tags
    rendered = filter(fragment("<p>Screenshot: <img src='screenshot.png'></p>")).to_html

    assert_equal_html %q(<p>Screenshot: <a target="_blank" href="screenshot.png"><img src="screenshot.png" style="max-width:100%;"></a></p>),
      rendered
  end

  def test_leaves_existing_image_style_tags_alone
    rendered = filter(fragment("<p><img src='screenshot.png' style='width:100px;'></p>")).to_html

    assert_equal_html '<p><img src="screenshot.png" style="width:100px;"></p>',
      rendered
  end

  def test_links_to_image
    rendered = filter(fragment("<p>Screenshot: <img src='screenshot.png'></p>")).to_html

    assert_equal_html '<p>Screenshot: <a target="_blank" href="screenshot.png"><img src="screenshot.png" style="max-width:100%;"></a></p>',
      rendered
  end

  def test_doesnt_link_to_image_when_already_linked
    rendered = filter(fragment("<p>Screenshot: <a href='blah.png'><img src='screenshot.png'></a></p>")).to_html

    assert_equal_html %q(<p>Screenshot: <a href="blah.png"><img src="screenshot.png" style="max-width:100%;"></a></p>),
      rendered
  end

  def test_doesnt_screw_up_inlined_images
    rendered = filter(fragment("<p>Screenshot <img src='screenshot.png'>, yes, this is a <b>screenshot</b> indeed.</p>")).to_html

    assert_equal_html %q(<p>Screenshot <a target="_blank" href="screenshot.png"><img src="screenshot.png" style="max-width:100%;"></a>, yes, this is a <b>screenshot</b> indeed.</p>),
      rendered
  end

  private

  # Parses +html+ into a Nokogiri document fragment.
  def fragment(html)
    Nokogiri::HTML::DocumentFragment.parse(html)
  end
end
0 | require "test_helper" | |
1 | ||
MarkdownFilter = HTML::Pipeline::MarkdownFilter

# Tests for HTML::Pipeline::MarkdownFilter: input type checking, the :gfm
# option and fenced code blocks.
class HTML::Pipeline::MarkdownFilterTest < Minitest::Test
  def setup
    @haiku =
      "Pointing at the moon\n" +
      "Reminded of simple things\n" +
      "Moments matter most"
    @links =
      "See http://example.org/ for more info"
    # Every line of the fenced block sits on its own line, so the closing
    # fence is a real fence rather than trailing text of the last code line.
    @code = [
      "```",
      "def hello()",
      "  'world'",
      "end",
      "```"
    ].join("\n")
  end

  def test_fails_when_given_a_documentfragment
    body = "<p>heyo</p>"
    doc  = HTML::Pipeline.parse(body)
    assert_raises(TypeError) { MarkdownFilter.call(doc, {}) }
  end

  def test_gfm_enabled_by_default
    doc = MarkdownFilter.to_document(@haiku, {})
    assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
    # One <br> per line break inside the three-line haiku.
    assert_equal 2, doc.search('br').size
  end

  def test_disabling_gfm
    doc = MarkdownFilter.to_document(@haiku, :gfm => false)
    assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
    assert_equal 0, doc.search('br').size
  end

  def test_fenced_code_blocks
    doc = MarkdownFilter.to_document(@code)
    assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
    assert_equal 1, doc.search('pre').size
  end

  def test_fenced_code_blocks_with_language
    # String#sub replaces only the first match, i.e. the opening fence.
    doc = MarkdownFilter.to_document(@code.sub("```", "``` ruby"))
    assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
    assert_equal 1, doc.search('pre').size
    assert_equal 'ruby', doc.search('pre').first['lang']
  end
end
51 | ||
# Tests for MarkdownFilter with :gfm enabled: underscores inside words and
# newline-to-<br> conversion.
class GFMTest < Minitest::Test
  # Renders +text+ through MarkdownFilter with :gfm => true.
  def gfm(text)
    MarkdownFilter.call(text, :gfm => true)
  end

  def test_not_touch_single_underscores_inside_words
    assert_equal "<p>foo_bar</p>",
      gfm("foo_bar")
  end

  def test_not_touch_underscores_in_code_blocks
    # Four leading spaces make this an indented code block; with fewer the
    # line is an ordinary paragraph and the expectation below cannot hold.
    assert_equal "<pre><code>foo_bar_baz\n</code></pre>",
      gfm("    foo_bar_baz")
  end

  def test_not_touch_underscores_in_pre_blocks
    assert_equal "<pre>\nfoo_bar_baz\n</pre>",
      gfm("<pre>\nfoo_bar_baz\n</pre>")
  end

  def test_not_touch_two_or_more_underscores_inside_words
    assert_equal "<p>foo_bar_baz</p>",
      gfm("foo_bar_baz")
  end

  def test_turn_newlines_into_br_tags_in_simple_cases
    assert_equal "<p>foo<br>\nbar</p>",
      gfm("foo\nbar")
  end

  def test_convert_newlines_in_all_groups
    assert_equal "<p>apple<br>\npear<br>\norange</p>\n\n" +
      "<p>ruby<br>\npython<br>\nerlang</p>",
      gfm("apple\npear\norange\n\nruby\npython\nerlang")
  end

  def test_convert_newlines_in_even_long_groups
    assert_equal "<p>apple<br>\npear<br>\norange<br>\nbanana</p>\n\n" +
      "<p>ruby<br>\npython<br>\nerlang</p>",
      gfm("apple\npear\norange\nbanana\n\nruby\npython\nerlang")
  end

  # Covers headings as well as list items: neither should gain <br> tags.
  def test_not_convert_newlines_in_lists
    assert_equal "<h1>foo</h1>\n\n<h1>bar</h1>",
      gfm("# foo\n# bar")
    assert_equal "<ul>\n<li>foo</li>\n<li>bar</li>\n</ul>",
      gfm("* foo\n* bar")
  end
end
0 | require "test_helper" | |
1 | ||
# Tests for HTML::Pipeline::MentionFilter.
#
# The first half checks the HTML produced for @mentions; the second half runs
# a Markdown + Mention pipeline and checks the collected
# result[:mentioned_usernames].
class HTML::Pipeline::MentionFilterTest < Minitest::Test
  # Runs MentionFilter over +html+ with the given :base_url and :info_url.
  def filter(html, base_url='/', info_url=nil)
    HTML::Pipeline::MentionFilter.call(html, :base_url => base_url, :info_url => info_url)
  end

  def test_filtering_a_documentfragment
    body = "<p>@kneath: check it out.</p>"
    doc  = Nokogiri::HTML::DocumentFragment.parse(body)

    res  = filter(doc, '/')
    # The very same fragment object is expected back.
    assert_same doc, res

    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link}: check it out.</p>",
      res.to_html
  end

  def test_filtering_plain_text
    body = "<p>@kneath: check it out.</p>"
    res  = filter(body, '/')

    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link}: check it out.</p>",
      res.to_html
  end

  def test_not_replacing_mentions_in_pre_tags
    body = "<pre>@kneath: okay</pre>"
    assert_equal body, filter(body).to_html
  end

  def test_not_replacing_mentions_in_code_tags
    body = "<p><code>@kneath:</code> okay</p>"
    assert_equal body, filter(body).to_html
  end

  def test_not_replacing_mentions_in_style_tags
    body = "<style>@media (min-width: 768px) { color: red; }</style>"
    assert_equal body, filter(body).to_html
  end

  def test_not_replacing_mentions_in_links
    body = "<p><a>@kneath</a> okay</p>"
    assert_equal body, filter(body).to_html
  end

  def test_entity_encoding_and_whatnot
    # The "k" is written as a character reference so the test really covers
    # mentions whose text arrives entity-encoded.
    body = "<p>@&#x6b;neath what's up</p>"
    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link} what's up</p>", filter(body, '/').to_html
  end

  def test_html_injection
    # Escaped markup in the text must stay escaped after filtering; it must
    # not come out as a live <script> element.
    body = "<p>@kneath &lt;script&gt;alert(0)&lt;/script&gt;</p>"
    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link} &lt;script&gt;alert(0)&lt;/script&gt;</p>",
      filter(body, '/').to_html
  end

  def test_links_to_nothing_when_no_info_url_given
    body = "<p>How do I @mention someone?</p>"
    assert_equal "<p>How do I @mention someone?</p>",
      filter(body, '/').to_html
  end

  def test_links_to_more_info_when_info_url_given
    body = "<p>How do I @mention someone?</p>"
    link = "<a href=\"https://github.com/blog/821\" class=\"user-mention\">@mention</a>"
    assert_equal "<p>How do I #{link} someone?</p>",
      filter(body, '/', 'https://github.com/blog/821').to_html
  end

  MarkdownPipeline =
    HTML::Pipeline.new [
      HTML::Pipeline::MarkdownFilter,
      HTML::Pipeline::MentionFilter
    ]

  # Runs @body through MarkdownPipeline and returns the usernames collected
  # in the pipeline result.
  def mentioned_usernames
    result = {}
    MarkdownPipeline.call(@body, {}, result)
    result[:mentioned_usernames]
  end

  def test_matches_usernames_in_body
    @body = "@test how are you?"
    assert_equal %w[test], mentioned_usernames
  end

  def test_matches_usernames_with_dashes
    @body = "hi @some-user"
    assert_equal %w[some-user], mentioned_usernames
  end

  def test_matches_usernames_followed_by_a_single_dot
    @body = "okay @some-user."
    assert_equal %w[some-user], mentioned_usernames
  end

  def test_matches_usernames_followed_by_multiple_dots
    @body = "okay @some-user..."
    assert_equal %w[some-user], mentioned_usernames
  end

  def test_does_not_match_email_addresses
    @body = "aman@tmm1.net"
    assert_equal [], mentioned_usernames
  end

  def test_does_not_match_domain_name_looking_things
    @body = "we need a @github.com email"
    assert_equal [], mentioned_usernames
  end

  def test_does_not_match_organization_team_mentions
    @body = "we need to @github/enterprise know"
    assert_equal [], mentioned_usernames
  end

  def test_matches_colon_suffixed_names
    @body = "@tmm1: what do you think?"
    assert_equal %w[tmm1], mentioned_usernames
  end

  def test_matches_list_of_names
    @body = "@defunkt @atmos @kneath"
    assert_equal %w[defunkt atmos kneath], mentioned_usernames
  end

  def test_matches_list_of_names_with_commas
    @body = "/cc @defunkt, @atmos, @kneath"
    assert_equal %w[defunkt atmos kneath], mentioned_usernames
  end

  def test_matches_inside_brackets
    @body = "(@mislav) and [@rtomayko]"
    assert_equal %w[mislav rtomayko], mentioned_usernames
  end

  def test_doesnt_ignore_invalid_users
    @body = "@defunkt @mojombo and @somedude"
    assert_equal ['defunkt', 'mojombo', 'somedude'], mentioned_usernames
  end

  def test_returns_distinct_set
    @body = "/cc @defunkt, @atmos, @kneath, @defunkt, @defunkt"
    assert_equal %w[defunkt atmos kneath], mentioned_usernames
  end

  def test_does_not_match_inline_code_block_with_multiple_code_blocks
    @body = "something\n\n`/cc @defunkt @atmos @kneath` `/cc @atmos/atmos`"
    assert_equal %w[], mentioned_usernames
  end

  def test_mention_at_end_of_parenthetical_sentence
    @body = "(We're talking 'bout @ymendel.)"
    assert_equal %w[ymendel], mentioned_usernames
  end
end
0 | require "test_helper" | |
1 | ||
# Tests for HTML::Pipeline::PlainTextInputFilter: it is expected to accept
# only string input, wrap it in a <div>, and HTML-escape it.
class HTML::Pipeline::PlainTextInputFilterTest < Minitest::Test
  PlainTextInputFilter = HTML::Pipeline::PlainTextInputFilter

  def test_fails_when_given_a_documentfragment
    body = "<p>heyo</p>"
    doc  = Nokogiri::HTML::DocumentFragment.parse(body)
    assert_raises(TypeError) { PlainTextInputFilter.call(doc, {}) }
  end

  def test_wraps_input_in_a_div_element
    doc = PlainTextInputFilter.call("howdy pahtner", {})
    assert_equal "<div>howdy pahtner</div>", doc.to_s
  end

  def test_html_escapes_plain_text_input
    doc = PlainTextInputFilter.call("See: <http://example.org>", {})
    # The angle brackets of the input must come out as entities; expecting
    # them verbatim would assert the opposite of what this test is named for.
    assert_equal "<div>See: &lt;http://example.org&gt;</div>",
      doc.to_s
  end
end
0 | require "test_helper" | |
1 | ||
# Tests for HTML::Pipeline::SanitizationFilter: removal of dangerous elements
# and attributes, handling of URL schemes on anchors (:anchor_schemes and
# :whitelist contexts), and list/table elements outside their containers.
class HTML::Pipeline::SanitizationFilterTest < Minitest::Test
  SanitizationFilter = HTML::Pipeline::SanitizationFilter

  def test_removing_script_tags
    orig = %(<p><img src="http://github.com/img.png" /><script></script></p>)
    html = SanitizationFilter.call(orig).to_s
    refute_match(/script/, html)
  end

  def test_removing_style_tags
    orig = %(<p><style>hey now</style></p>)
    html = SanitizationFilter.call(orig).to_s
    refute_match(/style/, html)
  end

  def test_removing_style_attributes
    orig = %(<p style='font-size:1000%'>YO DAWG</p>)
    html = SanitizationFilter.call(orig).to_s
    refute_match(/font-size/, html)
    refute_match(/style/, html)
  end

  def test_removing_script_event_handler_attributes
    orig = %(<a onclick='javascript:alert(0)'>YO DAWG</a>)
    html = SanitizationFilter.call(orig).to_s
    # Must spell "javascript" exactly as the fixture does; a misspelled
    # pattern can never match and so would never fail.
    refute_match(/javascript/, html)
    refute_match(/onclick/, html)
  end

  def test_sanitizes_li_elements_not_contained_in_ul_or_ol
    stuff = "a\n<li>b</li>\nc"
    html  = SanitizationFilter.call(stuff).to_s
    assert_equal "a\nb\nc", html
  end

  def test_does_not_sanitize_li_elements_contained_in_ul_or_ol
    stuff = "a\n<ul><li>b</li></ul>\nc"
    assert_equal stuff, SanitizationFilter.call(stuff).to_s
  end

  def test_github_specific_protocols_are_not_removed
    stuff = '<a href="github-windows://spillthelog">Spill this yo</a> and so on'
    assert_equal stuff, SanitizationFilter.call(stuff).to_s
  end

  def test_unknown_schemes_are_removed
    stuff = '<a href="something-weird://heyyy">Wat</a> is this'
    html  = SanitizationFilter.call(stuff).to_s
    assert_equal '<a>Wat</a> is this', html
  end

  def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
    stuff  = '<a href="http://www.example.com/">No href for you</a>'
    filter = SanitizationFilter.new(stuff, {:anchor_schemes => []})
    html   = filter.call.to_s
    assert_equal '<a>No href for you</a>', html
  end

  def test_custom_anchor_schemes_are_not_removed
    stuff  = '<a href="something-weird://heyyy">Wat</a> is this'
    filter = SanitizationFilter.new(stuff, {:anchor_schemes => ['something-weird']})
    html   = filter.call.to_s
    assert_equal stuff, html
  end

  def test_anchor_schemes_are_merged_with_other_anchor_restrictions
    stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
    whitelist = {
      :elements   => ['a'],
      :attributes => {'a' => ['href', 'ping']},
      :protocols  => {'a' => {'ping' => ['http']}}
    }
    filter = SanitizationFilter.new(stuff, {:whitelist => whitelist, :anchor_schemes => ['something-weird']})
    html   = filter.call.to_s
    # href survives through :anchor_schemes; ping is dropped because its
    # scheme is not in the whitelist's protocol list for that attribute.
    assert_equal '<a href="something-weird://heyyy">Wat</a> is this', html
  end

  def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
    stuff = '<a href="something-weird://heyyy">Wat</a> is this'
    whitelist = {
      :elements   => ['a'],
      :attributes => {'a' => ['href']},
      :protocols  => {'a' => {'href' => ['something-weird']}}
    }
    filter = SanitizationFilter.new(stuff, {:whitelist => whitelist})
    html   = filter.call.to_s
    assert_equal stuff, html
  end

  def test_whitelist_contains_default_anchor_schemes
    # Expected value first, so a failure message labels the sides correctly.
    assert_equal ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'],
      SanitizationFilter::WHITELIST[:protocols]['a']['href']
  end

  def test_whitelist_from_full_constant
    stuff  = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
    filter = SanitizationFilter.new(stuff, :whitelist => SanitizationFilter::FULL)
    html   = filter.call.to_s
    assert_equal 'Wat is this', html
  end

  def test_exports_default_anchor_schemes
    # Expected value first, so a failure message labels the sides correctly.
    assert_equal ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'],
      SanitizationFilter::ANCHOR_SCHEMES
  end

  def test_script_contents_are_removed
    orig = '<script>JavaScript!</script>'
    assert_equal "", SanitizationFilter.call(orig).to_s
  end

  def test_table_rows_and_cells_removed_if_not_in_table
    orig = %(<tr><td>Foo</td></tr><td>Bar</td>)
    assert_equal 'FooBar', SanitizationFilter.call(orig).to_s
  end

  def test_table_sections_removed_if_not_in_table
    orig = %(<thead><tr><td>Foo</td></tr></thead>)
    assert_equal 'Foo', SanitizationFilter.call(orig).to_s
  end

  def test_table_sections_are_not_removed
    # The fixture is compared verbatim with the output, so its continuation
    # lines deliberately start at column 0.
    orig = %(<table>
<thead><tr><th>Column 1</th></tr></thead>
<tfoot><tr><td>Sum</td></tr></tfoot>
<tbody><tr><td>1</td></tr></tbody>
</table>)
    assert_equal orig, SanitizationFilter.call(orig).to_s
  end
end
0 | require "test_helper" | |
1 | ||
SyntaxHighlightFilter = HTML::Pipeline::SyntaxHighlightFilter

# Tests for the :highlight context option of
# HTML::Pipeline::SyntaxHighlightFilter.
class HTML::Pipeline::SyntaxHighlightFilterTest < Minitest::Test
  def test_highlight_default
    doc = SyntaxHighlightFilter.new("<pre>hello</pre>", :highlight => "coffeescript").call

    # A <pre> without a lang attribute is expected to get the default.
    refute_empty doc.css(".highlight-coffeescript")
  end

  def test_highlight_default_will_not_override
    doc = SyntaxHighlightFilter.new("<pre lang='c'>hello</pre>", :highlight => "coffeescript").call

    # An explicit lang attribute is expected to win over the default.
    assert_empty doc.css(".highlight-coffeescript")
    refute_empty doc.css(".highlight-c")
  end
end
0 | # encoding: utf-8 | |
1 | require "test_helper" | |
2 | ||
3 | class HTML::Pipeline::TableOfContentsFilterTest < Minitest::Test | |
4 | TocFilter = HTML::Pipeline::TableOfContentsFilter | |
5 | ||
6 | TocPipeline = | |
7 | HTML::Pipeline.new [ | |
8 | HTML::Pipeline::TableOfContentsFilter | |
9 | ] | |
10 | ||
11 | def toc | |
12 | result = {} | |
13 | TocPipeline.call(@orig, {}, result) | |
14 | result[:toc] | |
15 | end | |
16 | ||
17 | def test_anchors_are_added_properly | |
18 | orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>) | |
19 | assert_includes TocFilter.call(orig).to_s, '<a id=' | |
20 | end | |
21 | ||
22 | def test_toc_list_added_properly | |
23 | @orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>) | |
24 | assert_includes toc, %Q{<ul class="section-nav">\n<li><a href="} | |
25 | end | |
26 | ||
27 | def test_anchors_have_sane_names | |
28 | orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>) | |
29 | result = TocFilter.call(orig).to_s | |
30 | ||
31 | assert_includes result, '"dr-dre"' | |
32 | assert_includes result, '"ice-cube"' | |
33 | assert_includes result, '"eazy-e"' | |
34 | assert_includes result, '"mc-ren"' | |
35 | end | |
36 | ||
37 | def test_anchors_have_aria_hidden | |
38 | orig = "<h1>Straight Outta Compton</h1>" | |
39 | result = TocFilter.call(orig).to_s | |
40 | assert_includes result, 'aria-hidden="true"' | |
41 | end | |
42 | ||
43 | def test_toc_hrefs_have_sane_values | |
44 | @orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>) | |
45 | assert_includes toc, '"#dr-dre"' | |
46 | assert_includes toc, '"#ice-cube"' | |
47 | assert_includes toc, '"#eazy-e"' | |
48 | assert_includes toc, '"#mc-ren"' | |
49 | end | |
50 | ||
51 | def test_dupe_headers_have_unique_trailing_identifiers | |
52 | orig = %(<h1>Straight Outta Compton</h1> | |
53 | <h2>Dopeman</h2> | |
54 | <h3>Express Yourself</h3> | |
55 | <h1>Dopeman</h1>) | |
56 | ||
57 | result = TocFilter.call(orig).to_s | |
58 | ||
59 | assert_includes result, '"dopeman"' | |
60 | assert_includes result, '"dopeman-1"' | |
61 | end | |
62 | ||
63 | def test_dupe_headers_have_unique_toc_anchors | |
64 | @orig = %(<h1>Straight Outta Compton</h1> | |
65 | <h2>Dopeman</h2> | |
66 | <h3>Express Yourself</h3> | |
67 | <h1>Dopeman</h1>) | |
68 | ||
69 | assert_includes toc, '"#dopeman"' | |
70 | assert_includes toc, '"#dopeman-1"' | |
71 | end | |
72 | ||
73 | def test_all_header_tags_are_found_when_adding_anchors | |
74 | orig = %(<h1>"Funky President" by James Brown</h1> | |
75 | <h2>"It's My Thing" by Marva Whitney</h2> | |
76 | <h3>"Boogie Back" by Roy Ayers</h3> | |
77 | <h4>"Feel Good" by Fancy</h4> | |
78 | <h5>"Funky Drummer" by James Brown</h5> | |
79 | <h6>"Ruthless Villain" by Eazy-E</h6> | |
80 | <h7>"Be Thankful for What You Got" by William DeVaughn</h7>) | |
81 | ||
82 | doc = TocFilter.call(orig) | |
83 | ||
84 | assert_equal 6, doc.search('a').size | |
85 | end | |
86 | ||
# The table of contents must be a "section-nav" list with one entry per
# h1..h6 heading, in document order; the h7 element gets no entry.
def test_toc_is_complete
  headings = [
    %q(<h1>"Funky President" by James Brown</h1>),
    %q(<h2>"It's My Thing" by Marva Whitney</h2>),
    %q(<h3>"Boogie Back" by Roy Ayers</h3>),
    %q(<h4>"Feel Good" by Fancy</h4>),
    %q(<h5>"Funky Drummer" by James Brown</h5>),
    %q(<h6>"Ruthless Villain" by Eazy-E</h6>),
    %q(<h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
  ]
  @orig = headings.join("\n")

  # One expected output line per array element, joined by newlines.
  expected_lines = [
    %q(<ul class="section-nav">),
    %q(<li><a href="#funky-president-by-james-brown">"Funky President" by James Brown</a></li>),
    %q(<li><a href="#its-my-thing-by-marva-whitney">"It's My Thing" by Marva Whitney</a></li>),
    %q(<li><a href="#boogie-back-by-roy-ayers">"Boogie Back" by Roy Ayers</a></li>),
    %q(<li><a href="#feel-good-by-fancy">"Feel Good" by Fancy</a></li>),
    %q(<li><a href="#funky-drummer-by-james-brown">"Funky Drummer" by James Brown</a></li>),
    %q(<li><a href="#ruthless-villain-by-eazy-e">"Ruthless Villain" by Eazy-E</a></li>),
    %q(</ul>)
  ]

  assert_equal expected_lines.join("\n"), toc
end
100 | ||
101 | if RUBY_VERSION > "1.9" # not sure how to make this work on 1.8.7 | |
102 | ||
# Headings made of non-ASCII text must keep the raw text as the anchor id
# and get a percent-encoded href.
def test_anchors_with_utf8_characters
  # Both headings are properly closed so the test exercises UTF-8 handling
  # rather than the HTML parser's recovery from a truncated "</h1" tag.
  orig = ["<h1>日本語</h1>", "<h1>Русский</h1>"].join("\n")

  rendered_h1s = TocFilter.call(orig).search('h1').map(&:to_s)

  assert_equal "<h1>\n<a id=\"日本語\" class=\"anchor\" href=\"#%E6%97%A5%E6%9C%AC%E8%AA%9E\" aria-hidden=\"true\"><span class=\"octicon octicon-link\"></span></a>日本語</h1>",
               rendered_h1s[0]
  assert_equal "<h1>\n<a id=\"Русский\" class=\"anchor\" href=\"#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9\" aria-hidden=\"true\"><span class=\"octicon octicon-link\"></span></a>Русский</h1>",
               rendered_h1s[1]
end
114 | ||
# The table of contents for non-ASCII headings must show the raw heading
# text and link to a percent-encoded href.
def test_toc_with_utf8_characters
  # Both headings are properly closed so the test exercises UTF-8 handling
  # rather than the HTML parser's recovery from a truncated "</h1" tag.
  @orig = ["<h1>日本語</h1>", "<h1>Русский</h1>"].join("\n")

  # Round-trip through Nokogiri so the comparison uses its serialisation.
  rendered_toc = Nokogiri::HTML::DocumentFragment.parse(toc).to_s

  expected = %Q{<ul class="section-nav">\n<li><a href="#%E6%97%A5%E6%9C%AC%E8%AA%9E">日本語</a></li>\n<li><a href="#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9">Русский</a></li>\n</ul>}

  assert_equal expected, rendered_toc
end
125 | end | |
126 | end |
0 | require "test_helper" | |
1 | require "helpers/mocked_instrumentation_service" | |
2 | ||
# Exercises HTML::Pipeline's instrumentation hooks using a mocked
# instrumentation service and a single string-reversing filter.
class HTML::PipelineTest < Minitest::Test
  Pipeline = HTML::Pipeline

  # Minimal filter: ignores context and result and returns the input reversed.
  class TestFilter
    def self.call(input, context, result)
      input.reverse
    end
  end

  # Builds a one-filter pipeline with an empty context and Hash results.
  def setup
    @context = {}
    @result_class = Hash
    @pipeline = Pipeline.new [TestFilter], @context, @result_class
  end

  # A "call_filter.html_pipeline" event must be published with the filter
  # name, the pipeline name and the filter's output.
  def test_filter_instrumentation
    service = MockedInstrumentationService.new
    events = service.subscribe "call_filter.html_pipeline"
    @pipeline.instrumentation_service = service

    body = "hello"
    filter(body)

    # Only the first two elements of the recorded event are inspected.
    event, payload = events.pop
    assert event, "event expected"
    assert_equal "call_filter.html_pipeline", event
    assert_equal TestFilter.name, payload[:filter]
    assert_equal @pipeline.class.name, payload[:pipeline]
    assert_equal body.reverse, payload[:result][:output]
  end

  # A "call_pipeline.html_pipeline" event must be published with the list of
  # filter names, the pipeline name and the final output.
  def test_pipeline_instrumentation
    service = MockedInstrumentationService.new
    events = service.subscribe "call_pipeline.html_pipeline"
    @pipeline.instrumentation_service = service

    body = "hello"
    filter(body)

    # Only the first two elements of the recorded event are inspected.
    event, payload = events.pop
    assert event, "event expected"
    assert_equal "call_pipeline.html_pipeline", event
    assert_equal @pipeline.filters.map(&:name), payload[:filters]
    assert_equal @pipeline.class.name, payload[:pipeline]
    assert_equal body.reverse, payload[:result][:output]
  end

  # A pipeline created after the class-level default is set must pick that
  # default up as its instrumentation service.
  def test_default_instrumentation_service
    service = 'default'
    Pipeline.default_instrumentation_service = service
    pipeline = Pipeline.new [], @context, @result_class
    assert_equal service, pipeline.instrumentation_service
  ensure
    # Reset the class-level default so other tests are not affected.
    Pipeline.default_instrumentation_service = nil
  end

  # setup_instrumentation must set both the service and the name, and the
  # name must be reported as the :pipeline value of published events.
  def test_setup_instrumentation
    assert_nil @pipeline.instrumentation_service

    service = MockedInstrumentationService.new
    events = service.subscribe "call_pipeline.html_pipeline"
    name = 'foo'
    @pipeline.setup_instrumentation name, service

    assert_equal service, @pipeline.instrumentation_service
    assert_equal name, @pipeline.instrumentation_name

    body = 'foo'
    filter(body)

    # Only the first two elements of the recorded event are inspected.
    event, payload = events.pop
    assert event, "expected event"
    assert_equal name, payload[:pipeline]
    assert_equal body.reverse, payload[:result][:output]
  end

  # Runs +input+ through the pipeline built in #setup.
  def filter(input)
    @pipeline.call(input)
  end
end
0 | require 'bundler/setup' | |
1 | require 'html/pipeline' | |
2 | require 'minitest/autorun' | |
3 | ||
4 | require 'active_support/core_ext/string' | |
5 | ||
# Extra assertions shared by the test suite.
module TestHelpers
  # Parses both arguments as HTML document fragments and asserts that their
  # +to_hash+ representations are equal. The intent, per the original note,
  # is that attribute order is ignored.
  def assert_equal_html(expected, actual)
    expected_hash, actual_hash = [expected, actual].map do |html|
      Nokogiri::HTML::DocumentFragment.parse(html).to_hash
    end

    assert_equal expected_hash, actual_hash
  end
end
14 | ||
# Mix the shared helpers into every Minitest test case.
Minitest::Test.class_eval { include TestHelpers }