diff --git a/ARTISTIC b/ARTISTIC new file mode 100644 index 0000000..8f9bdef --- /dev/null +++ b/ARTISTIC @@ -0,0 +1,124 @@ +The "Artistic License" + + Preamble + + The intent of this document is to state the conditions under which a + Package may be copied, such that the Copyright Holder maintains some + semblance of artistic control over the development of the package, + while giving the users of the package the right to use and distribute + the Package in a more-or-less customary fashion, plus the right to + make reasonable modifications. + + Definitions + + "Package" refers to the collection of files distributed by the + Copyright Holder, and derivatives of that collection of files + created through textual modification. + + "Standard Version" refers to such a Package if it has not been + modified, or has been modified in accordance with the wishes of the + Copyright Holder as specified below. + + "Copyright Holder" is whoever is named in the copyright or + copyrights for the package. + + "You" is you, if you're thinking about copying or distributing this + Package. + + "Reasonable copying fee" is whatever you can justify on the basis + of media cost, duplication charges, time of people involved, and so + on. (You will not be required to justify it to the Copyright + Holder, but only to the computing community at large as a market + that must bear the fee.) + + "Freely Available" means that no fee is charged for the item + itself, though there may be fees involved in handling the item. It + also means that recipients of the item may redistribute it under + the same conditions they received it. + + 1. You may make and give away verbatim copies of the source form of + the Standard Version of this Package without restriction, provided + that you duplicate all of the original copyright notices and + associated disclaimers. + 2. You may apply bug fixes, portability fixes and other modifications + derived from the Public Domain or from the Copyright Holder. A + Package modified in such a way shall still be considered the + Standard Version. + 3. You may otherwise modify your copy of this Package in any way, + provided that you insert a prominent notice in each changed file + stating how and when you changed that file, and provided that you + do at least ONE of the following: + + a. place your modifications in the Public Domain or otherwise make + them Freely Available, such as by posting said modifications to + Usenet or an equivalent medium, or placing the modifications on a + major archive site such as uunet.uu.net, or by allowing the + Copyright Holder to include your modifications in the Standard + Version of the Package. + b. use the modified Package only within your corporation or + organization. + c. rename any non-standard executables so the names do not conflict + with standard executables, which must also be provided, and + provide a separate manual page for each non-standard executable + that clearly documents how it differs from the Standard Version. + d. make other distribution arrangements with the Copyright Holder. + + You may distribute the programs of this Package in object code or + executable form, provided that you do at least ONE of the following: + + a. distribute a Standard Version of the executables and library + files, together with instructions (in the manual page or + equivalent) on where to get the Standard Version. + b. accompany the distribution with the machine-readable source of the + Package with your modifications. + c. give non-standard executables non-standard names, and clearly + document the differences in manual pages (or equivalent), together + with instructions on where to get the Standard Version. + d. make other distribution arrangements with the Copyright Holder. + + You may charge a reasonable copying fee for any distribution of this + Package. You may charge any fee you choose for support of this + Package. You may not charge a fee for this Package itself. However, + you may distribute this Package in aggregate with other (possibly + commercial) programs as part of a larger (possibly commercial) + software distribution provided that you do not advertise this Package + as a product of your own. You may embed this Package's interpreter + within an executable of yours (by linking); this shall be construed as + a mere form of aggregation, provided that the complete Standard + Version of the interpreter is so embedded. + + The scripts and library files supplied as input to or produced as + output from the programs of this Package do not automatically fall + under the copyright of this Package, but belong to whomever generated + them, and may be sold commercially, and may be aggregated with this + Package. If such scripts or library files are aggregated with this + Package via the so-called "undump" or "unexec" methods of producing a + binary executable image, then distribution of such an image shall + neither be construed as a distribution of this Package nor shall it + fall under the restrictions of Paragraphs 3 and 4, provided that you + do not represent such an executable image as a Standard Version of + this Package. + + C subroutines (or comparably compiled subroutines in other + languages) supplied by you and linked into this Package in order to + emulate subroutines and variables of the language defined by this + Package shall not be considered part of this Package, but are the + equivalent of input as in Paragraph 6, provided these subroutines do + not change the language in any way that would cause it to fail the + regression tests for the language. + + Aggregation of this Package with a commercial distribution is always + permitted provided that the use of this Package is embedded; that is, + when no overt attempt is made to make this Package's interfaces + visible to the end user of the commercial distribution. Such use shall + not be construed as a distribution of this Package. + + The name of the Copyright Holder may not be used to endorse or + promote products derived from this software without specific prior + written permission. + + THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + + The End diff --git a/Build.PL b/Build.PL new file mode 100644 index 0000000..64a20ae --- /dev/null +++ b/Build.PL @@ -0,0 +1,58 @@ +#! perl + +use Module::Build; + +my $class = Module::Build->subclass( + class => 'Module::Build::FilterTests', + code => <<'END_HERE', + + use File::Glob; + use File::Spec::Functions; + + sub ACTION_disttest + { + my $self = shift; + local $ENV{PERL_RUN_ALL_TESTS} = 1; + $self->SUPER::ACTION_disttest (@_); + } + + sub find_test_files + { + my $self = shift; + my $tests = $self->SUPER::find_test_files (@_); + + return $tests unless $ENV{PERL_RUN_ALL_TESTS}; + + my $test_pattern = catfile (qw(t developer *.t)); + push @$tests, File::Glob::bsd_glob( $test_pattern ); + return $tests; + } +END_HERE +); + +my $build = $class->new( + license => 'perl', + module_name => 'Text::MediawikiFormat', + requires => + { + 'Scalar::Util' => '1.14', + 'URI' => '', + 'URI::Escape' => '', + 'version' => '0.74', + }, + recommends => + { + 'HTML::Parser' => '', + 'HTML::Tagset' => '', + }, + build_requires => + { + 'Test::More' => 0.30, + 'Test::NoWarnings' => 0, + 'Test::Warn' => 0, + }, + create_makefile_pl => 'traditional', + sign => '1', +); + +$build->create_build_script(); diff --git a/Changes b/Changes new file mode 100644 index 0000000..1f6edf0 --- /dev/null +++ b/Changes @@ -0,0 +1,203 @@ +Revision history for Text-MediawikiFormat + +1.0 June 19, 2008 + - Empty tags are handled like they should be. This should make it + easier to implement & (fixes + rt.cpan.org #25386). + - Disable HTML inside
 tags, as with  tags (fixes
+	  rt.cpan.org #25417)
+	- Angle backets (<>) around URIs are ignored as per Mediawiki 1.12's
+	  observed behavior.
+	- Use "vars" pragma instead of "our" for Perl 5.005.
+	- Check for undefined values in the extended_link_delimiters field and
+	  warn but ignore when found (fixes rt.cpan.org #26879).
+	- Use uri_escape_utf8 unless charset is set to something other than
+	  utf-8 in the options hash (fixes rt.cpan.org #26880).
+	- Documentation fixes.
+
+0.06    June 17, 2008
+	- Tests skip HTML processing when HTML::Parser and HTML::Tagset are
+	  not installed.
+	- format() actually processes the options hash.
+	- Change _clone to Return arrays and a deep copy of hashes. 
+	  Rather than a copy of arrays.
+
+0.05    September 28, 2006
+	- Remove the <> when linkifying .
+
+0.04	September 27, 2006
+	- Process absolute links more robustly.
+
+0.03	September 27, 2006
+	- Default to absolute_links => 1.
+	- Prefer "our" to "use vars".
+
+0.02	September 26, 2006
+	- Improved documentation.
+	- Defaults to Mediawiki behaviors.
+
+0.01	September 20, 2006
+	- Avoid applying wikification when block level allowed html elements
+	  are present, when process_html option is set.
+	- content of list items is now formatted by default.
+	- (non)formatting of blocks is now configurable.
+	- additional tests added, re RT 4700.
+	- the $tags{link} func is now passed the tags hash, re RT 21393.
+	- extended_link_delimiters may now be specified as a regex, re RT 21330.
+	- merge_hash() now copies hashes.
+	- several nesting bugs fixed, re RT 21269.
+	- code refs now preserve indenting after the "  ", not before.
+
+
+
+###
+### as Text::WikiFormat
+###
+
+0.78
+	Thu Mar 30 06:13:42 UTC 2006
+	- fixed merge_blocks() bug reported by Richard Harman
+	- moved developer tests to t/developer and skipped them for everyone else
+	- updated copyright notices
+
+0.77
+	Sat Oct 29 02:54:02 UTC 2005 (Rev: 9337, Author: chromatic)
+	- removed MANIFEST.SKIP (hopefully fixing Test::Signature errors)
+
+0.76
+	Mon Jul 25 05:58:24 UTC 2005 (Rev: 6518, Author: chromatic)
+	- bumped up version number
+	- added documentation to Text::WikiFormat::Blocks
+	- checked in t/pod.t
+
+	Thu Jul 14 06:45:57 UTC 2005 (Rev: 6309, Author: chromatic)
+	- fixed CAMELCASE linking bug reported by punkish on Perl Monks
+	- fixed inadvertent $title reuse in find_link_title()
+
+	Wed Jul 13 03:16:14 UTC 2005 (Rev: 6249, Author: chromatic)
+	- fixed a bug in list detection (find paragraphs correctly)
+	- removed unnecessary .t files in top-level directory (not distributed)
+	- removed unused functions:
+		- get_block()
+		- get_indentation()
+		- find_list()
+		- end_list()
+		- end_all_lists()
+	- added POD and POD coverage tests
+	- added stub documentation to Text::WikiFormat::Blocks
+	- modified tests to work better with Devel::Cover
+
+0.75
+	Sat Jul  2 19:48:00 UTC 2005 (Rev: 5727, Author: chromatic)
+	- support absolute_links (Alex Vandiver and Best Practical)
+	- edited documentation to use better English
+	- added tests for a few undertested functions (t/subs.t)
+	- signed distribution
+	- added passthrough Makefile
+	- bumped up copyright year
+
+	Sun Apr 17 05:10:19 UTC 2005 (Rev: 5727, Author: chromatic)
+	- added t/embedded-links.t from Art Henry
+	- allowed nested embedded links (MediaWiki support)
+
+	Sun Nov 28 23:58:18 UTC 2004 (Rev: 16, Author: chromatic)
+	- re-set properties on Changes
+	- bumped up version number (also stringify it)
+	- added Text::WikiFormat::Blocks to MANIFEST (oops)
+
+	Sun Nov 28 23:38:20 UTC 2004 (Rev: 15, chromatic)
+	- added more nesting tests (Teun van Eijsden)
+	- fix multiple levels of nesting (Teun van Eijsden)
+
+	Sun Nov 28 08:39:37 UTC 2004 (Rev: 14, chromatic)
+	- moved blocks into their own classes
+	- added Text::WikiFormat::Blocks
+	- fixed up failing tests from that fallout
+	- simplified lots of Text::WikiFormat internals
+
+	Sat Nov 27 16:44:38 UTC 2004
+	- mark build_requires in Build.PL
+	- use Scalar::Util::reftype() for checking ref
+	- added missing t/links.t test (don't know where it came from)
+
+0.72 Sun May  9 00:20:26 UTC 2004
+	- further code block nested indentation fixes (reported by Chris Winters)
+
+	Sat May  8 06:44:54 UTC 2004
+	- improve code block handling (reported by Chris Winters)
+
+	Sat Apr 24 18:57:17 UTC 2004
+	- don't add prefix to absolute links when expecting them (Paul Schmidt and
+	  Chris Winters)
+
+0.71 Tue Aug  5 00:49:09 GMT 2003
+	- fixed a bug in default paragraph regex (Sam Vilain and Kake)
+
+0.70 Thu Jul 31 04:56:51 GMT 2003
+	- apply slightly modified patch from Andy H. for absolute extended links
+
+Wed Jul 30 01:23:01 GMT 2003
+	- process header block contents (Kake)
+	- fix an unintentional static variable bug in find_link_title()
+	- fix a hash order bug in base.t (found by Kake)
+	
+Tue Jul 29 04:11:05 GMT 2003
+	- fix Kake's last nesting problem
+	- allow overriding strong and emphasized tag regexes
+
+Fri May 16 07:29:15 GMT 2003
+	- allow nested lists
+	- major cleanup (see first point)
+	- add indent tag
+	- remove indent from list regexes
+
+0.60 Sat Mar  1 18:54:06 GMT 2003
+	- ported to use Module::Build
+
+      Wed Feb 19 22:25:56 GMT 2003
+	- added t/merge-hash.t (Kake)
+	- make sure nested hashes merge even if not in destination (Kake)
+	- protect against overwriting values of 0 (almost wrote a bug!)
+	- fix a doc typo (Kake)
+	- allow alternate extended link delimiters (Kake)
+	- addede explicit.t (Kake)
+	- added t/tag-override-use-as.t (Kake)
+
+0.50 Thu Dec 26 23:07:07 GMT 2002
+	- added t/tag-override.t tests for tag overriding (Kake)
+	- minor refactorings to improve design
+	- use hash merging to fix HoH overriding (Kake, again)
+	- added t/lists-no-indent.t to test that non-indented lists work (kake)
+	- added patch from Kake to allow non-indented lists
+
+0.45 Fri Oct 18 01:14:53 UTC 2002
+	- signed distribution with Module::Signature (thanks, Autrijus!)
+
+     Thu Sep 12 18:11:10 UTC 2002
+	- end lists followed by empty paragraphs (RT #1455b, )
+	- add 'implicit_links' flag (suggestion from Kate (kake))
+	- fixed an import bug (also from kake, who provided tests!)
+	- encode links property (yet another kake idea!)
+
+	 Sat Aug 24 23:35:44 UTC 2002
+	- don't end a list if no list is active (RT #1455, )
+
+0.40 Tue Jun 11 05:23:44 UTC 2002
+	- added _available_lists(), 'linkorder', and t/lists.t
+	- made format() respect 'linkorder'
+	- made end_list() return blank code for empty list
+	(all suggested by Tom Hukins, see CPAN RT #671)
+
+	- avoid 'Subroutine redefined' warning with 5.8 in t/Wiki.t
+
+0.30 Thu May  2 20:42:14 PDT 2002
+	- added import() and its tests, suggested by Tony Bowden 
+
+0.20 Lost in the Mists of Time
+	- initial CPAN release
+
+0.10 Before Beer Was Invented
+	- distributed with SlashWiki
+
+0.01 Pre-History
+	- part of the Jellybean project
diff --git a/GPL b/GPL
new file mode 100644
index 0000000..c7aea18
--- /dev/null
+++ b/GPL
@@ -0,0 +1,280 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                          675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..5507fe4
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,29 @@
+ARTISTIC
+Build.PL
+Changes
+GPL
+MANIFEST
+Makefile.PL
+META.yml
+README
+lib/Text/MediawikiFormat.pm
+lib/Text/MediawikiFormat/Blocks.pm
+t/Wiki.t
+t/absolute_links.t
+t/base.t
+t/bugs.t
+t/developer/0-signature.t
+t/developer/pod.t
+t/developer/pod-coverage.t
+t/embedded-links.t
+t/explicit.t
+t/implicit.t
+t/kake.t
+t/lists.t
+t/lists-nested.t
+t/lists-no-indent.t
+t/merge-hash.t
+t/tag-override-use-as.t
+t/tag-override.t
+SIGNATURE    Added here by Module::Build
+SIGNATURE    Added here by Module::Build
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..f84cfb4
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,32 @@
+---
+name: Text-MediawikiFormat
+version: v1.0
+author: []
+abstract: Translate Wiki markup into other text formats
+license: perl
+resources:
+  license: http://dev.perl.org/licenses/
+requires:
+  Scalar::Util: 1.14
+  URI: ''
+  URI::Escape: ''
+  version: 0.74
+build_requires:
+  Test::More: 0.3
+  Test::NoWarnings: 0
+  Test::Warn: 0
+recommends:
+  HTML::Parser: ''
+  HTML::Tagset: ''
+provides:
+  Text::MediawikiFormat:
+    file: lib/Text/MediawikiFormat.pm
+    version: v1.0
+  Text::MediawikiFormat::Block:
+    file: lib/Text/MediawikiFormat/Blocks.pm
+  Text::MediawikiFormat::Blocks:
+    file: lib/Text/MediawikiFormat/Blocks.pm
+generated_by: Module::Build version 0.2808
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.2.html
+  version: 1.2
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..6c27ccb
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,20 @@
+# Note: this file was auto-generated by Module::Build::Compat version 0.03
+use ExtUtils::MakeMaker;
+WriteMakefile
+(
+          'NAME' => 'Text::MediawikiFormat',
+          'VERSION_FROM' => 'lib/Text/MediawikiFormat.pm',
+          'PREREQ_PM' => {
+                           'Scalar::Util' => '1.14',
+                           'Test::More' => '0.3',
+                           'Test::NoWarnings' => '0',
+                           'Test::Warn' => '0',
+                           'URI' => '',
+                           'URI::Escape' => '',
+                           'version' => '0.74'
+                         },
+          'INSTALLDIRS' => 'site',
+          'EXE_FILES' => [],
+          'PL_FILES' => {}
+        )
+;
diff --git a/README b/README
new file mode 100644
index 0000000..64d48db
--- /dev/null
+++ b/README
@@ -0,0 +1,7 @@
+Text::WikiFormat converts text in a simple Wiki markup language to whatever
+your little heart desires, provided you can describe it accurately in a
+semi-regular tag language.
+
+This program is Free Software, provided without warranty or implied
+merchantability, but available under the same terms as Perl itself.  What a
+deal!  It's copyrighted and copylefted 2002 - 2006, chromatic.
diff --git a/SIGNATURE b/SIGNATURE
new file mode 100644
index 0000000..e2c2b8b
--- /dev/null
+++ b/SIGNATURE
@@ -0,0 +1,50 @@
+This file contains message digests of all files listed in MANIFEST,
+signed via the Module::Signature module, version 0.55.
+
+To verify the content in this distribution, first make sure you have
+Module::Signature installed, then type:
+
+    % cpansign -v
+
+It will check each file's integrity, as well as the signature's
+validity.  If "==> Signature verified OK! <==" is not displayed,
+the distribution may already have been compromised, and you should
+not run its Makefile.PL or Build.PL.
+
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA1
+
+SHA1 de99730c9cff5401331cc9b10da8fffc2607119e ARTISTIC
+SHA1 22316eae2efc4afadca11b79369f9e173d6039b1 Build.PL
+SHA1 205fdf6b110d7a4ab9935d5c0ad4bfcff2294e3a Changes
+SHA1 2d29c273fda30310211bbf6a24127d589be09b6c GPL
+SHA1 eccb0808083e42742ab218aade252010ec49a567 MANIFEST
+SHA1 32787552984162e780b38633dd67f4809fb5e992 META.yml
+SHA1 c37ec8e62f2d6b0fffe4b4a73c2bdf3c3f2def3b Makefile.PL
+SHA1 32770eb383f51fec27a092d2c39f0b1c302df6e6 README
+SHA1 ac1b2db56ba408051f88f7515b5d041400547b0f lib/Text/MediawikiFormat.pm
+SHA1 fd66bd52dab924fbdf8185b56b4b0f835cba8b44 lib/Text/MediawikiFormat/Blocks.pm
+SHA1 645310aa31699333b7d7bfaa9da48e7a8fdbb8f7 t/Wiki.t
+SHA1 b617b7515b2c9cc7a194693af3f002b7665c943d t/absolute_links.t
+SHA1 d6b24c5b497740c653882d6f3a691b7e51ea8f02 t/base.t
+SHA1 d7db75f52a1631a3f78ceb00ece490f1f0d6c6b1 t/bugs.t
+SHA1 e7fbd29bd994639e82a480ca7668208c84faf780 t/developer/0-signature.t
+SHA1 9f8e6742d15fc02f70fea8c7883e534b5ab0027e t/developer/pod-coverage.t
+SHA1 0190346d7072d458c8a10a45c19f86db641dcc48 t/developer/pod.t
+SHA1 c09a0d0ba5a0b9c152a1f93315b9c49fcfbd865c t/embedded-links.t
+SHA1 8f75e7cd212a52a42eea81ed150fe5450c70e87a t/explicit.t
+SHA1 bbc7124baf9098da6c2dcf89f2a3284683413b6a t/implicit.t
+SHA1 6b7053b05703eb121a6b45452dca7372a5ae4d7b t/kake.t
+SHA1 784b48b387dd561d52de82cd2b94af222c61d26d t/lists-nested.t
+SHA1 dcacb77dfbcb5a2036f5aba0aa5aabf0a1082098 t/lists-no-indent.t
+SHA1 29d22f52586b606688d50640d15266439d74eb93 t/lists.t
+SHA1 e8317c38218cf0420d4cc4c2fb66cb50c3caed99 t/merge-hash.t
+SHA1 1a5a502110014d7694d78dad6fdd8d182bd16eb9 t/tag-override-use-as.t
+SHA1 e8161481a91f596eaa2bf6852cadb4ac0b9379df t/tag-override.t
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.2.2 (GNU/Linux)
+
+iD8DBQFIWr9dLD1OTBfyMaQRAhnjAJ91HRbRtBZOsbp7PCzqw+NYnxaRKwCffQAA
+ujduk8fkZPX40g58mtBNZlY=
+=gfGy
+-----END PGP SIGNATURE-----
diff --git a/lib/Text/MediawikiFormat/Blocks.pm b/lib/Text/MediawikiFormat/Blocks.pm
new file mode 100644
index 0000000..fcaecbc
--- /dev/null
+++ b/lib/Text/MediawikiFormat/Blocks.pm
@@ -0,0 +1,247 @@
+package Text::MediawikiFormat::Blocks;
+
+use strict;
+use warnings::register;
+
+sub import
+{
+	my $caller = caller();
+	no strict 'refs';
+	*{ $caller . '::new_block' } = sub
+	{
+		my $type  = shift;
+		my $class = "Text::MediawikiFormat::Block::$type";
+		
+		*{ $class . '::ISA' } = [ 'Text::MediawikiFormat::Block' ]
+			unless $class->can( 'new' );
+
+		return $class->new( type => $type, @_ );
+	};
+}
+
+package Text::MediawikiFormat::Block;
+
+use Scalar::Util qw( blessed reftype );
+
+sub new
+{
+	my ($class, %args) = @_;
+
+	$args{text} = $class->arg_to_ref (delete $args{text} || '');
+	$args{args} = [$class->arg_to_ref (delete $args{args} || [])];
+
+	bless \%args, $class;
+}
+
+sub arg_to_ref
+{
+	my ($class, $value) = @_;
+	return   $value if ( reftype( $value ) || '' ) eq 'ARRAY';
+	return [ $value ];
+}
+
+sub shift_args
+{
+	my $self = shift;
+	my $args = shift @{ $self->{args} };
+	return wantarray ? @$args : $args;
+}
+
+sub all_args
+{ 
+	my $args = $_[0]{args};
+	return wantarray ? @$args : $args;
+}
+
+sub text
+{
+	my $text = $_[0]{text};
+	return wantarray ? @$text : $text;
+}
+
+sub add_text
+{
+	my $self = shift;
+	push @{ $self->{text} }, @_;
+}
+
+sub formatted_text
+{
+	my $self = shift;
+	return map
+	{
+		blessed( $_ ) ? $_ : $self->formatter( $_ )
+	} $self->text();
+}
+
+sub formatter
+{
+	my ($self, $line) = @_;
+	Text::MediawikiFormat::format_line ($line, $self->tags(),
+					    $self->opts());
+}
+
+sub add_args
+{
+	my $self = shift;
+	push @{ $self->{args} }, @_;
+}
+
+{
+	no strict 'refs';
+	for my $attribute (qw( level opts tags type ))
+	{
+		*{ $attribute } = sub { $_[0]{$attribute} };
+	}
+}
+
+sub merge
+{
+	my ($self, $next_block) = @_;
+
+	return $next_block unless $self->type()  eq $next_block->type();
+	return $next_block unless $self->level() == $next_block->level();
+
+	$self->add_text( $next_block->text() );
+	$self->add_args( $next_block->all_args() );
+	return;
+}
+
+sub nests
+{
+	my ($self, $maynest) = @_;
+	my $tags = $self->{tags};
+
+	return exists $tags->{nests}{$self->type()}
+	       && exists $tags->{nests}{$maynest->type()}
+	       && $self->level() < $maynest->level()
+	       #  tags nest anywhere, regardless of level and parent
+	       || exists $tags->{nests_anywhere}{$maynest->type()};
+}
+
+sub nest
+{
+	my ($self, $next_block) = @_;
+
+	return unless $next_block = $self->merge ($next_block);
+	return $next_block unless $self->nests ($next_block);
+
+	# if there's a nested block at the end, maybe it can nest too
+	my $last_item = ( $self->text() )[-1];
+	return $last_item->nest( $next_block ) if blessed( $last_item );
+
+	$self->add_text( $next_block );
+	return;
+}
+
+1;
+__END__
+=head1 NAME
+
+Text::MediawikiFormat::Blocks - blocktypes for Text::MediawikiFormat
+
+=head1 SYNOPSIS
+
+None.  Use L as the public interface, unless you want to
+create your own block type.
+
+=head1 DESCRIPTION
+
+This module merely creates subclasses of Text::MediawikiFormat::Block, which is
+the interesting code.  A block is a collection of related lines, such as a code
+block (text to display verbatim in a monospaced font), a header, an unordered
+list, an ordered list, and a paragraph (text to display in a proportional
+font).
+
+Every block extends C.
+
+=head1 METHODS
+
+The following methods exist:
+
+=over 4
+
+=item * C
+
+Creates and returns a new block.  The valid arguments are:
+
+=over 4
+
+=item * C
+
+The text of the line found in the block.
+
+=item * C
+
+The arguments captured by the block-identifying regular expression.
+
+=item * C
+
+The level of indentation for the block (usually only useful for list blocks).
+
+=item * C
+
+The tags in effect for the current type of wiki formatting.
+
+=item * C
+
+The options in effect for the current type of wiki formatting.
+
+=back
+
+Use the accessors of the same names to retrieve the values of the attributes.
+
+=item * C
+
+Adds a list of lines of text to the current text for the block.  This is very
+useful when you encounter a block and want to merge it with the previous block
+of the same type
+
+=item * C
+
+Adds further arguments to the block; useful when merging blocks.
+
+=item * C
+
+Returns text formatted appropriately for this block.  Blocks don't have to have
+formatters, but they may.
+
+=item * C
+
+Formats the C<$line> using C.  You can add
+your own formatter here; this is worth overriding.
+
+=item * C
+
+Merges the current block with C<$next_block> (the next block encountered) if
+they're of the same type and are at the same level.  This adds the text and
+args of C<$next_block> to the current block.  It's your responsibility to
+remove C<$next_block> from whatever your code iterates over.
+
+=item * C
+
+Returns true if this block should nest (as in lists and unordered lists) for
+the active wiki formatting.
+
+=item * C
+
+Nests C<$next_block> under this block if the both nest and if C<$next_block>
+has a level greater than the current block.  This actually adds C<$next_block>
+as a text item within the current block.  Beware.
+
+=back
+
+=head1 AUTHOR
+
+chromatic, C<< chromatic at wgz dot org >>
+
+=head1 BUGS
+
+No known bugs.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2006, chromatic.  Some rights reserved.
+
+This module is free software; you can use, redistribute, and modify it under
+the same terms as Perl 5.8.x.
diff --git a/lib/Text/MediawikiFormat.pm b/lib/Text/MediawikiFormat.pm
new file mode 100644
index 0000000..0a77763
--- /dev/null
+++ b/lib/Text/MediawikiFormat.pm
@@ -0,0 +1,1438 @@
+package Text::MediawikiFormat;
+
+use strict;
+use warnings::register;
+
+=head1 NAME
+
+Text::MediawikiFormat - Translate Wiki markup into other text formats
+
+=head1 VERSION
+
+Version 1.0
+
+=cut
+
+use vars qw($VERSION);
+use version; $VERSION = qv('1.0');
+
+=head1 SYNOPSIS
+
+	use Text::MediawikiFormat 'wikiformat';
+	my $html = wikiformat ($raw);
+	my $text = wikiformat ($raw, {}, {implicit_links => 1});
+
+=head1 DESCRIPTION
+
+L and its sister projects use the PHP Mediawiki to format
+their pages.  This module attempts to duplicate the Mediawiki formatting rules.
+Those formatting rules can be simple and easy to use, while providing more
+advanced options for the power user.  They are also easy to translate into
+other, more complicated markup languages with this module.  It creates HTML by
+default, but could produce valid POD, DocBook, XML, or any other format
+imaginable.
+
+The most important function is C.  It is
+not exported by default, but will be exported as C if any
+options at all are passed to the exporter, unless the name is overridden
+explicitly.  See L<"EXPORT"> for more information.
+
+It should be noted that this module is written as a drop in replacement for
+L that expands on that modules functionality and provides
+a default rule set that may be used to format text like the PHP Mediawiki.  It
+is also well to note early that if you just want a Mediawiki clone (you don't
+need to customize it heavily and you want integration with a back end
+database), you should look at L.
+
+=cut
+
+use Carp qw(carp confess croak);
+use CGI qw(:standard);
+use Scalar::Util qw(blessed);
+use Text::MediawikiFormat::Blocks;
+use URI;
+use URI::Escape qw(uri_escape uri_escape_utf8);
+
+use vars qw($missing_html_packages %tags %opts %merge_matrix
+	    $uric $uricCheat $uriCruft);
+
+BEGIN
+{
+    # Try to load optional HTML packages, recording any errors.
+    eval {require HTML::Parser};
+    $missing_html_packages = $@;
+    eval {require HTML::Tagset};
+    $missing_html_packages .= $@;
+}
+
+
+
+###
+### Defaults
+###
+%tags =
+(
+    indent		=> qr/^(?:[:*#;]*)(?=[:*#;])/,
+    link		=> \&_make_html_link,
+    strong		=> sub {"$_[0]"},
+    emphasized		=> sub {"$_[0]"},
+    strong_tag		=> qr/'''(.+?)'''/,
+    emphasized_tag	=> qr/''(.+?)''/,
+
+    code		=> ['
', "
\n", '', "\n"], + line => ['', '', '
', "\n"], + paragraph => ["

", "

\n", '', "\n", 1], + paragraph_break => ['', '', '', "\n"], + unordered => ["
    \n", "
\n", '
  • ', "
  • \n"], + ordered => ["
      \n", "
    \n", '
  • ', "
  • \n"], + definition => ["
    \n", "
    \n", \&_dl], + header => ['', "\n", \&_make_header], + + blocks => + { + code => qr/^ /, + header => qr/^(=+)\s*(.+?)\s*\1$/, + line => qr/^-{4,}$/, + ordered => qr/^#\s*/, + unordered => qr/^\*\s*/, + definition => qr/^([;:])\s*/, + paragraph => qr/^/, + paragraph_break => qr/^\s*$/, + }, + + indented => {map {$_ => 1} qw(ordered unordered definition)}, + nests => {map {$_ => 1} qw(ordered unordered definition)}, + nests_anywhere => {map {$_ => 1} qw(nowiki)}, + + blockorder => [qw(code header line ordered unordered definition + paragraph_break paragraph)], + implicit_link_delimiters + => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!, + extended_link_delimiters + => qr!\[(?:\[[^][]*\]|[^][]*)\]!, + + schemas => [qw(http https ftp mailto gopher)], + + unformatted_blocks => [qw(header nowiki pre)], + + allowed_tags => [#HTML + qw(b big blockquote br caption center cite code dd + div dl dt em font h1 h2 h3 h4 h5 h6 hr i li ol p + pre rb rp rt ruby s samp small strike strong sub + sup table td th tr tt u ul var), + # Mediawiki Specific + qw(nowiki),], + allowed_attrs => [qw(title align lang dir width height bgcolor), + qw(clear), # BR + qw(noshade), # HR + qw(cite), # BLOCKQUOTE, Q + qw(size face color), # FONT + # For various lists, mostly deprecated but safe + qw(type start value compact), + # Tables + qw(summary width border frame rules cellspacing + cellpadding valign char charoff colgroup col + span abbr axis headers scope rowspan colspan), + qw(id class name style), # For CSS + ], + + _toc => [], +); + +%opts = +( + extended => 1, + implicit_links => 0, + absolute_links => 1, + prefix => '', + process_html => 1, + charset => 'utf-8', +); + +# Make sure import's argument hash contains an `as' entry. `as' defaults to +# `wikiformat' when none is given. +sub _process_args +{ + shift; # Class + return as => shift if @_ == 1; + return as => 'wikiformat', @_; +} + +# Delete the options (prefix, extended, implicit_links, ...) from a hash, +# returning a new hash with the deleted options. +sub _extract_opts +{ + my %newopts; + + for my $key (qw{prefix extended implicit_links absolute_links + process_html debug}) + { + if (defined (my $val = delete $_[0]->{$key})) + { + $newopts{$key} = $val; + } + } + + return \%newopts; +} + +# Shamelessly ripped from Hash::Merge, which doesn't work in a threaded +# environment with two threads trying to use different merge matrices. +%merge_matrix = +( + SCALAR => + { + SCALAR => sub {return $_[0]}, + ARRAY => sub {# Need to be able to replace scalar with array + # for extended_link_delimiters (could be array + # or regex). + return $_[0];}, + HASH => sub {confess "Attempt to replace hash with scalar" + if defined $_[0]; + return _clone ($_[1]);} + }, + + ARRAY => + { + SCALAR => sub {# Need to be able to replace array with scalar + # for extended_link_delimiters (could be array + # or regex). + return _clone ($_[0]);}, + ARRAY => sub {return _clone ($_[0]);}, + HASH => sub {confess "Attempt to replace hash with array"} + }, + + HASH => + { + SCALAR => sub {confess "Attempt to replace scalar with hash"}, + ARRAY => sub {confess "Attempt to replace array with hash"}, + HASH => sub {_merge_hash_elements ($_[0], $_[1])} + } +); +# Return arrays and a deep copy of hashes. +sub _clone +{ + my ($obj) = @_; + my $type; + if (!defined $obj) { # Perl 5.005 compatibility + $type = 'SCALAR'; + } elsif (ref $obj eq 'HASH') { + $type = 'HASH'; + } elsif (ref $obj eq 'ARRAY') { + $type = 'ARRAY'; + } else { + $type = 'SCALAR'; + } + + return $obj if $type eq 'SCALAR'; + return $obj if $type eq 'ARRAY'; + + my %copy; + foreach my $key (keys %$obj) + { + $copy{$key} = _clone ($obj->{$key}); + } + return \%copy; +} +# This does a straight merge of hashes, delegating the merge-specific +# work to '_merge_hashes'. +sub _merge_hash_elements +{ + my ($left, $right) = @_; + die "Arguments for _merge_hash_elements must be hash references" unless + UNIVERSAL::isa ($left, 'HASH') && UNIVERSAL::isa ($right, 'HASH'); + + my %newhash; + foreach my $leftkey (keys %$left) + { + if (exists $right->{$leftkey}) + { + $newhash{$leftkey} = + _merge_hashes ($left->{$leftkey}, $right->{$leftkey}); + } + else + { + $newhash{$leftkey} = _clone ($left->{$leftkey}); + } + } + foreach my $rightkey (keys %$right) + { + $newhash{$rightkey} = _clone ($right->{$rightkey}) + if !exists $left->{$rightkey}; + } + return \%newhash; +} +sub _merge_hashes +{ + my ($left, $right) = @_; + + # if one argument or the other is undefined or empty, don't worry about + # copying, just return the original. + return $right unless defined $left; + return $left unless defined $right; + + # For the general use of this function, we want to create duplicates + # of all data that is merged. + + my ($lefttype, $righttype); + if (ref $left eq 'HASH') { + $lefttype = 'HASH'; + } elsif (ref $left eq 'ARRAY') { + $lefttype = 'ARRAY'; + } else { + $lefttype = 'SCALAR'; + } + + if (ref $right eq 'HASH') { + $righttype = 'HASH'; + } elsif (ref $right eq 'ARRAY') { + $righttype = 'ARRAY'; + } else { + $righttype = 'SCALAR'; + } + + return $merge_matrix{$lefttype}->{$righttype} ($left, $right); +} + +sub _require_html_packages +{ + croak "$missing_html_packages\n" + . "HTML::Parser & HTML::Tagset is required for process_html\n" + if $missing_html_packages; +} + +sub import +{ + return unless @_ > 1; + + my $class = shift; + my %args = $class->_process_args (@_); + my $name = delete $args{as}; + + my $caller = caller(); + my $iopts = _merge_hashes _extract_opts (\%args), \%opts; + my $itags = _merge_hashes \%args, \%tags; + + _require_html_packages + if $iopts->{process_html}; + + # Could verify ITAGS here via _check_blocks, but what if a user + # wants to add a block to block_order that they intend to override + # the implementation of with every call to format()? + + no strict 'refs'; + *{ $caller . "::" . $name } = sub + { + Text::MediawikiFormat::_format ($itags, $iopts, @_); + } +} + + + +=head1 FUNCTIONS + +=head2 format + +C takes one required argument, the text to convert, and returns the +converted text. It allows two optional arguments. The first is a reference to +a hash of tags used to override the function's default behavior. Anything +passed in here will override the default tags. The second argument is a hash +reference of options. The options are currently: + +=over 4 + +=item prefix + +The prefix of any links to wiki pages. In HTML mode, this is the path to the +Wiki. The actual linked item itself will be appended to the prefix. This is +useful to create full URIs: + + {prefix => 'http://example.com/wiki.pl?page='} + +=item extended + +A boolean flag, true by default, to let square brackets mark links. +An optional title may occur after the Wiki targets, preceded by an open pipe. +URI titles are separated from their title with a space. These are valid +extended links: + + [[A wiki page|and the title to display]] + [http://ximbiot.com URI title] + +Where the linking semantics of the destination format allow it, the result will +display the title instead of the URI. In HTML terms, the title is the content +of an C element (not the content of its C attribute). + +You can use delimiters other than single square brackets for marking extended +links by passing a value for C in the C<%tags> hash +when calling C. + +Note that if you disable this flag, you should probably enable +C or there will be no automated way to link to other pages in +your wiki. + +=item implicit_links + +A boolean flag, false by default, to create links from StudlyCapsStrings. + +=item absolute_links + +A boolean flag, true by default, which treats any links that are absolute URIs +(such as C) specially. Any prefix will not apply. +This should maybe be called implicit_absolute_links since the C +option enables absolute links inside square brackets by default. + +A link is any text that starts with a known schema followed by a colon and one +or more non-whitespace characters. This is a distinct subset of what L +recognizes as a URI, but is a good first-order approximation. If you need to +recognize more complex URIs, use the standard wiki formatting explained +earlier. + +The recognized schemas are those defined in the C value in the C<%tags> +hash. C defaults to C, C, C, C, and +C. + +=item process_html + +This flag, true by default, causes the formatter to ignore block level wiki +markup (code, ordered, unordered, etc...) when they occur on lines which also +contain allowed block-level HTML tags (
    , 
      ,
        ,
    , etc...). +Phrase level wiki markup (emphasis, strong, & links) is unaffected by this +flag. + +=back + +=cut + +sub format +{ + _format (\%tags, \%opts, @_); +} + +# Turn the contents after a ; or : into a dictionary list. +# Using : without ; just looks like an indent. +sub _dl +{ + #my ($line, $indent, $lead) = @_; + my ($term, $def); + + if ($_[2] eq ';') + { + if ($_[0] =~ /^(.*?)\s+:\s+(.*)$/) + { + $term = $1; + $def = $2; + } + else + { + $term = $_[0]; + } + } + else + { + $def = $_[0]; + } + + my @retval; + push @retval, "
    ", $term, "
    \n" if defined $term; + push @retval, "
    ", $def, "
    \n" if defined $def; + return @retval; +} + +# Makes a regex out of the allowed schema array. +sub _make_schema_regex +{ + my $re = join "|", map {qr/\Q$_\E/} @_; + return qr/(?:$re)/; +} + +$uric = $URI::uric; +$uricCheat = $uric; + +# We need to avoid picking up 'HTTP::Request::Common' so we have a +# subset of uric without a colon. +$uricCheat =~ tr/://d; + +# Identifying characters often accidentally picked up trailing a URI. +$uriCruft = q/]),.!'";}/; + +# escape a URI based on our charset. +sub _escape_uri +{ + my ($opts, $uri) = @_; + confess "charset not initialized" unless $opts->{charset}; + return uri_escape_utf8 $uri if $opts->{charset} =~ /^utf-?8$/i; + return uri_escape $uri; +} + +# Turn [[Wiki Link|Title]], [URI Title], scheme:url, or StudlyCaps into links. +sub _make_html_link +{ + my ($tag, $opts, $tags) = @_; + + my ($class, $trailing) = ('', ''); + my ($href, $title); + if ($tag =~ /^\[\[([^|#]*)(?:(#)([^|]*))?(?:(\|)(.*))?\]\]$/) + { + # Wiki link + $href = $opts->{prefix} . _escape_uri $opts, $1 if $1; + $href .= $2 . _escape_uri $opts, $3 if $2; + + if ($4) + { + # Title specified explicitly. + if (length $5) + { + $title = $5; + } + else + { + # An empty title asks Mediawiki to strip any parens off the end + # of the node name. + $1 =~ /^([^(]*)(?:\s*\()?/; + $title = $1; + } + } + else + { + # Title defaults to the node name. + $title = $1; + } + } + elsif ($tag =~ /^\[(\S*)(?:(\s+)(.*))?\]$/) + { + # URI + $href = $1; + if ($2) + { + $title = $3; + } + else + { + $title = ++$opts->{_uri_refs}; + } + $href =~ s/'/%27/g; + } + else + { + # Shouldn't be able to get here without either $opts->{absolute_links} + # or $opts->{implicit_links}; + $tags->{_schema_regex} ||= _make_schema_regex @{$tags->{schemas}}; + my $s = $tags->{_schema_regex}; + + if ($tag =~ /^$s:[$uricCheat][$uric]*$/) + { + # absolute link + $href = $&; + $trailing = $& if $href =~ s/[$uriCruft]$//; + $title = $href; + } + else + { + # StudlyCaps + $href = $opts->{prefix} . _escape_uri $opts, $tag; + $title = $tag; + } + } + + return "$title
    $trailing"; +} + +# Store a TOC line for later. +# +# ASSUMPTIONS +# $level >= 1 +sub _store_toc_line +{ + my ($toc, $level, $title, $name) = @_; + + # TODO: Strip formatting from $title. + + if (@$toc && $level > $toc->[-1]->{level}) + { + # Nest a sublevel. + $toc->[-1]->{sublevel} = [] + unless exists $toc->[-1]->{sublevel}; + _store_toc_line ($toc->[-1]->{sublevel}, $level, $title, $name); + } + else + { + push @$toc, {level => $level, title => $title, name => $name}; + } + + return $level; +} + +# Make header text, storing the line for the TOC. +# +# ASSUMPTIONS +# $tags->{_toc} has been initialized to an array ref. +sub _make_header +{ + my $level = length $_[2]; + my $n = _escape_uri $_[-1], $_[3]; + + _store_toc_line ($_[-2]->{_toc}, $level, $_[3], $n); + + return "", + Text::MediawikiFormat::format_line ($_[3], @_[-2, -1]), + "\n"; +} + +sub _format +{ + my ($itags, $iopts, $text, $tags, $opts) = @_; + + # Overwriting the caller's hashes locally after merging its contents + # is okay. + $tags = _merge_hashes ($tags || {}, $itags); + $opts = _merge_hashes ($opts || {}, $iopts); + + _require_html_packages + if $opts->{process_html}; + + # Always verify the blocks since the user may have slagged the + # default hash on import. + _check_blocks ($tags); + + my @blocks = _find_blocks ($text, $tags, $opts); + @blocks = _nest_blocks (\@blocks); + return _process_blocks (\@blocks, $tags, $opts); +} + +sub _check_blocks +{ + my $tags = shift; + my %blocks = %{$tags->{blocks}}; + delete @blocks{@{$tags->{blockorder}}}; + + carp + "No order specified for blocks: " + . join (', ', keys %blocks) + . ".\n" + if keys %blocks; +} + +# This sub recognizes three states: +# +# 1. undef +# Normal wiki processing will be done on this line. +# +# 2. html +# Links and phrasal processing will be done, but formatting should be +# ignored. +# +# 3. nowiki +# No further wiki processing should be done. +# +# Each state may override the lower ones if already set on a given line. +# +sub _append_processed_line +{ + my ($parser, $text, $state) = @_; + my $lines = $parser->{processed_lines}; + + $state ||= ''; + + my @newlines = split /(?<=\n)/, $text; + if (@$lines && $lines->[-1]->[1] !~ /\n$/ + && # State not changing from or to 'nowiki' + !($state ne $lines->[-1]->[0] + && grep /^nowiki$/, $state, $lines->[-1]->[0])) + { + $lines->[-1]->[1] .= shift @newlines; + $lines->[-1]->[0] = $state if $state eq 'html'; + } + + foreach my $line (@newlines) + { + $lines->[-1]->[2] = '1' if @$lines; + push @$lines, [$state, $line]; + } + $lines->[-1]->[2] = '1' + if @$lines && $lines->[-1]->[1] =~ /\n$/; +} + +sub _html_tag +{ + my ($parser, $type, $tagname, $orig, $attr) = @_; + my $tags = $parser->{tags}; + + # $tagname may have been generated by an empty tag. If so, HTML::Parser + # will sometimes include the trailing / in the tag name. + my $isEmptyTag = $orig =~ m#/>$#; + $tagname =~ s#/$## if $isEmptyTag; + + unless (grep /^\Q$tagname\E$/, @{$tags->{allowed_tags}}) + { + _append_processed_line $parser, CGI::escapeHTML $orig; + return; + } + # Any $tagname must now be in the allowed list, including . + + my $tagstack = $parser->{tag_stack}; + my $stacktop = @$tagstack ? $tagstack->[-1] : ''; + + # First, process end tags, since they can change our state. + if ($type eq 'E' && $stacktop eq $tagname) + { + # The closing tag is at the top of the stack, like it should be. + # Pop it and append the close tag to the output. + pop @$tagstack; + my $newtag; + + if ($tagname eq 'nowiki') + { + # The browser doesn't need to see the tag. + $newtag = ''; + } + else + { + $newtag = ""; + } + + # Can't close a state into
     or 
    +	_append_processed_line $parser, $newtag, 'html';
    +	return;
    +    }
    +
    +    if (@$tagstack && grep /^\Q$stacktop\E$/, qw{nowiki pre})
    +    {
    +	# Ignore all markup within 
     or  tags.
    +	_append_processed_line $parser, CGI::escapeHTML ($orig), 'nowiki';
    +	return;
    +    }
    +
    +    if ($type eq 'E' && $HTML::Tagset::isPhraseMarkup{$tagname})
    +	# If we ask for artificial end element events for self-closed elements,
    +	# then we need to check $HTML::Tagset::emptyElement($tagname) here too.
    +    {
    +	# We didn't record phrase markup on the stack, so it's okay to just
    +	# let it close.
    +	_append_processed_line $parser, "";
    +	return;
    +    }
    +
    +    if ($type eq 'E')
    +    {
    +	# We got a non-phrase end tag that wasn't on the stack.  Escape it.
    +	_append_processed_line $parser, CGI::escapeHTML ($orig);
    +	return;
    +    }
    +
    +
    +    ###
    +    ### $type must now eq 'S'.
    +    ###
    +
    +    # The browser doesn't need to see the  tag.
    +    if ($tagname eq 'nowiki')
    +    {
    +	push @$tagstack, $tagname
    +	    unless $isEmptyTag;
    +	return;
    +    }
    +
    +    # Strip disallowed attributes.
    +    my $newtag = "<$tagname";
    +    foreach (@{$tags->{allowed_attrs}})
    +    {
    +	    if (defined $attr->{$_})
    +	    {
    +		    $newtag .= " $_";
    +		    unless ($attr->{$_}
    +			    eq '__TEXT_MEDIAWIKIFORMAT_BOOL__')
    +		    {
    +			    # CGI::escapeHTML escapes single quotes.
    +			    $attr->{$_} = CGI::escapeHTML $attr->{$_};
    +			    $newtag .= "='" . $attr->{$_} . "'";
    +		    }
    +	    }
    +    }
    +    $newtag .= " /" if $HTML::Tagset::emptyElement{$tagname} || $isEmptyTag;
    +    $newtag .= ">";
    +
    +    # If this isn't a block level element, there's no need to track nesting.
    +    if ($HTML::Tagset::isPhraseMarkup{$tagname}
    +	|| $HTML::Tagset::emptyElement{$tagname})
    +    {
    +	_append_processed_line $parser, $newtag;
    +	return;
    +    }
    +
    +    # Some elements can close implicitly
    +    if (@$tagstack)
    +    {
    +	if ($tagname eq $stacktop
    +	    && $HTML::Tagset::optionalEndTag{$tagname})
    +	{
    +	    pop @$tagstack;
    +	}
    +	elsif (!$HTML::Tagset::is_Possible_Strict_P_Content{$tagname})
    +	{
    +	    # Need to check more than the last item for paragraphs.
    +	    for (my $i = $#{$tagstack}; $i >= 0; $i--)
    +	    {
    +		my $checking = $tagstack->[$i];
    +		last if grep /^\Q$checking\E$/,
    +			@HTML::Tagset::p_closure_barriers;
    +
    +		if ($checking eq 'p')
    +		{
    +		    # pop 'em all.
    +		    splice @$tagstack, $i;
    +		    last;
    +		}
    +	    }
    +	}
    +    }
    +
    +    # Could verify here that 
  • and sub-elements only appear where + # they belong. + + # Push the new tag onto the stack. + push @$tagstack, $tagname + unless $isEmptyTag; + + _append_processed_line $parser, $newtag, + $tagname eq 'pre' ? 'nowiki' : 'html'; + return; +} + +sub _html_comment +{ + my ($parser, $text) = @_; + + _append_processed_line $parser, $text, 'nowiki'; +} + +sub _html_text +{ + my ($parser, $dtext, $skipped_text, $is_cdata) = @_; + my $tagstack = $parser->{tag_stack}; + my ($newtext, $newstate); + + warnings::warnif ("Got skipped_text: `$skipped_text'") + if $skipped_text; + + if (@$tagstack) + { + if (grep /\Q$tagstack->[-1]\E/, qw{nowiki pre}) + { + $newstate = 'nowiki' + } + elsif ($is_cdata && $HTML::Tagset::isCDATA_Parent{$tagstack->[-1]}) + { + # If the user hadn't specifically allowed a tag which contains + # CDATA, then it won't be on the tag stack. + $newtext = $dtext; + } + } + + unless (defined $newtext) + { + $newtext = CGI::escapeHTML $dtext unless defined $newtext; + # CGI::escapeHTML escapes single quotes so the text may be included + # in attribute values, but we know we aren't processing an attribute + # value here. + $newtext =~ s/'/'/g; + } + + _append_processed_line $parser, $newtext, $newstate; +} + +sub _find_blocks_in_html +{ + my ($text, $tags, $opts) = @_; + + my $parser = HTML::Parser->new + (start_h => [\&_html_tag, 'self, "S", tagname, text, attr'], + end_h => [\&_html_tag, 'self, "E", tagname, text'], + comment_h => [\&_html_comment, 'self, text'], + text_h => [\&_html_text, 'self, dtext, skipped_text, is_cdata'], + marked_sections => 1, + boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__', + ); + $parser->{opts} = $opts; + $parser->{tags} = $tags; + $parser->{processed_lines} = []; + $parser->{tag_stack} = []; + + my @blocks; + my @lines = split /\r?\n/, $text; + for (my $i = 0; $i < @lines; $i++) + { + $parser->parse ($lines[$i]); + $parser->parse ("\n"); + $parser->eof if $i == $#lines; + + # @{$parser->{processed_lines}} may be empty when tags are + # still open. + while (@{$parser->{processed_lines}} + && $parser->{processed_lines}->[0]->[2]) + { + my ($type, $dtext) + = @{shift @{$parser->{processed_lines}}}; + + my $block; + if ($type) + { + $block = _start_block ($dtext, $tags, $opts, $type); + } + else + { + chomp $dtext; + $block = _start_block ($dtext, $tags, $opts); + } + push @blocks, $block if $block; + } + } + + return @blocks; +} + +sub _find_blocks +{ + my ($text, $tags, $opts) = @_; + my @blocks; + + if ($opts->{process_html}) + { + @blocks = _find_blocks_in_html $text, $tags, $opts; + } + else + { + # The original behavior. + for my $line (split /\r?\n/, $text) + { + my $block = _start_block ($line, $tags, $opts); + push @blocks, $block if $block; + } + } + + return @blocks; +} + +sub _start_block +{ + my ($text, $tags, $opts, $type) = @_; + + return new_block ('end', level => 0) unless $text; + return new_block ($type, + level => 0, + opts => $opts, + text => $text, + tags => $tags,) + if $type; + + for my $block (@{$tags->{blockorder}}) + { + my ($line, $level, $indentation) = ($text, 0, ''); + + ($level, $line, $indentation) = _get_indentation ($tags, $line) + if $tags->{indented}{$block}; + + my $marker_removed = length ($line =~ s/$tags->{blocks}{$block}//); + + next unless $marker_removed; + + return new_block ($block, + args => [grep {defined} $1, $2, $3, $4, $5, $6, $7, + $8, $9], + level => $level || 0, + opts => $opts, + text => $line, + tags => $tags, + ); + } +} + +sub _nest_blocks +{ + my $blocks = shift; + return unless @$blocks; + + my @processed = shift @$blocks; + + for my $block (@$blocks) + { + push @processed, $processed[-1]->nest( $block ); + } + + return @processed; +} + +sub _process_blocks +{ + my ($blocks, $tags, $opts) = @_; + + my @open; + for my $block (@$blocks) + { + push @open, _process_block ($block, $tags, $opts) + unless $block->type() eq 'end'; + } + + return join '', @open ; +} + +sub _process_block +{ + my ($block, $tags, $opts) = @_; + my $type = $block->type(); + + my ($start, $end, $start_line, $end_line, $between); + if ($tags->{$type}) + { + ($start, $end, $start_line, $end_line, $between) = @{$tags->{$type}}; + } + else + { + ($start, $end, $start_line, $end_line) = ('', '', '', ''); + } + + my @text = (); + for my $line (grep (/^\Q$type\E$/, @{$tags->{unformatted_blocks}}) + ? $block->text() + : $block->formatted_text()) + { + if (blessed $line) + { + my $prev_end = pop @text || (); + push @text, _process_block ($line, $tags, $opts), $prev_end; + next; + } + + my @triplets; + if ((ref ($start_line) || '') eq 'CODE') + { + @triplets = $start_line->($line, $block->level(), + $block->shift_args(), $tags, $opts); + } + else + { + @triplets = ($start_line, $line, $end_line); + } + push @text, @triplets; + } + + pop @text if $between; + return join '', $start, @text, $end; +} + +sub _get_indentation +{ + my ($tags, $text) = @_; + + return 1, $text unless $text =~ s/($tags->{indent})//; + return length ($1) + 1, $text, $1; +} + +=head2 format_line + + $formatted = format_line ($raw, $tags, $opts); + +This function is never exported. It formats the phrase elements of a single +line of text (emphasised, strong, and links). + +This is only meant to be called from L and so +requires $tags and $opts to have all elements filled in. If you find a use for +it, please let me know and maybe I will have it default the missing elements as +C does. + +=cut + +sub format_line +{ + my ($text, $tags, $opts) = @_; + + $text =~ s!$tags->{strong_tag}!$tags->{strong}->($1, $opts)!eg; + $text =~ s!$tags->{emphasized_tag}!$tags->{emphasized}->($1, $opts)!eg; + + $text = _find_links ($text, $tags, $opts) + if $opts->{extended} + || $opts->{absolute_links} + || $opts->{implicit_links}; + + return $text; +} + +sub _find_innermost_balanced_pair +{ + my ($text, $open, $close) = @_; + + my $start_pos = rindex $text, $open; + return if $start_pos == -1; + + my $end_pos = index $text, $close, $start_pos; + return if $end_pos == -1; + + my $open_length = length $open; + my $close_length = length $close; + my $close_pos = $end_pos + $close_length; + my $enclosed_length = $close_pos - $start_pos; + + my $enclosed_atom = substr $text, $start_pos, $enclosed_length; + return substr ($enclosed_atom, $open_length, 0 - $close_length), + substr ($text, 0, $start_pos), + substr ($text, $close_pos); +} + +sub _find_links +{ + my ($text, $tags, $opts) = @_; + + # Build Regexp + my @res; + + if ($opts->{absolute_links}) + { + # URI + my $s; + $tags->{_schema_regex} ||= _make_schema_regex @{$tags->{schemas}}; + $s = $tags->{_schema_regex}; + push @res, qr/\b$s:[$uricCheat][$uric]*/ + } + + if ($opts->{implicit_links}) + { + # StudlyCaps + if ($tags->{implicit_link_delimiters}) + { + push @res, qr/$tags->{implicit_link_delimiters}/; + } + else + { + warnings::warnif ("Ignoring implicit_links option since implicit_link_delimiters is empty"); + } + } + + if ($opts->{extended}) + { + # [[Wiki Page]] + if (!$tags->{extended_link_delimiters}) + { + warnings::warnif ("Ignoring extended option since extended_link_delimiters is empty"); + } + elsif (ref $tags->{extended_link_delimiters} eq "ARRAY") + { + # Backwards compatibility for extended links. + # Bypasses the regex substitution used by absolute and implicit + # links. + my ($start, $end) = @{$tags->{extended_link_delimiters}}; + while (my @pieces = _find_innermost_balanced_pair ($text, $start, + $end)) + { + my ($tag, $before, $after) = map { defined $_ ? $_ : '' } + @pieces; + my $extended = $tags->{link}->($tag, $opts, $tags) || ''; + $text = $before . $extended . $after; + } + } + else + { + push @res, qr/$tags->{extended_link_delimiters}/; + } + } + + if (@res) + { + my $re = join "|", @res; + $text =~ s/$re/$tags->{link}->($&, $opts, $tags)/ge; + } + + return $text; +} + +=head1 Wiki Format + +Refer to L for +description of the default wiki format, as interpreted by this module. Any +discrepencies will be considered bugs in this module, with a few exceptions. + +=head2 Unimplemented Wiki Markup + +=over 4 + +=item Templates, Magic Words, and Wanted Links + +Templates, magic words, and the colorization of wanted links all require a back +end data store that can be consulted on the existance and content of named +pages. C has deliberately been constructed such that it +operates independantly from such a back end. For an interface to +C which implements these features, see +L. + +=item Tables + +This is on the TODO list. + +=back + +=head1 EXPORT + +If you'd like to make your life more convenient, you can optionally import a +subroutine that already has default tags and options set up. This is +especially handy if you use a prefix: + + use Text::MediawikiFormat prefix => 'http://www.example.com/'; + wikiformat ('some text'); + +Tags are interpreted as default members of the $tags hash normally passed to +C, except for the five options (see above) and the C key, who's +value is interpreted as an alternate name for the imported function. + +To use the C flag to control the name by which your code calls the imported +function, for example, + + use Text::MediawikiFormat as => 'formatTextWithWikiStyle'; + formatTextWithWikiStyle ('some text'); + +You might choose a better name, though. + +The calling semantics are effectively the same as those of the C +function. Any additional tags or options to the imported function will +override the defaults. This code: + + use Text::MediawikiFormat as => 'wf', extended => 0; + wf ('some text', {}, {extended => 1}); + +enables extended links, after specifying that the default behavior should be +to disable them. + +=head1 GORY DETAILS + +=head2 Tags + +There are two types of Wiki markup: phrase markup and blocks. Blocks include +lists, which are made up of lines and can also contain other lists. + +=head3 Phrase Markup + +The are currently three types of wiki phrase markup. These are the +strong and emphasized markup and links. Links may additionally be of three +subtypes, extended, implicit, or absolute. + +You can change the regular expressions used to find strong and emphasized tags: + + %tags = ( + strong_tag => qr/\*([^*]+?)\*/, + emphasized_tag => qr|/([^/]+?)/|, + ); + + $wikitext = 'this is *strong*, /emphasized/, and */em+strong/*'; + $htmltext = wikiformat ($wikitext, \%tags, {}); + +You can also change the regular expressions used to find links. The following +just sets them to their default states (but enables parsing of implicit links, +which is I the default): + + my $html = wikiformat + ( + $raw, + {implicit_link_delimiters => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!, + extended_link_delimiters => qr!\[(?:\[[^][]*\]|[^][]*)\]!, + }, + {implicit_links => 1} + ); + +In addition, you may set the function references that format strong and +emphasized text and links. The strong and emphasized functions receive only +the text to be formatted as an argument and are expected to return the +formatted text. The link formatter also recieves references to the C<$tags> +and C<$opts> arrays. For example, the following sets the strong and +emphasized formatters to their default state while replacing the link formatter +with one which strips href information and returns only the title text: + + my $html = wikiformat + ( + $raw, + {strong => sub {"$_[0]"}, + emphasized => sub {"$_[0]"}, + link => sub + { + my ($tag, $opts, $tags) = @_; + if ($tag =~ s/^\[\[([^][]+)\]\]$/$1/) + { + my ($page, $title) = split qr/\|/, $tag, 2; + return $title if $title; + return $page; + } + elsif ($tag =~ s/^\[([^][]+)\]$/$1/) + { + my ($href, $title) = split qr/ /, $tag, 2; + return $title if $title; + return $href; + } + else + { + return $tag; + } + }, + }, + ); + +=head3 Blocks + +The default block types are C, C, C, C, +C, C, C, and C
    . + +Block entries in the tag hashes must contain array references. The first two +items are the tags used at the start and end of the block. The third and +fourth contain the tags used at the start and end of each line. Where there +needs to be more processing of individual lines, use a subref as the third +item. This is how the module processes ordered lines in HTML lists and +headers: + + my $html = wikiformat + ( + $raw, + {ordered => ['
      ', "
    \n", '
  • ', "
  • \n"], + header => ['', "\n", \&_make_header], + }, + ); + +The first argument to these subrefs is the post-processed text of the line +itself. (Processing removes the indentation and tokens used to mark this as a +list and checks the rest of the line for other line formattings.) The second +argument is the indentation level (see below). The subsequent arguments are +captured variables in the regular expression used to find this list type. The +regexp for headers is: + + $html = wikiformat + ( + $raw, + {blocks => {header => qr/^(=+)\s*(.+?)\s*\1$/}} + ); + +The module processes indentation first, if applicable, and stores the +indentation level (the length of the indentation removed). + +Lists automatically start and end as necessary. + +Because regular expressions could conceivably match more than one line, block +level markup is processed in a specific order. The C tag governs +this order. It contains a reference to an array of the names of the +appropriate blocks to process. If you add a block type, be sure to add an +entry for it in C: + + my $html = wikiformat + ( + $raw, + {invisible => ['', '', '', ''], + blocks => {invisible => qr!^--(.*?)--$!}, + blockorder => [qw(code header line ordered + unordered definition invisible + paragraph_break paragraph)] + }, + }, + ); + +=head3 Finding blocks + +As has already been mentioned in passing, C uses regular +expressions to find blocks. These are in the C<%tags> hash under the C +key. For example, to change the regular expression to find code block items, +use: + + my $html = wikiformat ($raw, {blocks => {code => qr/^:\s+/}}); + +This will require a leading colon to mark code lines (note that as writted +here, this would interfere with the default processing of definition lists). + +=head3 Finding Blocks in the Correct Order + +As intrepid bug reporter Tom Hukins pointed out in CPAN RT bug #671, the order +in which C searches for blocks varies by platform and +version of Perl. Because some block-finding regular expressions are more +specific than others, what you intend to be one type of block may turn into a +different list type. + +If you're adding new block types, be aware of this. The C entry in +C<%tags> exists to force C to apply its regexes from +most specific to least specific. It contains an array reference. By default, +it looks for ordered lists first, unordered lists second, and code references +at the end. + +=head1 SEE ALSO + +L + +=head1 SUPPORT + +You can find documentation for this module with the perldoc command. + + perldoc Text::MediawikiFormat + +You can also look for information at: + +=over 4 + +=item * AnnoCPAN: Annotated CPAN documentation + +L + +=item * CPAN Ratings + +L + +=item * RT: CPAN's request tracker + +L + +=item * Search CPAN + +L + +=back + +=head1 AUTHOR + +Derek Price C is the author. + +=head1 ACKNOWLEDGEMENTS + +This module is derived from L, written by chromatic. +chromatic's original credits are below: + +chromatic, C, with much input from the Jellybean team +(including Jonathan Paulett). Kate L Pugh has also provided several patches, +many failing tests, and is usually the driving force behind new features and +releases. If you think this module is worth buying me a beer, she deserves at +least half of it. + +Alex Vandiver added a nice patch and tests for extended links. + +Tony Bowden, Tom Hukins, and Andy H. all suggested useful features that are now +implemented. + +Sam Vilain, Chris Winters, Paul Schmidt, and Art Henry have all found and +reported silly bugs. + +Blame me for the implementation. + +=head1 BUGS + +The link checker in C may fail to detect existing links that do +not follow HTML, XML, or SGML style. They may die with some SGML styles too. +I. + +=head1 TODO + +=over 4 + +=item * Optimize C to work on a list of lines + +=back + +=head1 COPYRIGHT & LICENSE + + Copyright (c) 2006-2008 Derek R. Price, all rights reserved. + Copyright (c) 2002 - 2006, chromatic, all rights reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + +1; # End of Text::MediaiwkiFormat diff --git a/t/Wiki.t b/t/Wiki.t new file mode 100644 index 0000000..441e585 --- /dev/null +++ b/t/Wiki.t @@ -0,0 +1,193 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +# for testing 'rootdir' in links +my %constants = ( + rootdir => 'rootdir', +); + +local *Text::MediawikiFormat::getCurrentStatic; +*Text::MediawikiFormat::getCurrentStatic = sub { + return \%constants; +}; + +use Test::More tests => 34; +use Test::NoWarnings; + +use_ok 'Text::MediawikiFormat'; + +my $wikitext =< 'rootdir/wiki.pl?page=', + implicit_links => 1, + extended => 0, + process_html => 0, +); + +my $htmltext = Text::MediawikiFormat::format_line ($wikitext, \%tags, \%opts); + +like $htmltext, qr!\[!, + 'format_line () should link StudlyCaps where found)'; +like $htmltext, qr!hello!, 'three ticks should mark strong'; +like $htmltext, qr!hi!, 'two ticks should mark emphasized'; +like $htmltext, qr!LinkMeSomewhere\n!m, 'should catch StudlyCaps'; +like $htmltext, qr!\[\[!, 'should not handle extended links without flag'; + +$opts{extended} = 1; +$htmltext = Text::MediawikiFormat::format_line ($wikitext, \%tags, \%opts); +like $htmltext, qr!^BYE!m, + 'should handle extended links with flag'; + +$htmltext = Text::MediawikiFormat::format ($wikitext, {}, {process_html => 0}); +like $htmltext, qr!hello!, 'three ticks should mark strong'; +like $htmltext, qr!hi!, 'two ticks should mark emphasized'; + +is scalar @{$tags{ordered}}, 4, + '...default ordered entry should have four items'; +is join ('', map {ref $_} @{$tags{ordered}}), '', + '...and should have no subrefs'; + +# make sure this starts a paragraph (buglet) +$htmltext = Text::MediawikiFormat::format ("nothing to see here\nmoveAlong\n", + {}, + {prefix => 'foo=', + process_html => 0}); +like $htmltext, qr!^

    nothing!, '...should start new text with paragraph'; + +# another buglet had the wrong tag pairs when ending a list +my $wikiexample =< 'foo=', + process_html => 0}); + +like $htmltext, qr!^

    I am modifying this!, + '... should use correct tags when ending lists'; +like $htmltext, qr!

    Here is a paragraph.\n!, + '...should add no newline before paragraph, but at newline in paragraph'; +like $htmltext, qr!

    Here is another paragraph.

    !, + '... should add no newline at end of paragraph'; +like $htmltext, qr|emphatic text|, + '...should sub markup in code sections'; +unlike $htmltext, qr!<(\w+)>!, '...but should not create empty lists'; + +$wikitext =< 'rootdir/wiki.pl?page=', + process_html => 0, +); + +$htmltext = Text::MediawikiFormat::format ($wikitext, {}, \%opts); +like $htmltext, qr!
    !m, + '...should leave spaces alone in titles of extended links'; + +$wikitext =<<'WIKI'; += heading = +== sub heading == + +some text + +=== sub sub heading === + +more text + +WIKI + +$htmltext = Text::MediawikiFormat::format($wikitext, \%tags, \%opts); +like $htmltext, qr!

    heading

    !, 'headings should be marked'; +like $htmltext, qr!

    sub heading

    !, '... and numbered appropriately'; + +# test overridable tags + +ok !UNIVERSAL::can ('main', 'wikiformat'), + 'Module should import nothing by default'; + +can_ok 'Text::MediawikiFormat', 'import'; + +SKIP: { + # process_html defaults to 1, so we can't test the single-argument version + # of the importer without the HTML modules. + eval { require HTML::Parser; require HTML::Tagset; }; + skip "HTML::Parser or HTML::Tagset not installed", 1 if $@; + + # given an argument, export wikiformat() somehow + package Foo; + + Text::MediawikiFormat->import('wikiformat'); + ::can_ok 'Foo', 'wikiformat'; +} + +package Bar; +Text::MediawikiFormat->import(as => 'wf', prefix => 'foo', tag => 'bar', + process_html => 0); +::can_ok 'Bar', 'wf'; +::isnt \&wf, \&Text::MediawikiFormat::format, + '...and should be a wrapper around format()'; + +my @args; +local *Text::MediawikiFormat::_format; +*Text::MediawikiFormat::_format = sub { + @args = @_; +}; + +wf(); +::is $args[1]{prefix}, 'foo', + 'imported sub should pass through default option'; +::is $args[0]{tag}, 'bar', '... and default tag'; + +wf ('text', {tag2 => 1}, {prefix => 'baz'}); +::is $args[2], 'text', '...passing through text unharmed'; +::is $args[3]{tag2}, 1, '...along with new tags'; +::is $args[4]{prefix}, 'baz', '...overriding default args as needed'; + +1; diff --git a/t/absolute_links.t b/t/absolute_links.t new file mode 100644 index 0000000..80d66ec --- /dev/null +++ b/t/absolute_links.t @@ -0,0 +1,64 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 8; +use Test::NoWarnings; +use Text::MediawikiFormat as => 'wf', implicit_links => 0, absolute_links => 0, + process_html => 0; + +my $wikitext = <<'WIKI'; + +I download code from http://www.cpan.org/ or ftp://ftp.cpan.org/ and +email mailto:chromatic@example.com + +WIKI + +my $htmltext = wf ($wikitext, {}, {absolute_links => 1}); + +is $htmltext, + qq{

    I download code from } + . qq{http://www.cpan.org/ } + . qq{or ftp://ftp.cpan.org/ and\n} + . q{email } + . q{mailto:chromatic@example.com} + . qq{

    \n}, + 'Picking up absolute links'; + +$htmltext = wf ($wikitext, {}, {absolute_links => 0}); +is $htmltext, + qq{

    I download code from http://www.cpan.org/ or ftp://ftp.cpan.org/ } + . qq{and\n} + . q{email mailto:chromatic@example.com} + . qq{

    \n}, + q{Doesn't pick up links when absolute_links is off}; + +$wikitext = "this is a moose:notalink"; + +$htmltext = wf ($wikitext, {}, {absolute_links => 1}); +is $htmltext, + qq{

    this is a moose:notalink

    \n}, + q{Doesn't pick up things that might look like links}; + +$htmltext = wf ($wikitext, {schemas => ['moose']}, {absolute_links => 1}); +is $htmltext, + qq{

    this is a moose:notalink

    \n}, + q{Schema tag allows specifying what is a link}; + +$wikitext = <<'WIKI'; + +http://www.cpan.org/. + +A link in angle brackets: . +WIKI + +$htmltext = wf ($wikitext, {}, {absolute_links => 1}); +like $htmltext, qr{href='http://www.cpan.org/'>}, + 'Links work at beginning of line and lose cruft'; +like $htmltext, qr{org/\.}, + 'Cruft restored after link'; +like $htmltext, qr{>http://link\.org>\.}, + 'Angle brackets around links are left alone'; diff --git a/t/base.t b/t/base.t new file mode 100644 index 0000000..3509aa6 --- /dev/null +++ b/t/base.t @@ -0,0 +1,189 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 35; + +my $module = 'Text::MediawikiFormat'; +use_ok $module or exit; + +can_ok $module, '_start_block'; +my $text =<can( $_[0] ); +} + +my $tags = \%Text::MediawikiFormat::tags; +local *Text::MediawikiFormat::tags = $tags; +my $opts = \%Text::MediawikiFormat::opts; +local *Text::MediawikiFormat::opts = $opts; + +my $sb = fetchsub '_start_block'; +my ($result) = $sb->('= heading =', $tags); + +ok $result->isa ('Text::MediawikiFormat::Block::header'), + '_start_block() should find headings' or diag "... it's a $result"; + +is $result->level(), 0, '... at the correct level'; + +($result) = $sb->('** unordered item', $tags); + +ok $result->isa ('Text::MediawikiFormat::Block::unordered'), + '_start_block() should find unordered lists' or diag "... it's a $result"; +is $result->level(), 2, '... at the correct level'; +is join ('', $result->text()), 'unordered item', '... with the correct text'; + +($result) = $sb->('## ordered item', $tags); + +ok $result->isa ('Text::MediawikiFormat::Block::ordered'), + '_start_block() should find ordered lists' or diag "... it's a $result"; +is $result->level(), 2, '... at the correct level'; +is join ('', $result->text()), 'ordered item', '... with the correct text'; + +($result) = $sb->(' some code', $tags); + +ok $result->isa ('Text::MediawikiFormat::Block::code'), + '_start_block() should find code' or diag "... it's a $result"; +is $result->level(), 0, '... at the correct level'; +is join ('', $result->text()), "some code", '... with the correct text'; + +($result) = $sb->('paragraph', $tags); + +ok $result->isa ('Text::MediawikiFormat::Block::paragraph'), + '_start_block() should find paragraph' or diag "... it's a $result"; +is $result->level(), 0, '... at the correct level'; +is join ('', $result->text()), 'paragraph', '...with the correct text'; + +can_ok $module, '_nest_blocks'; +my $nb = fetchsub '_nest_blocks'; +my @result = $nb->([ + map {Text::MediawikiFormat::new_block (@$_)} + ['code', text => 'a', level => 1], + ['code', text => 'b', level => 1], +]); +is @result, 1, '_nest_blocks() should merge identical blocks together'; +is_deeply $result[0]{text}, [qw(a b)], '...merging their text'; + +@result = $nb->([ + map {Text::MediawikiFormat::new_block (@$_)} + ['unordered', text => 'foo', level => 1], + ['unordered', text => 'bar', level => 1], +], $tags); +is @result, 1, '... merging unordered blocks'; +is_deeply $result[0]{text}, [qw(foo bar)], '...and their text'; + +@result = $nb->([ + map {Text::MediawikiFormat::new_block (@$_)} + ['ordered', text => 'foo', level => 2], + ['ordered', text => 'bar', level => 3], +], $tags); +is @result, 2, '... not merging blocks at different levels'; + +can_ok $module, '_process_blocks'; +my $pb = fetchsub '_process_blocks'; +my @opts = (tags => $tags, opts => $opts); +my @blocks = map {Text::MediawikiFormat::new_block (@$_, @opts)} + ['header', text => [''], level => 0, + args => ['==', 'my header']], ['end', text => [ '' ], + level => 0, @opts], + ['paragraph', text => [qw(my lines of text)], args => [], + level => 0], + ['end', text => [ '' ], level => 0, @opts ], + ['ordered', text => [qw(my ordered lines), + Text::MediawikiFormat::new_block + ('unordered', + text => [qw(my unordered lines)], + level => 3, args => [], @opts),], + level => 2, args => []]; + +# it's hard to fake these up; this may be a bad test +$blocks[2]{args} = [[], [], [] ]; +$blocks[4]{args} = [[2], [3], [5]]; +$blocks[4]{text}[3]{args} = [[], [], []]; + +@result = $pb->(\@blocks, $tags, $opts); + +is @result, 1, '_process_blocks() should return processed text'; +$result = $result[0]; +like $result, qr!

    my header

    !, '...marking header'; +like $result, qr!

    my[^<]+text

    \n!s, '...paragraph'; +like $result, qr!
      \n
    1. my
    2. .+
    3. lines!s, '...ordered list'; +like $result, qr!
        \n
      • my
      • !m, '...and unordered list'; +like $result, qr!\n
      \n
    4. \n
    !, '...nesting properly'; + +my $f = fetchsub( 'format' ); +my $fullresult = $f->(< 0}); +== my header == + +my +lines +of +text + +# my +# ordered +# lines +#* my +#* unordered +#* lines +END_WIKI + +is $fullresult, $result, 'format() should give same results'; + +$fullresult = $f->(< 0}); += heading = + +* aliases can expire +** use the Expires directive +** no messages sent after the expiration date +* aliases can be closed +** use the Closed directive +** messages allowed only from people on the list +* aliases can auto-add people +** use the Auto-add directive +** anyone in the Cc line is added to the alias +** they won't get duplicates +** makes "just reply to alias" easier + +END_WIKI + +like $fullresult, qr!expire
      !, 'nested list should start immediately'; +like $fullresult, qr!date\n
    !, '... ending after last nested item'; + +can_ok $module, '_check_blocks'; + +my @warnings; +local $SIG{__WARN__} = sub { + push @warnings, shift; +}; + +my $cb = \&Text::MediawikiFormat::_check_blocks; +my $newtags = { + blocks => {foo => 1, bar => 1, baz => 1}, + blockorder => [qw(bar baz)], +}; +$cb->($newtags); +my $warning = shift @warnings; +like $warning, qr/No order specified for blocks: foo\./, + '_check_blocks() should warn if block is not ordered'; + +$newtags->{blockorder} = ['baz']; +$cb->($newtags); +$warning = shift @warnings; +ok $warning =~ /foo/ && $warning =~ /bar/, '... for all missing blocks' +or diag $warning; diff --git a/t/bugs.t b/t/bugs.t new file mode 100644 index 0000000..ede4f7c --- /dev/null +++ b/t/bugs.t @@ -0,0 +1,115 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 16; +use Test::NoWarnings; + +use_ok 'Text::MediawikiFormat', as => 'wf', process_html => 0; + +my $wikitext =<unordered
  • !, + 'ensure that lists followed by paragraphs are included correctly'; + +package Baz; +use Text::MediawikiFormat as => 'wf', process_html => 0; + +::can_ok( 'Baz', 'wf' ); + +package main; + +## +## make sure tag overrides work for Kake +## + +$wikitext = < qr/^(?:\t+|\s{4,}|(?=\*+))/, + blocks => { unordered => qr/^\s*\*+\s*/ }, + nests => { unordered => 1 }, +); + +$htmltext = wf ($wikitext, \%format_tags); + +like $htmltext, qr/
  • foo<\/li>/, "first level of unordered list"; +like $htmltext, qr/
  • bar<\/li>/, "nested unordered lists OK"; + +## +## Check that blocks not in blockorder are not fatal +## +%format_tags = ( + blocks => { + definition => qr/^:\s*/ + }, + definition => [ "
    \n", "
    \n", '
    ', "\n" ], + blockorder => [ 'definition' ], +); + +my $warning; +local $SIG{__WARN__} = sub { $warning = shift }; +eval { wf ($wikitext, \%format_tags) }; +is $@, '', 'format() should not die if a block is missing from blockorder'; +like $warning, qr/No order specified/, '... warning instead'; + +my $foo = 'x'; +$foo .= '' unless $foo =~ /x/; +my $html = wf ('test'); +is $html, "

    test

    \n", 'successful prior match should not whomp format()'; + +$wikitext =<<'WIKI'; +Here is some example code: + + sub example_code + { + my ($foo) = @_; + my $this = call_that $foo; + } + +Isn't it nice? +WIKI + +$htmltext = wf ($wikitext, {blocks => {code => qr/^\t/}}); + +like $htmltext, qr!
    sub example_code[^<]+}\s*
    !m, + 'pre tags should work'; + +like $htmltext, qr!^\tmy \(\$foo\)!m, '... not removing further indents'; + +$wikitext =< 1}); + +like $htmltext, qr!CamelCase!, + 'parse actual CamelCase words into links'; +like $htmltext, qr!CamooseCase!, + '... not repeating if using link as title'; +like $htmltext, qr!^NOTCAMELCASE!m, '... but not words in all uppercase'; + +my @processed = Text::MediawikiFormat::_nest_blocks ([]); +is @processed, 0, '_nest_blocks() should not autovivify empty blocks array'; diff --git a/t/developer/0-signature.t b/t/developer/0-signature.t new file mode 100644 index 0000000..4e8fd30 --- /dev/null +++ b/t/developer/0-signature.t @@ -0,0 +1,20 @@ +#!perl + +use strict; +use warnings; + +use Test::More tests => 1; + +SKIP: { + if (eval { require Module::Signature; 1 }) { + ok(Module::Signature::verify() == Module::Signature::SIGNATURE_OK() + => "Valid signature" ); + } + else { + diag("Next time around, consider installing Module::Signature,\n". + "so you can verify the integrity of this distribution.\n"); + skip("Module::Signature not installed", 1) + } +} + +__END__ diff --git a/t/developer/pod-coverage.t b/t/developer/pod-coverage.t new file mode 100644 index 0000000..638cd3f --- /dev/null +++ b/t/developer/pod-coverage.t @@ -0,0 +1,14 @@ +#!perl -T + +use lib 'lib'; +use Test::More; + +eval "use Test::Pod::Coverage 1.04"; + +plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" + if $@; + +plan tests => 2; + +pod_coverage_ok ('Text::MediawikiFormat'); +pod_coverage_ok ('Text::MediawikiFormat::Blocks'); diff --git a/t/developer/pod.t b/t/developer/pod.t new file mode 100644 index 0000000..976d7cd --- /dev/null +++ b/t/developer/pod.t @@ -0,0 +1,6 @@ +#!perl -T + +use Test::More; +eval "use Test::Pod 1.14"; +plan skip_all => "Test::Pod 1.14 required for testing POD" if $@; +all_pod_files_ok(); diff --git a/t/embedded-links.t b/t/embedded-links.t new file mode 100644 index 0000000..2607c9c --- /dev/null +++ b/t/embedded-links.t @@ -0,0 +1,67 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 4; +use Test::NoWarnings; + +use Text::MediawikiFormat as => 'wf', process_html => 0; + +my $wikitext = <[[SuperLink|Desc of the } + . qq{Link]]

    \n}, + '...ignore embedded links by default'; +} + +{ + # Redefine the delimiters to something different. + my %tags = (extended_link_delimiters => [qw{[[ ]]}], + link => \&_make_html_link); + + my $htmltext = wf ($wikitext, \%tags); + is $htmltext, + qq{

    Desc of the } + . qq{Link

    \n}, + '...processing all embedded links'; + + sub _make_html_link + { + my ($link) = @_; + my ($href, $title) = split qr/\|/, $link, 2; + $title ||= $href; + return "$title"; + } +} + +TODO: +{ + # Art Henry's bug; but not sure it's really a bug + local $TODO = "Unsupported MediaWiki features."; + + my %tags = (link => \&link_handler); + + # Or with the link handler overridden. + my $htmltext = wf ($wikitext, \%tags); + is $htmltext, + "

    Desc of the

    \n", + '...and also work with a handler override.'; + + sub link_handler + { + my ($link, $opts) = @_; + ($link, my $title) = split /\|/, $link, 2; + $title ||= $link; + return $title; + } +} diff --git a/t/explicit.t b/t/explicit.t new file mode 100644 index 0000000..870815e --- /dev/null +++ b/t/explicit.t @@ -0,0 +1,74 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 14; +use Test::NoWarnings; +use Test::Warn; + +use Text::MediawikiFormat as => 'wf', process_html => 0; + +my $wikitext =<extended link!m, + 'extended links rendered correctly with default delimiters'; +like $htmltext, qr!'http://nowhere\.com'>explicit URI!m, + 'explicit URIs rendered correctly with default delimiters'; +like $htmltext, qr!Usemod%20extended%20link'>Usemod extended link!m, + 'Wiki URIs rendered correctly with default delimiters'; + +# Redefine the delimiters to the same thing again. +my %tags = ( + extended_link_delimiters => qr/(\[(?:\[[^][]*\]|[^][]*)\])/, +); + +$htmltext = wf ($wikitext, \%tags); +like $htmltext, qr!'Ordinary'>extended link!m, + 'extended links rendered correctly with default delimiters'; +like $htmltext, qr!'http://nowhere\.com'>explicit URI!m, + 'explicit URIs rendered correctly with default delimiters'; +like $htmltext, qr!Usemod%20extended%20link'>Usemod extended link!m, + 'Wiki URIs rendered correctly with default delimiters'; + +# Redefine the delimiters to something different. +%tags = ( + extended_link_delimiters => [qw([ ])], +); + +$htmltext = wf ($wikitext, \%tags); + +unlike $htmltext, qr!'Ordinary'>extended link!m, + 'extended links ignored with overridden delimiters'; +unlike $htmltext, qr!'http://nowhere\.com'>explicit URI!m, + 'explicit URIs ignored with overridden delimiters'; +like $htmltext, qr!Usemod extended link[^\]]!m, + '...and new delimiters recognised'; + +# Make sure we handle empty delimiters +%tags = ( + extended_link_delimiters => '', +); + + +warning_like {$htmltext = wf ($wikitext, \%tags)} + {carped => [map {qr/^Ignoring/} (1..3)]}, + "warn of empty extended_link_delimiters"; + +unlike $htmltext, qr!'Ordinary'>extended link!m, + 'extended links ignored with empty delimiters'; +unlike $htmltext, qr!'http://nowhere\.com'>explicit URI!m, + 'explicit URIs ignored with empty delimiters'; +unlike $htmltext, qr!Usemod extended link[^\]]!m, + 'Wiki URIs ignored with empty delimiters'; diff --git a/t/implicit.t b/t/implicit.t new file mode 100644 index 0000000..343640f --- /dev/null +++ b/t/implicit.t @@ -0,0 +1,29 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 4; +use Test::NoWarnings; + +use Text::MediawikiFormat as => 'wf', prefix => 'rootdir/wiki.pl?page=', + process_html => 0; + +my $wikitext =<!m, + 'should create links from StudlyCaps if implicit_links is left alone'; + +$htmltext = wf ($wikitext, {}, {implicit_links => 0}); +unlike ($htmltext, qr!!m, + '...and if implicit_links set to 0'); + +$htmltext = wf ($wikitext, {}, {implicit_links => 1}); +like ($htmltext, qr!!m, + '...and if implicit_links set to 0'); diff --git a/t/kake.t b/t/kake.t new file mode 100644 index 0000000..c2e491c --- /dev/null +++ b/t/kake.t @@ -0,0 +1,82 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 8; +use Test::NoWarnings; + +use Text::MediawikiFormat as => 'wikiformat', implicit_links => 1, + process_html => 0; + + my $wikitext = " +WikiTest + +code: foo bar baz + +"; + + my %format_tags = ( + blocks => {code => qr/^code: /}, + ); + + my $cooked = wikiformat ($wikitext, \%format_tags); + like $cooked, qr|
    foo bar baz\n
    |, + 'unindented code markers should still work'; + +$wikitext = < qr/^(?:\t+|\s{4,}|\*?(?=\*+))/, + blocks => {unordered => qr/^\s*\*+\s*/}, + nests => {unordered => 1}, +); + +$cooked = wikiformat $wikitext, \%format_tags; + +like $cooked, qr/
  • foo/, 'first level of unordered list'; +like $cooked, qr/|s, + '... and end correctly'; + +$wikitext =<\s* +
  • 1
  • \s* +
  • 2
      \s* +
    • 2\.1
        \s* +
      • 2\.1\.1
      • \s* +
      \s* +
    • \s* +
    \s* +
  • \s* +
  • 3
  • \s* + \s* +
      \s* +
    • 4
        \s* +
      • 4\.1
          \s* +
        • 4\.1\.1
        • \s* +
        • 4\.1\.2
        • \s* +
        \s* +
      • \s* +
      \s* +
    • \s* +
    • 5
    • \s* +
    |sx, + 'nesting should be correct for multiple levels'; +like $htmltext, qr|
  • 4<|s, + 'spaces should work instead of tabs'; +like $htmltext, + qr|
  • 4
      \s*
    • 4.1
        \s*
      • 4.1.1
      • \s*
      • 4.1.2
      • \s*
      + \s*
    • |sx, + 'nesting should be correct for spaces too'; + + +TODO: { + local $TODO = 'Dictionary lists not nesting correctly.'; + +### +### Dictionary Lists +### +$wikitext =< +
      A
      +
      A.a
      +
        +
      1. A.a.1
          +
        1. A.a.1.1
        2. +
        +
      2. +
      3. A.a.2
          +
        • A.a.2.*
        • +
        • A.a.2.*
            +
          1. A.a.2.*.1
          2. +
          +
        • +
        +
      4. +
      +
      A.b
      + +', 'lists nest correctly within dictionary lists'; +}; diff --git a/t/lists-no-indent.t b/t/lists-no-indent.t new file mode 100644 index 0000000..9540d20 --- /dev/null +++ b/t/lists-no-indent.t @@ -0,0 +1,79 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 8; +use Test::NoWarnings; + +use Text::MediawikiFormat as => 'wf', process_html => 0; + +my $wikitext =<This should be a list.!m, + 'unordered lists should render correctly'; +like $htmltext, qr!
    • This should be an ordered list.
    • !m, + '...ordered lists too'; + +# Redefine all the list regexps to what they were to start with. +my %tags = ( + lists => { + ordered => qr/^#\s*/, + unordered => qr/^\*\s*/, + code => qr/^ /, + }, +); + +$htmltext = wf ($wikitext, \%tags); +like $htmltext, qr!
    • This should be a list.
    • !m, + 'unordered should remain okay when we redefine all list regexps'; +like $htmltext, qr!
    • This should be an ordered list.
    • !m, + '...ordered lists too'; + +# Redefine again, set one of them to something different. +%tags = ( + blocks => { + ordered => qr/^#\s*/, + unordered => qr/^!\s*/, + code => qr/^ /, + }, +); + +$htmltext = wf ($wikitext, \%tags); +like $htmltext, qr!
    • But marked differently
    • !m, + 'unordered should still work when redefined'; +like $htmltext, qr!
    • This should be an ordered list.
    • !m, + '...ordered should be unaffected'; + +# Now try it without requiring an indent. +%tags = ( + indent => qr/^\s*/, + blocks => { + ordered => qr/^#\s*/, + unordered => qr/^\*\s*/, + code => qr/^ /, + }, + indented => {unordered => 0}, +); + +$htmltext = wf ($wikitext, \%tags); +like $htmltext, qr!
    • # But not indented!m, + 'redefining a list type to require no indent should work'; diff --git a/t/lists.t b/t/lists.t new file mode 100644 index 0000000..1a37f2a --- /dev/null +++ b/t/lists.t @@ -0,0 +1,65 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 8; +use Test::NoWarnings; + +use_ok 'Text::MediawikiFormat', as => 'wf', process_html => 0 or exit; +ok exists $Text::MediawikiFormat::tags{blockorder}, + 'T:MF should have a blockorder entry in %tags'; + +# isan ARRAY +isa_ok $Text::MediawikiFormat::tags{blockorder}, 'ARRAY', + '...and it should be an array'; + +like join(' ', @{$Text::MediawikiFormat::tags{blockorder}}), + qr/^code/, + '...and code should come before everything'; + +my $wikitext =<first list item!, + 'lists should be able to start on the first line of text'; +like $htmltext, qr!href='Wiki%20Link'!, + 'list item content should be formatted'; + +### +### Dictionary Lists +### +$wikitext =< +
      Term 1
      +
      definition 1.1
      +
      definition 1.2
      +
      Term 2
      +
      definition 2.1
      +
      definition 2.2
      + +
      +
      indented 1
      +
      indented 2
      +
      +', + 'dictionary lists format correctly'; diff --git a/t/merge-hash.t b/t/merge-hash.t new file mode 100644 index 0000000..820e12f --- /dev/null +++ b/t/merge-hash.t @@ -0,0 +1,45 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 9; +use Test::NoWarnings; + +use_ok( 'Text::MediawikiFormat' ) or exit; + +my $full = { foo => { bar => 'baz' } }; +my $empty = {}; +my $nonempty = { foo => { a => 'b' } }; +my $full_flat = { a => 'b' }; +my $empty_flat = {}; +my $zero = { foo => 0, bar => { baz => 0 } }; + +$nonempty = Text::MediawikiFormat::_merge_hashes ($full, $nonempty); +is_deeply $nonempty, {foo => {a => 'b', bar => 'baz'}}, + "merge should work when all keys in from exist in to"; +$full->{foo}->{bar} = 'boo'; +is_deeply $nonempty, {foo => {a => 'b', bar => 'baz'}}, + "merge should copy subhashes"; + +$empty_flat = Text::MediawikiFormat::_merge_hashes ($full_flat, $empty_flat); +is_deeply $empty_flat, $full_flat, + '... in flat case when keys exist in from but not in to'; + +$empty = Text::MediawikiFormat::_merge_hashes ($full, $empty); +is_deeply $empty, $full, + '... in non-flat case when keys exist in but not in to'; + +$empty = {}; +$empty = Text::MediawikiFormat::_merge_hashes ($zero, $empty); +is_deeply $empty, $zero, '...and when value is zero but defined'; + +my $regexer = {a => "regex"}; +my $arrayer = {a => ["X", "Y", "Z"]}; +my $merged; +$merged = Text::MediawikiFormat::_merge_hashes ($regexer, $arrayer); +is_deeply $merged, {a => "regex"}, "regexes should replace arrays"; +$merged = Text::MediawikiFormat::_merge_hashes ($arrayer, $regexer); +is_deeply $merged, {a => ["X", "Y", "Z"]}, "...and vice versa"; diff --git a/t/tag-override-use-as.t b/t/tag-override-use-as.t new file mode 100644 index 0000000..fc1b06b --- /dev/null +++ b/t/tag-override-use-as.t @@ -0,0 +1,44 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 3; +use Test::NoWarnings; + +use Text::MediawikiFormat as => 'wf', process_html => 0; + +my $wikitext =< {unordered => qr/^!\s*/}); + +my $htmltext = wf ($wikitext, \%format_tags); +like ($htmltext, qr!
    • But marked differently
    • !m, + 'redefining a list type works with use as'); + +%format_tags = ( + indent => qr//, + blocks => { + ordered => qr/^#\s*/, + unordered => qr/^\*\s*/ + }, + indented => {unordered => 0}, +); + +$htmltext = wf ($wikitext, \%format_tags); +like ($htmltext, qr!
    • \* But not indented!m, + 'redefining a list type to require no indent works with use as'); diff --git a/t/tag-override.t b/t/tag-override.t new file mode 100644 index 0000000..a15581f --- /dev/null +++ b/t/tag-override.t @@ -0,0 +1,101 @@ +#!perl + +BEGIN { chdir 't' if -d 't' } + +use strict; +use warnings; + +use Test::More tests => 14; +use Test::NoWarnings; + +use Text::MediawikiFormat as => 'wf', process_html => 0; + +my $wikitext =<This should be a list.
    • !m, + 'unordered lists should be rendered correctly'); +like ($htmltext, qr!
    • This should be an ordered list.
    • !m, + '...and ordered lists too'); + +# Redefine all the list regexps to what they were to start with. +my %tags = ( + blocks => { + ordered => qr/^#\s*/, + unordered => qr/^\*\s*/, + code => qr/^ /, + }, +); + +$htmltext = wf ($wikitext, \%tags); +like ($htmltext, qr!
    • This should be a list.
    • !m, + 'unordered should remain okay when we redefine all list regexps'); +like ($htmltext, qr!
    • This should be an ordered list.
    • !m, + '... and so should ordered'); + +# Redefine again, set one of them to something different. +%tags = ( + blocks => { + ordered => qr/^#\s*/, + unordered => qr/^!\s*/, + code => qr/^ /, + }, +); + +$htmltext = wf ($wikitext, \%tags); +like ($htmltext, qr!
    • But marked differently
    • !m, + 'unordered should still work when redefined'); +like ($htmltext, qr!
    • This should be an ordered list.
    • !m, + '...and ordered should be unaffected'); + +# Now try redefining just one list type. +%tags = ( + blocks => {unordered => qr/^!\s*/}, +); + +$htmltext = wf ($wikitext, \%tags); +like ($htmltext, qr!
    • This is like the default unordered list
    • !m, + 'redefining just one list type should work for that type'); +like ($htmltext, qr!
    • This should be an ordered list.
    • !m, + '...and should not affect other types too'); + +# now test overriding strong and emphasized tags +# don't use // to mark emphasized tags unless you /like/ this lookbehind +%tags = ( + strong_tag => qr/\*(.+?)\*/, + emphasized_tag => qr|(?strong!, '... overriding strong tag' ); +like( $htmltext, qr!emphasized!, '... overriding emphasized tag' ); +like( $htmltext, qr!em.+ng!, + '... and both at once' ); + +# Test redefining just one list type after using import with a list definition. +package Bar; +Text::MediawikiFormat->import( + as => 'wf', + blocks => { + unordered => qr/^!\s*/ + }, + process_html => 0, +); + +$htmltext = wf ("!1. Ordered list\n! Unordered list", + {blocks => {ordered => qr/^\s*!([\d]+)\.\s*/}}, {}); +::like ($htmltext, qr!
    • Ordered list
    • !m, + 'redefining a single list type after import should work for that type'); +::like ($htmltext, qr!
    • Unordered list
    • !m, + '...and also for a different type defined on import');