#!/usr/bin/perl

# tw-parsewiki - Translate a subset of the UseMod Wiki dialect
# into either HTML or XHTML.
#
# This is a stripped-down and modified version of parsewiki(1),
# a wiki syntax parser written by Jaime Villate.
#
# Significant portions of the original parsewiki(1) are in turn
# based on UseMod Wiki,
# version 0.92 (April 21, 2001) by Clifford A. Adams.

# Copying - print the copyright notice and the GPL version 2 terms on
# the currently selected output handle, then end the program with the
# default (zero) exit status. Takes no arguments and never returns.
sub Copying {
    # Single-quoted terminator: the text is emitted literally, with no
    # variable interpolation.
    my $terms = <<'END_OF_TERMS';
Copyright (C) 2009,2010,2011 Carlo Strozzi

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 dated
June, 1991.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

END_OF_TERMS
    print $terms;
    exit 0;
}

# Usage - print the command-line summary on STDERR and end the program
# with exit status 1. Takes no arguments and never returns.
sub Usage {
    # Double-quoted terminator: $0 is interpolated, so the summary shows
    # the name the program was actually invoked with.
    my $summary = <<"END_OF_USAGE";
Usage: $0 [OPTION]... [FILE]

Options:
-f, --format=FORMAT Output format; one of html or xhtml (default html).
-c, --copyright Display copyright and copying permission statement.
-h, --help Show this usage summary.

FILE is a simple text file with wiki formating syntax. The result will be
sent to the Standard Output. If FILE is not given, input will be taken from
the Standard Input.

Examples:
$0 myfile.wiki
cat file.txt | $0 -fhtml >file.html

Report bugs to .
END_OF_USAGE
    print STDERR $summary;
    exit 1;
}

# Strictures start here, so only the code below this point is compiled
# under them; every package global used by the script is declared up front.
use strict;
use vars qw($SaveUrlIndex $UrlProtocols $UrlPattern $UrlProtocols2
            $UrlPattern2 $ImageExtensions $FS $FreeLinkPattern
            $IndentLimit $Format $LT $GT $BL $BR @Tag %SaveUrl
            %OpenTag %CloseTag %OpenItem %CloseItem $LF $TableMode
            $TableSyntax $RawHtml);

# Configuration variables and default options
$Format      = 'html';
$TableSyntax = 1;    # 1 = wiki syntax tables, 0 = no table syntax
$RawHtml     = 0;    # 1 = allow raw HTML in pages, 0 = no raw HTML in pages

# Saved-text table and its running index, both starting out empty.
%SaveUrl      = ();
$SaveUrlIndex = 0;

# See UseMod Wiki @HtmlPairs() for more allowed block tags -- CS
@Tag = qw(ul ol dl pre em strong del code
          img a p dt small sub sup table tr td center left right);

# NOTE(review): the item-markup list opened here continues on the
# following lines, where its entries look damaged in this copy of the
# file -- verify them against the upstream source.
%OpenItem = qw(ol
  • ul
  • dl
    table ); 78 %CloseItem = qw(ol
  • ul dl table ); 79 80 my $file = &GetOpts(); # Process command line options 81 82 # Set up output format 83 $_ = $Format; 84 FORMAT: 85 { 86 # Warning: plain html mode is still largely untested -- Carlo 87 if (/^html$/i) { &SetUpHTML; last FORMAT} 88 if (/^xhtml$/i) { &SetUpXHTML; last FORMAT} 89 die "$0: Unknown format \"$Format\"\n\n"; 90 } 91 92 # Get input file 93 open (IN,"<$file") or die "$0: Cannot read file $file\n\n" ; 94 undef $/; 95 my $page = ; 96 close IN; 97 98 &InitLinkPatterns(); 99 100 # Hide CPIs to the parser, or free links may get screwed up -- CS 101 $page =~ s/\(:/\001/g; 102 $page =~ s/:\)/\002/g; 103 104 $page =~ s/([^\\])\\\\\r?\n/$1$FS$FS/go; # Encode paragraph breaks 105 $page =~ s/([^\\])\\\r?\n/$1 /g; # Join (text) lines ending in backslash 106 $page = &QuoteHtml($page); 107 $page = &CommonMarkup($page, 1, 0); # Multi-line markup 108 $page = &WikiLinesToHtml($page); # Line-oriented markup 109 110 $page =~ s/$FS$FS/\\\\$LF/g; # Decode \\ which are not paragraph breaks 111 $page =~ s/$FS(\d+)$FS/$SaveUrl{$1}/ge; # Restore saved text 112 $page =~ s/$FS(\d+)$FS/$SaveUrl{$1}/ge; # Restore nested saved text 113 114 # Add CSS class to table elements. 115 $page =~ s/(<(table|t(d|r)))([^>]*>)/$1 class='tw-wikitable'$4/g; 116 117 # Remove possible spurious whitespace before closing tags. 118 $page =~ s/\s+(<\/)/$1/g; 119 120 # Remove possible spurious section headings in output body -- CS 121 $page =~ s/\s*<\/h\d>//ige; 122 123 # Handle target="_blank" directives in URLs -- CS 124 $page =~ s/(_)(/