JTextile 1.0.2


001 /*

002 

003 This is Textile

004 A Humane Web Text Generator

005 

006 

007 Original PHP Version

008 Version 1.0

009 21 Feb, 2003

010 

011 Copyright (c) 2003, Dean Allen, www.textism.com

012 All rights reserved.

013 

014 This java version

015 Gareth Simpson April 2003

016 

017 _______

018 LICENSE

019 

020 Redistribution and use in source and binary forms, with or without 

021 modification, are permitted provided that the following conditions are met:

022 

023 * Redistributions of source code must retain the above copyright notice, 

024   this list of conditions and the following disclaimer.

025 

026 * Redistributions in binary form must reproduce the above copyright notice,

027   this list of conditions and the following disclaimer in the documentation

028   and/or other materials provided with the distribution.

029 

030 * Neither the name Textile nor the names of its contributors may be used to

031   endorse or promote products derived from this software without specific

032   prior written permission.

033 

034 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

035 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

036 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

037 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

038 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

039 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

040 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

041 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

042 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

043 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

044 POSSIBILITY OF SUCH DAMAGE.

045 

046 _____________

047 USING TEXTILE

048 

049 Block modifier syntax:

050 

051 Header: hn. 

052 Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.

053 Example: <h1>Text</h1>

054 

055 Header with CSS class: hn(class).

056 Paragraphs beginning with 'hn(class). ' receive a CSS class attribute. 

057 Example: <h1 class="class">Text</h1>

058 

059 Paragraph: p. (applied by default)

060 Paragraphs beginning with 'p. ' are wrapped in paragraph tags.

061 Example: <p>Text</p>

062 

063 Paragraph with CSS class: p(class).

064 Paragraphs beginning with 'p(class). ' receive a CSS class attribute. 

065 Example: <p class="class">Text</p>

066 

067 Blockquote: bq.

068 Paragraphs beginning with 'bq. ' are wrapped in block quote tags.

069 Example: <blockquote>Text</blockquote>

070 

071 Blockquote with citation: bq(citeurl).

072 Paragraphs beginning with 'bq(citeurl). ' receive a citation attribute. 

073 Example: <blockquote cite="citeurl">Text</blockquote>

074 

075 Numeric list: #

076 Consecutive paragraphs beginning with # are wrapped in ordered list tags.

077 Example: <ol><li>ordered list</li></ol>

078 

079 Bulleted list: *

080 Consecutive paragraphs beginning with * are wrapped in unordered list tags.

081 Example: <ul><li>unordered list</li></ul>

082 

083 

084 Phrase modifier syntax:

085 

086 _emphasis_             <em>emphasis</em>

087 __italic__             <i>italic</i>

088 *strong*               <strong>strong</strong>

089 **bold**               <b>bold</b>

090 ??citation??           <cite>citation</cite>

091 -deleted text-         <del>deleted</del>

092 +inserted text+        <ins>inserted</ins>

093 ^superscript^          <sup>superscript</sup>

094 ~subscript~            <sub>subscript</sub>

095 @code@                 <code>computer code</code>

096 

097 ==notextile==          leave text alone (do not format)

098 

099 "linktext":url         <a href="url">linktext</a>

100 "linktext(title)":url  <a href="url" title="title">linktext</a>

101 

102 !imageurl!             <img src="imageurl" />

103 !imageurl(alt text)!   <img src="imageurl" alt="alt text" />

104 !imageurl!:linkurl     <a href="linkurl"><img src="imageurl" /></a>

105 

106 ABC(Always Be Closing) <acronym title="Always Be Closing">ABC</acronym>

107 

108 */

109 

110 import gnu.regexp.*;

111 

112 import java.util.StringTokenizer;

113 import java.util.ArrayList;

114 

115 public class JTextile 

116 {

117   private static final int ENT_COMPAT = 0;

118   private static final int ENT_NOQUOTES = 2;

119   private static final int ENT_QUOTES = 3;

120 

121   

122   

123   public JTextile() 

124   {

125   } 

126 

127   

128   public static String textile(String text) throws Exception

129   {

130     //$text = stripslashes($text);

131       

132     //# turn any incoming ampersands into a dummy character for now.

133     //#  This uses a negative lookahead for alphanumerics followed by a semicolon,

134     //#  implying an incoming html entity, to be skipped 

135     text = preg_replace("&(?![#a-zA-Z0-9]+;)","x%x%",text);

136     

137     //# unentify angle brackets and ampersands

138     text = replace(text,"&gt;", ">");

139     text = replace(text,"&lt;", "<");

140     text = replace(text,"&amp;", "&");

141     

142     

143     //# zap carriage returns

144     text = replace(text,"\r\n", "\n");

145   

146     //# zap tabs

147     text = replace(text,"\t", "" );

148     

149     //  trim each line

150     StringBuffer splitBuffer = new StringBuffer();

151     StringTokenizer tokenizer = new StringTokenizer(text,"\n",true);

152     while(tokenizer.hasMoreTokens())

153     {

154       splitBuffer.append(tokenizer.nextToken().trim());

155       splitBuffer.append("\n");

156     }

157     

158     text = splitBuffer.toString();

159     

160 

161     //### Find and replace quick tags

162   

163     //# double equal signs mean <notextile>

164     text = preg_replace("(^|\\s)==(.*?)==([^\\w]{0,2})","$1<notextile>$2</notextile>$3$4",text);

165     

166     //# image qtag

167     text = preg_replace("!([^\\s\\(=]+?)\\s?(\\(([^\\)]+?)\\))?!","<img src=\"$1\" alt=\"$3\" />",text);

168       

169     //# image with hyperlink

170     text = preg_replace("(<img.+ \\/>):(\\S+)","<a href=\"$2\">$1</a>",text);

171 

172     //# hyperlink qtag

173     text = preg_replace("\"([^\"\\(]+)\\s?(\\(([^\\)]+)\\))?\":(\\S+?)([^\\w\\s\\/;]|[1-9]*?)(\\s|$)","<a href=\"$4\" title=\"$3\">$1</a>$5$6",text);

174   

175     //# arrange qtag delineators and replacements in an array

176     String[] srcTags = {"\\*\\*","\\*","\\?\\?","-","\\+","~","@"};

177     String[] replaceTags = {"b","strong","cite","del","ins","sub","code"};

178   

179     //# loop through the array, replacing qtags with html

180     for(int i = 0; i < srcTags.length; i++)

181     {

182       text = preg_replace("(^|\\s|>)" + srcTags[i] + "\\b(.+?)\\b([^\\w\\s]*?)" + srcTags[i] + "([^\\w\\s]{0,2})","$1<" + replaceTags[i] + ">$2$3</" + replaceTags[i] + ">$4",text);

183     }

184     

185     //# some weird bs with underscores and \b word boundaries, 

186     //#  so we'll do those on their own

187     

188     text = preg_replace("(^|\\s)__(.*?)__([^\\w\\s]{0,2})","$1<i>$2</i>$3",text);   

189     

190     text = preg_replace("(^|\\s)_(.*?)_([^\\w\\s]{0,2})","$1<em>$2</em>$3",text); 

191     

192     text = preg_replace("\\^(.*?)\\^","$1<sup>$2</sup>$3",text);

193 

194     // ### Find and replace typographic chars and special tags

195   

196     //# small problem with double quotes at the end of a string

197     

198     text = preg_replace("\"$","\" ",text);

199     

200     //# NB: all these will wreak havoc inside <html> tags

201       

202     String[] glyph_search = {

203       "([^\\s[{<])?\\'([dmst]\\b|ll\\b|ve\\b|\\s|$)",  // single closing

204       "\\'", // single opening

205       "([^\\s[{])?\"(\\s|$)", // # double closing

206       "\"(\\s|$)", // double opening

207       "\\b( )?\\.{3}", // # ellipsis

208       "\\b([A-Z][A-Z0-9]{2,})\\b(\\(([^\\)]+)\\))", // # 3+ uppercase acronym

209       "(^|[^\"][>\\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)", // # 3+ uppercase caps

210       "\\s?--\\s?", // # em dash

211       "\\s-\\s", // # en dash

212       "(\\d+)-(\\d+)", // # en dash

213       "(\\d+) ?x ?(\\d+)", //# dimension sign

214       "\\b ?(\\((tm|TM)\\))", // trademark

215       "\\b ?(\\([rR]\\))", // # registered

216       "\\b ?(\\([cC]\\))" // # registered     

217     };

218       

219       

220     String[] glyph_replace = {     

221       "$1&#8217;$2",              //# single closing

222       "&#8216;",                //# single opening

223       "$1&#8221;$2",              //# double closing

224       "&#8220;",                //# double opening

225       "$1&#8230;",              //# ellipsis

226       "<acronym title=\"$2\">$1</acronym>", //# 3+ uppercase acronym

227       "$1<span class=\"caps\">$2</span>$3", //# 3+ uppercase caps

228       "$1$2$3", //# 3+ uppercase caps

229       "&#8212;",                //# em dash

230       " &#8211; ",              //# en dash

231       "$1&#8211;$2",              //# en dash

232       "$1&#215;$2",             //# dimension sign

233       "&#8482;",                //# trademark

234       "&#174;",               //# registered

235       "&#169;"                //# copyright

236     };

237         

238     

239 

240     

241     //    # set toggle for turning off replacements between <code> or <pre>

242     boolean codepre = false;

243   

244     //# if there is no html, do a simple search and replace

245     

246     if(!preg_match("<.[^<]*>",text))

247     {

248       text = preg_replace(glyph_search,glyph_replace,text);

249     }

250     else 

251     {

252       

253       StringBuffer out = new StringBuffer();

254         //# else split the text into an array at <.*>

255       //$text = preg_split("/(<.*>)/U",$text,-1,PREG_SPLIT_DELIM_CAPTURE);

256       String[] textSplit = preg_split("<.[^<]*>",text);

257       for(int i = 0; i < textSplit.length; i++)

258       {

259           

260           //  # matches are off if we're between <code>, <pre> etc. 

261           if(preg_match("<(code|pre|kbd|notextile)>",textSplit[i].toLowerCase()))

262           {

263             codepre = true; 

264           }

265           else if(preg_match("</(code|pre|kbd|notextile)>",textSplit[i].toLowerCase()))

266           {

267             codepre = false; 

268           }

269           

270           if(!preg_match("<.[^<]*?>",textSplit[i]) && codepre == false)

271           {

272             textSplit[i] = preg_replace(glyph_search,glyph_replace,textSplit[i]);

273           }

274   

275           //# convert htmlspecial if between <code>

276           if (codepre == true){

277             textSplit[i] = htmlspecialchars(textSplit[i],ENT_NOQUOTES);

278             textSplit[i] = replace(textSplit[i],"&lt;pre&gt;","<pre>");

279             textSplit[i] = replace(textSplit[i],"&lt;code&gt;","<code>");

280             textSplit[i] = replace(textSplit[i],"&lt;notextile&gt;","<notextile>");

281           }

282   

283           //# each line gets pushed to a new array

284         out.append( textSplit[i]);

285       }

286         

287       text = out.toString();

288   

289       

290     }

291 

292   //### Block level formatting

293   

294     //# deal with forced breaks; this is going to be a problem between

295     //#  <pre> tags, but we'll clean them later

296 

297 

298     //////!!! not working 

299     //text = preg_replace("(\\S)(_*)([[:punct:]]*) *\n([^#*\\s])", "$1$2$3<br />$4", text);

300     //text = preg_replace("(\\S)(_*)([:punct:]*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);

301     text = preg_replace("(\\S)(_*)([:punct:]*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);

302   

303     //# might be a problem with lists

304     text = replace(text,"l><br />", "l>\n");

305   

306     boolean pre = false;

307 

308     String[] block_find = {

309       "^\\s?\\*\\s(.*)",            //# bulleted list *

310       "^\\s?#\\s(.*)",            //# numeric list #

311       "^bq\\. (.*)",              //# blockquote bq.

312       "^h(\\d)\\(([\\w]+)\\)\\.\\s(.*)",  //# header hn(class).  w/ css class

313       "^h(\\d)\\. (.*)",            //# plain header hn.

314       "^p\\(([[:alnum:]]+)\\)\\.\\s(.*)",   //# para p(class).  w/ css class

315       "^p\\. (.*)",             //# plain paragraph

316       "^([^\\t ]+.*)"           //# remaining plain paragraph

317       };

318     

319     String[] block_replace = {

320       "\t<liu>$1</liu>$2",

321       "\t<lio>$1</lio>$2",

322       "\t<blockquote>$1</blockquote>$2",

323       "\t<h$1 class=\"$2\">$3</h$1>$4",

324       "\t<h$1>$2</h$1>$3",

325       "\t<p class=\"$1\">$2</p>$3",

326       "\t<p>$1</p>",

327       "\t<p>$1</p>$2"

328       };

329   

330 

331     StringBuffer blockBuffer = new StringBuffer();

332     

333     String list = "";

334     

335     //  This done to ensure that lists close after themselves

336     text += " \n";

337 

338 

339     //# split the text into an array by newlines

340     StringTokenizer blockTokenizer = new StringTokenizer(text,"\n",false);

341     

342     while(blockTokenizer.hasMoreTokens())

343     {

344       String line = blockTokenizer.nextToken() ; 

345       

346       //#make sure the line isn't blank

347       if (!preg_match("^$",line)) 

348       {

349   

350           //# matches are off if we're between <pre> or <code> tags 

351         if(line.toLowerCase().indexOf("<pre>") > -1)

352         { 

353           pre = true; 

354         }

355   

356         //# deal with block replacements first, then see if we're in a list

357         if (!pre)

358         {

359           line = preg_replace(block_find,block_replace,line);

360         }

361   

362         //# kill any br tags that slipped in earlier

363         if (pre == true)

364         {

365           line = replace(line,"<br />","\n");

366         } 

367                 

368           //# matches back on after </pre> 

369         if(line.toLowerCase().indexOf("</pre>") > -1)

370         { 

371           //System.out.println("endpre");

372           pre = false; 

373         }

374   

375         //# at the beginning of a list, $line switches to a value

376         if (list.length() == 0 && preg_match("\\t<li",line))

377         {

378           line = preg_replace("^(\\t<li)(o|u)","\n<$2l>\n$1$2",line);

379           list = line.substring(2,3);

380             

381         //# at the end of a list, $line switches to empty

382         } 

383         else if (list.length() > 0 && !preg_match("\\t<li" + list,line))

384         {

385           line = preg_replace("^(.*)$","</" + list + "l>\n$1",line); 

386           list = "";

387         }

388       }

389       // push each line to a new array once it's processed

390       blockBuffer.append(line);

391       blockBuffer.append("\n");

392   

393     }

394     text = blockBuffer.toString();

395 

396     

397     //#clean up <notextile>

398     text = preg_replace("<\\/?notextile>", "",text);  

399     

400     //# clean up liu and lio

401     text = preg_replace("<(\\/?)li(u|o)>", "<$1li>",text);

402   

403     //# turn the temp char back to an ampersand entity

404     text = replace(text,"x%x%","&#38;");

405     

406     //# Newline linebreaks, just for markup tidiness

407     text = replace(text,"<br />","<br />\n");   

408   

409     return text;

410   } 

411   

412   

413   

414   /**

415    * Does just that.

416    * 

417    * @param source      The string to start with

418    * @param searchFor   The string we are looking for

419    * @param replaceWith The replacement

420    * 

421    * @return  The reformatted string

422    * 

423    */

424   private static String replace ( String source , String searchFor , String replaceWith )

425   {

426     if (source == null || "".equals(source)) {

427         return source;

428     }

429 

430     if (replaceWith == null) {

431         return source;

432     }

433 

434     if ("".equals(searchFor)) {

435         return source;

436     }

437 

438     int s = 0;

439     int e = 0;

440     StringBuffer result = new StringBuffer();

441 

442     while ((e = source.indexOf(searchFor, s)) >= 0) 

443     {

444         result.append(source.substring(s, e));

445         result.append(replaceWith);

446         s = e + searchFor.length();

447     }

448     result.append(source.substring(s));

449     return result.toString();

450 

451   }

452   

453   private static String htmlspecialchars(String text, int mode)

454   {

455       text = replace(text,"&", "&amp;");

456       if (mode != ENT_NOQUOTES)

457           text = replace(text,"\"", "&quot;");

458       if (mode == ENT_QUOTES)

459           text = replace(text,"'", "&#039;");

460       text = replace(text,"<", "&lt;");

461       text = replace(text,">", "&gt;");

462       return text ;

463   }

464     

465   private static String preg_replace(String pattern,String replace,String text) throws Exception

466   {

467     gnu.regexp.RE r = new gnu.regexp.RE(pattern);

468     return r.substituteAll(text,replace);

469   }

470 

471   private static String preg_replace(String[] pattern,String[] replace,String text) throws Exception

472   {

473     for(int i = 0; i < pattern.length; i++)

474     {

475       text = preg_replace(pattern[i],replace[i],text);

476     }

477     return text;

478   }

479   

480   private static boolean preg_match(String pattern,String text) throws Exception

481   {

482     gnu.regexp.RE r = new gnu.regexp.RE(pattern);

483     return r.getMatch(text) != null;   

484   }

485   

486   private static String[] preg_split(String pattern,String text) throws Exception

487   {

488     

489     int startAt = 0;

490     ArrayList tempList = new ArrayList();

491     

492     gnu.regexp.RE r = new gnu.regexp.RE(pattern);

493     

494     gnu.regexp.REMatch match = r.getMatch(text);

495     

496     while(match != null)

497     {                  

498       String beforeMatch = text.substring(startAt,match.getStartIndex());      

499       tempList.add(beforeMatch);

500       tempList.add(match.toString());           

501       startAt = match.getEndIndex();      

502       match = r.getMatch(text,startAt);

503     }

504     

505     tempList.add(text.substring(startAt));

506         

507     //  copy out our templist to an array of strings which is what we return

508     String[] ret = new String[tempList.size()];

509     

510     for(int i = 0; i < ret.length; i++)

511     {

512       ret[i] = (String)tempList.get(i);

513     }

514        

515     return ret;

516     

517   }

518 

519 }
Java2html