001 /*
002
003 This is Textile
004 A Humane Web Text Generator
005
006
007 Original PHP Version
008 Version 1.0
009 21 Feb, 2003
010
011 Copyright (c) 2003, Dean Allen, www.textism.com
012 All rights reserved.
013
014 This java version
015 Gareth Simpson April 2003
016
017 _______
018 LICENSE
019
020 Redistribution and use in source and binary forms, with or without
021 modification, are permitted provided that the following conditions are met:
022
023 * Redistributions of source code must retain the above copyright notice,
024 this list of conditions and the following disclaimer.
025
026 * Redistributions in binary form must reproduce the above copyright notice,
027 this list of conditions and the following disclaimer in the documentation
028 and/or other materials provided with the distribution.
029
030 * Neither the name Textile nor the names of its contributors may be used to
031 endorse or promote products derived from this software without specific
032 prior written permission.
033
034 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
035 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
036 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
037 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
038 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
039 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
040 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
041 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
042 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
043 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
044 POSSIBILITY OF SUCH DAMAGE.
045
046 _____________
047 USING TEXTILE
048
049 Block modifier syntax:
050
051 Header: hn.
052 Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.
053 Example: <h1>Text</h1>
054
055 Header with CSS class: hn(class).
056 Paragraphs beginning with 'hn(class). ' receive a CSS class attribute.
057 Example: <h1 class="class">Text</h1>
058
059 Paragraph: p. (applied by default)
060 Paragraphs beginning with 'p. ' are wrapped in paragraph tags.
061 Example: <p>Text</p>
062
063 Paragraph with CSS class: p(class).
064 Paragraphs beginning with 'p(class). ' receive a CSS class attribute.
065 Example: <p class="class">Text</p>
066
067 Blockquote: bq.
068 Paragraphs beginning with 'bq. ' are wrapped in block quote tags.
069 Example: <blockquote>Text</blockquote>
070
071 Blockquote with citation: bq(citeurl).
072 Paragraphs beginning with 'bq(citeurl). ' receive a citation attribute.
073 Example: <blockquote cite="citeurl">Text</blockquote>
074
075 Numeric list: #
076 Consecutive paragraphs beginning with # are wrapped in ordered list tags.
077 Example: <ol><li>ordered list</li></ol>
078
079 Bulleted list: *
080 Consecutive paragraphs beginning with * are wrapped in unordered list tags.
081 Example: <ul><li>unordered list</li></ul>
082
083
084 Phrase modifier syntax:
085
086 _emphasis_ <em>emphasis</em>
087 __italic__ <i>italic</i>
088 *strong* <strong>strong</strong>
089 **bold** <b>bold</b>
090 ??citation?? <cite>citation</cite>
091 -deleted text- <del>deleted</del>
092 +inserted text+ <ins>inserted</ins>
093 ^superscript^ <sup>superscript</sup>
094 ~subscript~ <sub>subscript</sub>
095 @code@ <code>computer code</code>
096
097 ==notextile== leave text alone (do not format)
098
099 "linktext":url <a href="url">linktext</a>
100 "linktext(title)":url <a href="url" title="title">linktext</a>
101
102 !imageurl! <img src="imageurl" alt="" />
103 !imageurl(alt text)! <img src="imageurl" alt="alt text" />
104 !imageurl!:linkurl <a href="linkurl"><img src="imageurl" /></a>
105
106 ABC(Always Be Closing) <acronym title="Always Be Closing">ABC</acronym>
107
108 */
109
110 import gnu.regexp.*;
111
112 import java.util.StringTokenizer;
113 import java.util.ArrayList;
114
115 public class JTextile
116 {
117 private static final int ENT_COMPAT = 0;
118 private static final int ENT_NOQUOTES = 2;
119 private static final int ENT_QUOTES = 3;
120
121
122
123 public JTextile()
124 {
125 }
126
127
128 public static String textile(String text) throws Exception
129 {
130 //$text = stripslashes($text);
131
132 //# turn any incoming ampersands into a dummy character for now.
133 //# This uses a negative lookahead for alphanumerics followed by a semicolon,
134 //# implying an incoming html entity, to be skipped
135 text = preg_replace("&(?![#a-zA-Z0-9]+;)","x%x%",text);
136
137 //# unentify angle brackets and ampersands
138 text = replace(text,">", ">");
139 text = replace(text,"<", "<");
140 text = replace(text,"&", "&");
141
142
143 //# zap carriage returns
144 text = replace(text,"\r\n", "\n");
145
146 //# zap tabs
147 text = replace(text,"\t", "" );
148
149 // trim each line
150 StringBuffer splitBuffer = new StringBuffer();
151 StringTokenizer tokenizer = new StringTokenizer(text,"\n",true);
152 while(tokenizer.hasMoreTokens())
153 {
154 splitBuffer.append(tokenizer.nextToken().trim());
155 splitBuffer.append("\n");
156 }
157
158 text = splitBuffer.toString();
159
160
161 //### Find and replace quick tags
162
163 //# double equal signs mean <notextile>
164 text = preg_replace("(^|\\s)==(.*?)==([^\\w]{0,2})","$1<notextile>$2</notextile>$3$4",text);
165
166 //# image qtag
167 text = preg_replace("!([^\\s\\(=]+?)\\s?(\\(([^\\)]+?)\\))?!","<img src=\"$1\" alt=\"$3\" />",text);
168
169 //# image with hyperlink
170 text = preg_replace("(<img.+ \\/>):(\\S+)","<a href=\"$2\">$1</a>",text);
171
172 //# hyperlink qtag
173 text = preg_replace("\"([^\"\\(]+)\\s?(\\(([^\\)]+)\\))?\":(\\S+?)([^\\w\\s\\/;]|[1-9]*?)(\\s|$)","<a href=\"$4\" title=\"$3\">$1</a>$5$6",text);
174
175 //# arrange qtag delineators and replacements in an array
176 String[] srcTags = {"\\*\\*","\\*","\\?\\?","-","\\+","~","@"};
177 String[] replaceTags = {"b","strong","cite","del","ins","sub","code"};
178
179 //# loop through the array, replacing qtags with html
180 for(int i = 0; i < srcTags.length; i++)
181 {
182 text = preg_replace("(^|\\s|>)" + srcTags[i] + "\\b(.+?)\\b([^\\w\\s]*?)" + srcTags[i] + "([^\\w\\s]{0,2})","$1<" + replaceTags[i] + ">$2$3</" + replaceTags[i] + ">$4",text);
183 }
184
185 //# some weird bs with underscores and \b word boundaries,
186 //# so we'll do those on their own
187
188 text = preg_replace("(^|\\s)__(.*?)__([^\\w\\s]{0,2})","$1<i>$2</i>$3",text);
189
190 text = preg_replace("(^|\\s)_(.*?)_([^\\w\\s]{0,2})","$1<em>$2</em>$3",text);
191
192 text = preg_replace("\\^(.*?)\\^","$1<sup>$2</sup>$3",text);
193
194 // ### Find and replace typographic chars and special tags
195
196 //# small problem with double quotes at the end of a string
197
198 text = preg_replace("\"$","\" ",text);
199
200 //# NB: all these will wreak havoc inside <html> tags
201
202 String[] glyph_search = {
203 "([^\\s[{<])?\\'([dmst]\\b|ll\\b|ve\\b|\\s|$)", // single closing
204 "\\'", // single opening
205 "([^\\s[{])?\"(\\s|$)", // # double closing
206 "\"(\\s|$)", // double opening
207 "\\b( )?\\.{3}", // # ellipsis
208 "\\b([A-Z][A-Z0-9]{2,})\\b(\\(([^\\)]+)\\))", // # 3+ uppercase acronym
209 "(^|[^\"][>\\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)", // # 3+ uppercase caps
210 "\\s?--\\s?", // # em dash
211 "\\s-\\s", // # en dash
212 "(\\d+)-(\\d+)", // # en dash
213 "(\\d+) ?x ?(\\d+)", //# dimension sign
214 "\\b ?(\\((tm|TM)\\))", // trademark
215 "\\b ?(\\([rR]\\))", // # registered
216 "\\b ?(\\([cC]\\))" // # registered
217 };
218
219
220 String[] glyph_replace = {
221 "$1’$2", //# single closing
222 "‘", //# single opening
223 "$1”$2", //# double closing
224 "“", //# double opening
225 "$1…", //# ellipsis
226 "<acronym title=\"$2\">$1</acronym>", //# 3+ uppercase acronym
227 "$1<span class=\"caps\">$2</span>$3", //# 3+ uppercase caps
228 "$1$2$3", //# 3+ uppercase caps
229 "—", //# em dash
230 " – ", //# en dash
231 "$1–$2", //# en dash
232 "$1×$2", //# dimension sign
233 "™", //# trademark
234 "®", //# registered
235 "©" //# copyright
236 };
237
238
239
240
241 // # set toggle for turning off replacements between <code> or <pre>
242 boolean codepre = false;
243
244 //# if there is no html, do a simple search and replace
245
246 if(!preg_match("<.[^<]*>",text))
247 {
248 text = preg_replace(glyph_search,glyph_replace,text);
249 }
250 else
251 {
252
253 StringBuffer out = new StringBuffer();
254 //# else split the text into an array at <.*>
255 //$text = preg_split("/(<.*>)/U",$text,-1,PREG_SPLIT_DELIM_CAPTURE);
256 String[] textSplit = preg_split("<.[^<]*>",text);
257 for(int i = 0; i < textSplit.length; i++)
258 {
259
260 // # matches are off if we're between <code>, <pre> etc.
261 if(preg_match("<(code|pre|kbd|notextile)>",textSplit[i].toLowerCase()))
262 {
263 codepre = true;
264 }
265 else if(preg_match("</(code|pre|kbd|notextile)>",textSplit[i].toLowerCase()))
266 {
267 codepre = false;
268 }
269
270 if(!preg_match("<.[^<]*?>",textSplit[i]) && codepre == false)
271 {
272 textSplit[i] = preg_replace(glyph_search,glyph_replace,textSplit[i]);
273 }
274
275 //# convert htmlspecial if between <code>
276 if (codepre == true){
277 textSplit[i] = htmlspecialchars(textSplit[i],ENT_NOQUOTES);
278 textSplit[i] = replace(textSplit[i],"<pre>","<pre>");
279 textSplit[i] = replace(textSplit[i],"<code>","<code>");
280 textSplit[i] = replace(textSplit[i],"<notextile>","<notextile>");
281 }
282
283 //# each line gets pushed to a new array
284 out.append( textSplit[i]);
285 }
286
287 text = out.toString();
288
289
290 }
291
292 //### Block level formatting
293
294 //# deal with forced breaks; this is going to be a problem between
295 //# <pre> tags, but we'll clean them later
296
297
298 //////!!! not working
299 //text = preg_replace("(\\S)(_*)([[:punct:]]*) *\n([^#*\\s])", "$1$2$3<br />$4", text);
300 //text = preg_replace("(\\S)(_*)([:punct:]*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);
301 text = preg_replace("(\\S)(_*)([:punct:]*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);
302
303 //# might be a problem with lists
304 text = replace(text,"l><br />", "l>\n");
305
306 boolean pre = false;
307
308 String[] block_find = {
309 "^\\s?\\*\\s(.*)", //# bulleted list *
310 "^\\s?#\\s(.*)", //# numeric list #
311 "^bq\\. (.*)", //# blockquote bq.
312 "^h(\\d)\\(([\\w]+)\\)\\.\\s(.*)", //# header hn(class). w/ css class
313 "^h(\\d)\\. (.*)", //# plain header hn.
314 "^p\\(([[:alnum:]]+)\\)\\.\\s(.*)", //# para p(class). w/ css class
315 "^p\\. (.*)", //# plain paragraph
316 "^([^\\t ]+.*)" //# remaining plain paragraph
317 };
318
319 String[] block_replace = {
320 "\t<liu>$1</liu>$2",
321 "\t<lio>$1</lio>$2",
322 "\t<blockquote>$1</blockquote>$2",
323 "\t<h$1 class=\"$2\">$3</h$1>$4",
324 "\t<h$1>$2</h$1>$3",
325 "\t<p class=\"$1\">$2</p>$3",
326 "\t<p>$1</p>",
327 "\t<p>$1</p>$2"
328 };
329
330
331 StringBuffer blockBuffer = new StringBuffer();
332
333 String list = "";
334
335 // This done to ensure that lists close after themselves
336 text += " \n";
337
338
339 //# split the text into an array by newlines
340 StringTokenizer blockTokenizer = new StringTokenizer(text,"\n",false);
341
342 while(blockTokenizer.hasMoreTokens())
343 {
344 String line = blockTokenizer.nextToken() ;
345
346 //#make sure the line isn't blank
347 if (!preg_match("^$",line))
348 {
349
350 //# matches are off if we're between <pre> or <code> tags
351 if(line.toLowerCase().indexOf("<pre>") > -1)
352 {
353 pre = true;
354 }
355
356 //# deal with block replacements first, then see if we're in a list
357 if (!pre)
358 {
359 line = preg_replace(block_find,block_replace,line);
360 }
361
362 //# kill any br tags that slipped in earlier
363 if (pre == true)
364 {
365 line = replace(line,"<br />","\n");
366 }
367
368 //# matches back on after </pre>
369 if(line.toLowerCase().indexOf("</pre>") > -1)
370 {
371 //System.out.println("endpre");
372 pre = false;
373 }
374
375 //# at the beginning of a list, $line switches to a value
376 if (list.length() == 0 && preg_match("\\t<li",line))
377 {
378 line = preg_replace("^(\\t<li)(o|u)","\n<$2l>\n$1$2",line);
379 list = line.substring(2,3);
380
381 //# at the end of a list, $line switches to empty
382 }
383 else if (list.length() > 0 && !preg_match("\\t<li" + list,line))
384 {
385 line = preg_replace("^(.*)$","</" + list + "l>\n$1",line);
386 list = "";
387 }
388 }
389 // push each line to a new array once it's processed
390 blockBuffer.append(line);
391 blockBuffer.append("\n");
392
393 }
394 text = blockBuffer.toString();
395
396
397 //#clean up <notextile>
398 text = preg_replace("<\\/?notextile>", "",text);
399
400 //# clean up liu and lio
401 text = preg_replace("<(\\/?)li(u|o)>", "<$1li>",text);
402
403 //# turn the temp char back to an ampersand entity
404 text = replace(text,"x%x%","&");
405
406 //# Newline linebreaks, just for markup tidiness
407 text = replace(text,"<br />","<br />\n");
408
409 return text;
410 }
411
412
413
414 /**
415 * Does just that.
416 *
417 * @param source The string to start with
418 * @param searchFor The string we are looking for
419 * @param replaceWith The replacement
420 *
421 * @return The reformatted string
422 *
423 */
424 private static String replace ( String source , String searchFor , String replaceWith )
425 {
426 if (source == null || "".equals(source)) {
427 return source;
428 }
429
430 if (replaceWith == null) {
431 return source;
432 }
433
434 if ("".equals(searchFor)) {
435 return source;
436 }
437
438 int s = 0;
439 int e = 0;
440 StringBuffer result = new StringBuffer();
441
442 while ((e = source.indexOf(searchFor, s)) >= 0)
443 {
444 result.append(source.substring(s, e));
445 result.append(replaceWith);
446 s = e + searchFor.length();
447 }
448 result.append(source.substring(s));
449 return result.toString();
450
451 }
452
453 private static String htmlspecialchars(String text, int mode)
454 {
455 text = replace(text,"&", "&");
456 if (mode != ENT_NOQUOTES)
457 text = replace(text,"\"", """);
458 if (mode == ENT_QUOTES)
459 text = replace(text,"'", "'");
460 text = replace(text,"<", "<");
461 text = replace(text,">", ">");
462 return text ;
463 }
464
465 private static String preg_replace(String pattern,String replace,String text) throws Exception
466 {
467 gnu.regexp.RE r = new gnu.regexp.RE(pattern);
468 return r.substituteAll(text,replace);
469 }
470
471 private static String preg_replace(String[] pattern,String[] replace,String text) throws Exception
472 {
473 for(int i = 0; i < pattern.length; i++)
474 {
475 text = preg_replace(pattern[i],replace[i],text);
476 }
477 return text;
478 }
479
480 private static boolean preg_match(String pattern,String text) throws Exception
481 {
482 gnu.regexp.RE r = new gnu.regexp.RE(pattern);
483 return r.getMatch(text) != null;
484 }
485
486 private static String[] preg_split(String pattern,String text) throws Exception
487 {
488
489 int startAt = 0;
490 ArrayList tempList = new ArrayList();
491
492 gnu.regexp.RE r = new gnu.regexp.RE(pattern);
493
494 gnu.regexp.REMatch match = r.getMatch(text);
495
496 while(match != null)
497 {
498 String beforeMatch = text.substring(startAt,match.getStartIndex());
499 tempList.add(beforeMatch);
500 tempList.add(match.toString());
501 startAt = match.getEndIndex();
502 match = r.getMatch(text,startAt);
503 }
504
505 tempList.add(text.substring(startAt));
506
507 // copy out our templist to an array of strings which is what we return
508 String[] ret = new String[tempList.size()];
509
510 for(int i = 0; i < ret.length; i++)
511 {
512 ret[i] = (String)tempList.get(i);
513 }
514
515 return ret;
516
517 }
518
519 }
|