|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object | +--org.w3c.tidy.Tidy
HTML parser and pretty printer
(c) 1998-2000 (W3C) MIT, INRIA, Keio University. See Tidy.java for the copyright notice. Derived from HTML Tidy Release 4 Aug 2000.
Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts Institute of Technology, Institut National de Recherche en Informatique et en Automatique, Keio University). All Rights Reserved.
Contributing Author(s):
Dave Raggett
Andy Quick (translation to Java)
The contributing author(s) would like to thank all those who helped with testing, bug fixes, and patience. This wouldn't have been possible without all of you.
COPYRIGHT NOTICE:
This software and documentation is provided "as is," and
the copyright holders and contributing author(s) make no
representations or warranties, express or implied, including
but not limited to, warranties of merchantability or fitness
for any particular purpose or that the use of the software or
documentation will not infringe any third party patents,
copyrights, trademarks or other rights.
The copyright holders and contributing author(s) will not be liable for any direct, indirect, special or consequential damages arising out of any use of the software or documentation, even if advised of the possibility of such damage.
Permission is hereby granted to use, copy, modify, and distribute this source code, or portions hereof, documentation and executables, for any purpose, without fee, subject to the following restrictions:
The copyright holders and contributing author(s) specifically permit, without fee, and encourage the use of this source code as a component for supporting the Hypertext Markup Language in commercial products. If you use this source code in a product, acknowledgment is not required but would be appreciated.
| Constructor Summary | |
Tidy()
|
|
| Method Summary | |
static Document |
createEmptyDocument()
Creates an empty DOM Document. |
java.lang.String |
getAltText()
|
boolean |
getBreakBeforeBR()
|
boolean |
getBurstSlides()
|
int |
getCharEncoding()
|
Configuration |
getConfiguration()
|
java.lang.String |
getDocType()
|
boolean |
getDropEmptyParas()
|
boolean |
getDropFontTags()
|
boolean |
getEmacs()
|
boolean |
getEncloseBlockText()
|
boolean |
getEncloseText()
|
java.lang.String |
getErrfile()
|
java.io.PrintWriter |
getErrout()
Errout - the error output stream |
boolean |
getFixBackslash()
|
boolean |
getFixComments()
|
boolean |
getHideEndTags()
|
boolean |
getIndentAttributes()
|
boolean |
getIndentContent()
|
java.lang.String |
getInputStreamName()
|
boolean |
getKeepFileTimes()
|
boolean |
getLiteralAttribs()
|
boolean |
getLogicalEmphasis()
|
boolean |
getMakeClean()
|
boolean |
getNumEntities()
|
boolean |
getOnlyErrors()
|
int |
getParseErrors()
ParseErrors - the number of errors that occurred in the most recent parse operation |
int |
getParseWarnings()
ParseWarnings - the number of warnings that occurred in the most recent parse operation |
boolean |
getQuiet()
|
boolean |
getQuoteAmpersand()
|
boolean |
getQuoteMarks()
|
boolean |
getQuoteNbsp()
|
boolean |
getRawOut()
|
boolean |
getShowWarnings()
|
java.lang.String |
getSlidestyle()
|
boolean |
getSmartIndent()
|
int |
getSpaces()
|
java.io.PrintWriter |
getStderr()
|
int |
getTabsize()
|
boolean |
getTidyMark()
|
boolean |
getUpperCaseAttrs()
|
boolean |
getUpperCaseTags()
|
boolean |
getWord2000()
|
boolean |
getWrapAsp()
|
boolean |
getWrapAttVals()
|
boolean |
getWrapJste()
|
int |
getWraplen()
|
boolean |
getWrapPhp()
|
boolean |
getWrapScriptlets()
|
boolean |
getWrapSection()
|
boolean |
getWriteback()
|
boolean |
getXHTML()
|
boolean |
getXmlOut()
|
boolean |
getXmlPi()
|
boolean |
getXmlPIs()
|
boolean |
getXmlSpace()
|
boolean |
getXmlTags()
|
static void |
main(java.lang.String[] argv)
Command line interface to parser and pretty printer. |
Node |
parse(java.io.InputStream in,
java.io.OutputStream out)
Parses InputStream in and returns the root Node. |
Document |
parseDOM(java.io.InputStream in,
java.io.OutputStream out)
Parses InputStream in and returns a DOM Document node. |
void |
pprint(Document doc,
java.io.OutputStream out)
Pretty-prints a DOM Document. |
void |
setAltText(java.lang.String altText)
AltText - default text for alt attribute |
void |
setBreakBeforeBR(boolean BreakBeforeBR)
BreakBeforeBR - output newline before <br> or not? |
void |
setBurstSlides(boolean BurstSlides)
BurstSlides - create slides on each h2 element |
void |
setCharEncoding(int charencoding)
CharEncoding |
void |
setConfigurationFromFile(java.lang.String filename)
Sets the configuration from a configuration file. |
void |
setConfigurationFromProps(java.util.Properties props)
Sets the configuration from a properties object. |
void |
setDocType(java.lang.String doctype)
DocType - user specified doctype: omit | auto | strict | loose | fpi, where the fpi is a string similar to "-//ACME//DTD HTML 3.14159//EN". Note: for fpi, include the double-quotes in the string. |
void |
setDropEmptyParas(boolean DropEmptyParas)
DropEmptyParas - discard empty p elements |
void |
setDropFontTags(boolean DropFontTags)
DropFontTags - discard presentation tags |
void |
setEmacs(boolean Emacs)
Emacs - if true format error output for GNU Emacs |
void |
setEncloseBlockText(boolean EncloseBlockText)
EncloseBlockText - if true text in blocks is wrapped in <p>'s |
void |
setEncloseText(boolean EncloseText)
EncloseText - if true text at body is wrapped in <p>'s |
void |
setErrfile(java.lang.String errfile)
Errfile - file name to write errors to |
void |
setErrout(java.io.PrintWriter errout)
|
void |
setFixBackslash(boolean FixBackslash)
FixBackslash - fix URLs by replacing \ with / |
void |
setFixComments(boolean FixComments)
FixComments - fix comments with adjacent hyphens |
void |
setHideEndTags(boolean HideEndTags)
HideEndTags - suppress optional end tags |
void |
setIndentAttributes(boolean IndentAttributes)
IndentAttributes - newline+indent before each attribute |
void |
setIndentContent(boolean IndentContent)
IndentContent - indent content of appropriate tags |
void |
setInputStreamName(java.lang.String name)
InputStreamName - the name of the input stream (printed in the header information). |
void |
setKeepFileTimes(boolean KeepFileTimes)
KeepFileTimes - if true, last modified time is preserved. This is NOT supported at this time. |
void |
setLiteralAttribs(boolean LiteralAttribs)
LiteralAttribs - if true attributes may use newlines |
void |
setLogicalEmphasis(boolean LogicalEmphasis)
LogicalEmphasis - replace i by em and b by strong |
void |
setMakeClean(boolean MakeClean)
MakeClean - remove presentational clutter |
void |
setNumEntities(boolean NumEntities)
NumEntities - use numeric entities |
void |
setOnlyErrors(boolean OnlyErrors)
OnlyErrors - if true normal output is suppressed |
void |
setQuiet(boolean Quiet)
Quiet - no 'Parsing X', guessed DTD or summary |
void |
setQuoteAmpersand(boolean QuoteAmpersand)
QuoteAmpersand - output naked ampersand as &amp; |
void |
setQuoteMarks(boolean QuoteMarks)
QuoteMarks - output " marks as &quot; |
void |
setQuoteNbsp(boolean QuoteNbsp)
QuoteNbsp - output non-breaking space as entity |
void |
setRawOut(boolean RawOut)
RawOut - avoid mapping values > 127 to entities |
void |
setShowWarnings(boolean ShowWarnings)
ShowWarnings - however errors are always shown |
void |
setSlidestyle(java.lang.String slidestyle)
Slidestyle - style sheet for slides |
void |
setSmartIndent(boolean SmartIndent)
SmartIndent - does text/block level content affect indentation |
void |
setSpaces(int spaces)
Spaces - default indentation |
void |
setTabsize(int tabsize)
Tabsize |
void |
setTidyMark(boolean TidyMark)
TidyMark - add meta element indicating tidied doc |
void |
setUpperCaseAttrs(boolean UpperCaseAttrs)
UpperCaseAttrs - output attributes in upper not lower case |
void |
setUpperCaseTags(boolean UpperCaseTags)
UpperCaseTags - output tags in upper not lower case |
void |
setWord2000(boolean Word2000)
Word2000 - draconian cleaning for Word2000 |
void |
setWrapAsp(boolean WrapAsp)
WrapAsp - wrap within ASP pseudo elements |
void |
setWrapAttVals(boolean WrapAttVals)
WrapAttVals - wrap within attribute values |
void |
setWrapJste(boolean WrapJste)
WrapJste - wrap within JSTE pseudo elements |
void |
setWraplen(int wraplen)
Wraplen - default wrap margin |
void |
setWrapPhp(boolean WrapPhp)
WrapPhp - wrap within PHP pseudo elements |
void |
setWrapScriptlets(boolean WrapScriptlets)
WrapScriptlets - wrap within JavaScript string literals |
void |
setWrapSection(boolean WrapSection)
WrapSection - wrap within <![ ... ]> section tags |
void |
setWriteback(boolean writeback)
Writeback - if true then output tidied markup. NOTE: this property is ignored when parsing from an InputStream. |
void |
setXHTML(boolean xHTML)
XHTML - output extensible HTML |
void |
setXmlOut(boolean XmlOut)
XmlOut - create output as XML |
void |
setXmlPi(boolean XmlPi)
XmlPi - add <?xml?> for XML docs |
void |
setXmlPIs(boolean XmlPIs)
XmlPIs - if set to true PIs must end with ?> |
void |
setXmlSpace(boolean XmlSpace)
XmlSpace - if set to yes adds xml:space attr as needed |
void |
setXmlTags(boolean XmlTags)
XmlTags - treat input as XML |
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
public Tidy()
| Method Detail |
public Configuration getConfiguration()
public java.io.PrintWriter getStderr()
public int getParseErrors()
public int getParseWarnings()
public java.io.PrintWriter getErrout()
public void setErrout(java.io.PrintWriter errout)
public void setSpaces(int spaces)
See Also: Configuration.spaces
public int getSpaces()
public void setWraplen(int wraplen)
See Also: Configuration.wraplen
public int getWraplen()
public void setCharEncoding(int charencoding)
See Also: Configuration.CharEncoding
public int getCharEncoding()
public void setTabsize(int tabsize)
See Also: Configuration.tabsize
public int getTabsize()
public void setErrfile(java.lang.String errfile)
See Also: Configuration.errfile
public java.lang.String getErrfile()
public void setWriteback(boolean writeback)
See Also: Configuration.writeback
public boolean getWriteback()
public void setOnlyErrors(boolean OnlyErrors)
See Also: Configuration.OnlyErrors
public boolean getOnlyErrors()
public void setShowWarnings(boolean ShowWarnings)
See Also: Configuration.ShowWarnings
public boolean getShowWarnings()
public void setQuiet(boolean Quiet)
See Also: Configuration.Quiet
public boolean getQuiet()
public void setIndentContent(boolean IndentContent)
See Also: Configuration.IndentContent
public boolean getIndentContent()
public void setSmartIndent(boolean SmartIndent)
See Also: Configuration.SmartIndent
public boolean getSmartIndent()
public void setHideEndTags(boolean HideEndTags)
See Also: Configuration.HideEndTags
public boolean getHideEndTags()
public void setXmlTags(boolean XmlTags)
See Also: Configuration.XmlTags
public boolean getXmlTags()
public void setXmlOut(boolean XmlOut)
See Also: Configuration.XmlOut
public boolean getXmlOut()
public void setXHTML(boolean xHTML)
See Also: Configuration.xHTML
public boolean getXHTML()
public void setRawOut(boolean RawOut)
See Also: Configuration.RawOut
public boolean getRawOut()
public void setUpperCaseTags(boolean UpperCaseTags)
See Also: Configuration.UpperCaseTags
public boolean getUpperCaseTags()
public void setUpperCaseAttrs(boolean UpperCaseAttrs)
See Also: Configuration.UpperCaseAttrs
public boolean getUpperCaseAttrs()
public void setMakeClean(boolean MakeClean)
See Also: Configuration.MakeClean
public boolean getMakeClean()
public void setBreakBeforeBR(boolean BreakBeforeBR)
See Also: Configuration.BreakBeforeBR
public boolean getBreakBeforeBR()
public void setBurstSlides(boolean BurstSlides)
See Also: Configuration.BurstSlides
public boolean getBurstSlides()
public void setNumEntities(boolean NumEntities)
See Also: Configuration.NumEntities
public boolean getNumEntities()
public void setQuoteMarks(boolean QuoteMarks)
See Also: Configuration.QuoteMarks
public boolean getQuoteMarks()
public void setQuoteNbsp(boolean QuoteNbsp)
See Also: Configuration.QuoteNbsp
public boolean getQuoteNbsp()
public void setQuoteAmpersand(boolean QuoteAmpersand)
See Also: Configuration.QuoteAmpersand
public boolean getQuoteAmpersand()
public void setWrapAttVals(boolean WrapAttVals)
See Also: Configuration.WrapAttVals
public boolean getWrapAttVals()
public void setWrapScriptlets(boolean WrapScriptlets)
See Also: Configuration.WrapScriptlets
public boolean getWrapScriptlets()
public void setWrapSection(boolean WrapSection)
See Also: Configuration.WrapSection
public boolean getWrapSection()
public void setAltText(java.lang.String altText)
See Also: Configuration.altText
public java.lang.String getAltText()
public void setSlidestyle(java.lang.String slidestyle)
See Also: Configuration.slidestyle
public java.lang.String getSlidestyle()
public void setXmlPi(boolean XmlPi)
See Also: Configuration.XmlPi
public boolean getXmlPi()
public void setDropFontTags(boolean DropFontTags)
See Also: Configuration.DropFontTags
public boolean getDropFontTags()
public void setDropEmptyParas(boolean DropEmptyParas)
See Also: Configuration.DropEmptyParas
public boolean getDropEmptyParas()
public void setFixComments(boolean FixComments)
See Also: Configuration.FixComments
public boolean getFixComments()
public void setWrapAsp(boolean WrapAsp)
See Also: Configuration.WrapAsp
public boolean getWrapAsp()
public void setWrapJste(boolean WrapJste)
See Also: Configuration.WrapJste
public boolean getWrapJste()
public void setWrapPhp(boolean WrapPhp)
See Also: Configuration.WrapPhp
public boolean getWrapPhp()
public void setFixBackslash(boolean FixBackslash)
See Also: Configuration.FixBackslash
public boolean getFixBackslash()
public void setIndentAttributes(boolean IndentAttributes)
See Also: Configuration.IndentAttributes
public boolean getIndentAttributes()
public void setDocType(java.lang.String doctype)
See Also: Configuration.docTypeStr, Configuration.docTypeMode
public java.lang.String getDocType()
public void setLogicalEmphasis(boolean LogicalEmphasis)
See Also: Configuration.LogicalEmphasis
public boolean getLogicalEmphasis()
public void setXmlPIs(boolean XmlPIs)
See Also: Configuration.XmlPIs
public boolean getXmlPIs()
public void setEncloseText(boolean EncloseText)
See Also: Configuration.EncloseBodyText
public boolean getEncloseText()
public void setEncloseBlockText(boolean EncloseBlockText)
See Also: Configuration.EncloseBlockText
public boolean getEncloseBlockText()
public void setKeepFileTimes(boolean KeepFileTimes)
See Also: Configuration.KeepFileTimes
public boolean getKeepFileTimes()
public void setWord2000(boolean Word2000)
See Also: Configuration.Word2000
public boolean getWord2000()
public void setTidyMark(boolean TidyMark)
See Also: Configuration.TidyMark
public boolean getTidyMark()
public void setXmlSpace(boolean XmlSpace)
See Also: Configuration.XmlSpace
public boolean getXmlSpace()
public void setEmacs(boolean Emacs)
See Also: Configuration.Emacs
public boolean getEmacs()
public void setLiteralAttribs(boolean LiteralAttribs)
See Also: Configuration.LiteralAttribs
public boolean getLiteralAttribs()
public void setInputStreamName(java.lang.String name)
public java.lang.String getInputStreamName()
public void setConfigurationFromFile(java.lang.String filename)
public void setConfigurationFromProps(java.util.Properties props)
public Node parse(java.io.InputStream in,
java.io.OutputStream out)
public Document parseDOM(java.io.InputStream in,
java.io.OutputStream out)
public static Document createEmptyDocument()
public void pprint(Document doc,
java.io.OutputStream out)
public static void main(java.lang.String[] argv)
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||