public class PDFText2HTML extends PDFTextStripper
charactersByArticle, document, output, outputEncoding, systemLineSeparator
Constructor | Description |
---|---|
PDFText2HTML(java.lang.String encoding) |
Constructor.
|
Modifier and Type | Method | Description |
---|---|---|
protected void |
endArticle() |
Write out the article separator.
|
void |
endDocument(PDDocument pdf) |
This method is available for subclasses of this class.
|
protected java.lang.String |
getTitle() |
This method will attempt to guess the title of the document using
either the document properties or the first lines of text.
|
protected void |
startArticle(boolean isltr) |
Write out the article separator (div tag) with proper text direction
information.
|
protected void |
writeHeader() |
Write the header to the output document.
|
protected void |
writePage() |
This will print the text of the processed page to "output".
|
protected void |
writeParagraphEnd() |
Writes the paragraph end "</p>" to the output.
|
protected void |
writeString(java.lang.String chars) |
Write a string to the output stream and escape some HTML characters.
|
protected void |
writeString(java.lang.String text,
java.util.List<TextPosition> textPositions) |
Write a string to the output stream, maintain font state, and escape some HTML characters.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getColorSpaces, getCurrentPage, getFonts, getGraphicsStack, getGraphicsState, getGraphicsStates, getResources, getTextLineMatrix, getTextMatrix, getTotalCharCnt, getValidCharCnt, getXObjects, isForceParsing, processEncodedText, processOperator, processOperator, processStream, processSubStream, registerOperatorProcessor, setColorSpaces, setFonts, setForceParsing, setGraphicsStack, setGraphicsState, setGraphicsStates, setTextLineMatrix, setTextMatrix
endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getCurrentPageNo, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageSeparator, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getText, getWordSeparator, handleLineSeparation, inspectFontEncoding, isParagraphSeparation, matchListItemPattern, matchPattern, processPage, processPages, processTextPosition, resetEngine, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageSeparator, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePageEnd, writePageSeperator, writePageStart, writeParagraphSeparator, writeParagraphStart, writeText, writeText, writeWordSeparator
public PDFText2HTML(java.lang.String encoding) throws java.io.IOException
encoding
- The encoding to be used.
java.io.IOException
- If there is an error during initialization.
protected void writeHeader() throws java.io.IOException
java.io.IOException
- If there is a problem writing out the header to the document.
protected void writePage() throws java.io.IOException
writePage
in class PDFTextStripper
java.io.IOException
- If there is an error writing the text.
public void endDocument(PDDocument pdf) throws java.io.IOException
endDocument
in class PDFTextStripper
pdf
- The PDF document that is being processed.
java.io.IOException
- If an IO error occurs.
protected java.lang.String getTitle()
protected void startArticle(boolean isltr) throws java.io.IOException
startArticle
in class PDFTextStripper
isltr
- true if direction of text is left to right
java.io.IOException
- If there is an error writing to the stream.
protected void endArticle() throws java.io.IOException
endArticle
in class PDFTextStripper
java.io.IOException
- If there is an error writing to the stream.
protected void writeString(java.lang.String text, java.util.List<TextPosition> textPositions) throws java.io.IOException
writeString
in class PDFTextStripper
text
- The text to write to the stream.
textPositions
- the corresponding text positions
java.io.IOException
- If there is an error writing to the stream.
protected void writeString(java.lang.String chars) throws java.io.IOException
writeString
in class PDFTextStripper
chars
- String to be written to the stream
java.io.IOException
- If there is an error writing to the stream.
protected void writeParagraphEnd() throws java.io.IOException
writeParagraphEnd
in class PDFTextStripper
java.io.IOException
- if something went wrong