public class PDF2HTMLImportStripper
extends org.apache.pdfbox.text.PDFTextStripper
| Constructor and Description |
|---|
| `PDF2HTMLImportStripper(org.apache.pdfbox.pdmodel.PDDocument document)` |
| Modifier and Type | Method and Description |
|---|---|
| `void` | `drawHTMLFields()` |
| `String` | `getHTML()` |
| `com.hp.gagawa.java.Node` | `getHTMLBase()` |
| `com.alibaba.fastjson.JSONArray` | `getOnValues(org.apache.pdfbox.pdmodel.interactive.form.PDButton pdField)` |
| `ArrayList<String>` | `getPageImages()` |
| `protected void` | `processOperator(org.apache.pdfbox.contentstream.operator.Operator operator, List<org.apache.pdfbox.cos.COSBase> operands)` |
| `protected void` | `processTextPosition(org.apache.pdfbox.text.TextPosition text)` |
| `protected void` | `showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4)` |
| `void` | `stripPage(int page, String img)` |
Methods inherited from class org.apache.pdfbox.text.PDFTextStripper: endArticle, endDocument, endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getCurrentPageNo, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getWordSeparator, matchPattern, processPage, processPages, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePage, writePageEnd, writePageStart, writeParagraphEnd, writeParagraphSeparator, writeParagraphStart, writeString, writeString, writeText, writeWordSeparator
Methods inherited from class org.apache.pdfbox.contentstream.PDFStreamEngine: addOperator, applyTextAdjustment, beginText, endText, getAppearance, getCurrentPage, getGraphicsStackSize, getGraphicsState, getInitialMatrix, getResources, getTextLineMatrix, getTextMatrix, operatorException, processAnnotation, processChildStream, processOperator, processSoftMask, processTilingPattern, processTilingPattern, processTransparencyGroup, processType3Stream, registerOperatorProcessor, restoreGraphicsStack, restoreGraphicsState, saveGraphicsStack, saveGraphicsState, setLineDashPattern, setTextLineMatrix, setTextMatrix, showAnnotation, showFontGlyph, showForm, showText, showTextString, showTextStrings, showTransparencyGroup, showType3Glyph, transformedPoint, transformWidth, unsupportedOperator
public PDF2HTMLImportStripper(org.apache.pdfbox.pdmodel.PDDocument document) throws IOException
Throws: IOException
public String getHTML()
public com.hp.gagawa.java.Node getHTMLBase()
protected void processOperator(org.apache.pdfbox.contentstream.operator.Operator operator, List<org.apache.pdfbox.cos.COSBase> operands) throws IOException
Overrides: processOperator in class org.apache.pdfbox.contentstream.PDFStreamEngine
Throws: IOException
public void stripPage(int page, String img) throws IOException
Throws: IOException
public com.alibaba.fastjson.JSONArray getOnValues(org.apache.pdfbox.pdmodel.interactive.form.PDButton pdField)
public void drawHTMLFields()
protected void processTextPosition(org.apache.pdfbox.text.TextPosition text)
Overrides: processTextPosition in class org.apache.pdfbox.text.PDFTextStripper
protected void showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4) throws IOException
Overrides: showGlyph in class org.apache.pdfbox.contentstream.PDFStreamEngine
Throws: IOException
Copyright © 2021 XIMA MEDIA GmbH. All rights reserved.