Class PDF2HTMLImportStripper


  • public class PDF2HTMLImportStripper
    extends org.apache.pdfbox.text.PDFTextStripper
    • Field Summary

      • Fields inherited from class org.apache.pdfbox.text.PDFTextStripper

        charactersByArticle, document, LINE_SEPARATOR, output
    • Constructor Summary

      Constructors 
      Constructor Description
      PDF2HTMLImportStripper(org.apache.pdfbox.pdmodel.PDDocument document)
    • Method Summary

      All Methods Instance Methods Concrete Methods 
      Modifier and Type Method Description
      protected float computeFontHeight(org.apache.pdfbox.pdmodel.font.PDFont arg0)
      void drawHTMLFields()
      String getHTML()
      com.hp.gagawa.java.Node getHTMLBase()
      com.alibaba.fastjson.JSONArray getOnValues(org.apache.pdfbox.pdmodel.interactive.form.PDButton pdField)
      ArrayList<String> getPageImages()
      protected void processOperator(org.apache.pdfbox.contentstream.operator.Operator operator, List<org.apache.pdfbox.cos.COSBase> operands)
      protected void processTextPosition(org.apache.pdfbox.text.TextPosition text)
      protected void showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4)
      void stripPage(int page, String img)
      • Methods inherited from class org.apache.pdfbox.text.PDFTextStripper

        endArticle, endDocument, endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getCurrentPageNo, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getWordSeparator, matchPattern, processPage, processPages, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePage, writePageEnd, writePageStart, writeParagraphEnd, writeParagraphSeparator, writeParagraphStart, writeString, writeString, writeText, writeWordSeparator
      • Methods inherited from class org.apache.pdfbox.contentstream.PDFStreamEngine

        addOperator, applyTextAdjustment, beginMarkedContentSequence, beginText, decreaseLevel, endMarkedContentSequence, endText, getAppearance, getCurrentPage, getGraphicsStackSize, getGraphicsState, getInitialMatrix, getLevel, getResources, getTextLineMatrix, getTextMatrix, increaseLevel, operatorException, processAnnotation, processChildStream, processOperator, processSoftMask, processTilingPattern, processTilingPattern, processTransparencyGroup, processType3Stream, registerOperatorProcessor, restoreGraphicsStack, restoreGraphicsState, saveGraphicsStack, saveGraphicsState, setLineDashPattern, setTextLineMatrix, setTextMatrix, showAnnotation, showFontGlyph, showFontGlyph, showForm, showGlyph, showText, showTextString, showTextStrings, showTransparencyGroup, showType3Glyph, showType3Glyph, transformedPoint, transformWidth, unsupportedOperator
    • Constructor Detail

      • PDF2HTMLImportStripper

        public PDF2HTMLImportStripper(org.apache.pdfbox.pdmodel.PDDocument document)
                               throws IOException
        Throws:
        IOException
    • Method Detail

      • getHTML

        public String getHTML()
      • getHTMLBase

        public com.hp.gagawa.java.Node getHTMLBase()
      • processOperator

        protected void processOperator(org.apache.pdfbox.contentstream.operator.Operator operator,
                                       List<org.apache.pdfbox.cos.COSBase> operands)
                                throws IOException
        Overrides:
        processOperator in class org.apache.pdfbox.contentstream.PDFStreamEngine
        Throws:
        IOException
      • getOnValues

        public com.alibaba.fastjson.JSONArray getOnValues(org.apache.pdfbox.pdmodel.interactive.form.PDButton pdField)
      • drawHTMLFields

        public void drawHTMLFields()
      • processTextPosition

        protected void processTextPosition(org.apache.pdfbox.text.TextPosition text)
        Overrides:
        processTextPosition in class org.apache.pdfbox.text.PDFTextStripper
      • showGlyph

        protected void showGlyph(org.apache.pdfbox.util.Matrix arg0,
                                 org.apache.pdfbox.pdmodel.font.PDFont arg1,
                                 int arg2,
                                 String arg3,
                                 org.apache.pdfbox.util.Vector arg4)
                          throws IOException
        Overrides:
        showGlyph in class org.apache.pdfbox.contentstream.PDFStreamEngine
        Throws:
        IOException
      • computeFontHeight

        protected float computeFontHeight(org.apache.pdfbox.pdmodel.font.PDFont arg0)
                                   throws IOException
        Throws:
        IOException