How to read a PDF page by page with PDFBox

2

Good afternoon.

I wonder if anyone can help me. I need to extract data from a PDF file, but I need to read the file one page at a time. If anyone can help, thank you.

/**
 * Asker's attempt: parses a PDF with PDFBox, extracts the text of pages 1 through 5
 * in a single {@code getText} call, and prints it to standard output.
 *
 * NOTE(review): the document opened here is never closed in this snippet.
 */
public static void main(String args[]) {
    PDFTextStripper pdfStripper = null;
    PDDocument pdDoc = null;
    COSDocument cosDoc = null;
    // NOTE(review): inside a Java string literal "\t" is a tab escape, so this literal
    // contains tab characters rather than backslash separators, and it has no drive
    // colon — presumably "C:\\testes\\teste.pdf" was intended; confirm.
    File file = new File("C\testes\teste.pdf");
    try {
        // NOTE(review): presumably this PDFParser constructor does not accept a plain
        // InputStream in the PDFBox version in use — confirm against the library version.
        PDFParser parser = new PDFParser(new FileInputStream(file)); // The FileInputStream here is being flagged as an error.
        parser.parse();
        cosDoc = parser.getDocument();
        pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        // Restrict extraction to the page range 1..5; all of it comes back as one string.
        pdfStripper.setStartPage(1);
        pdfStripper.setEndPage(5);
        String parsedText = pdfStripper.getText(pdDoc);
        System.out.println(parsedText);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
    
asked by anonymous 23.06.2016 / 21:55

1 answer

2

How about this?

import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Prints the text of a PDF file one page at a time using PDFBox.
 */
public class SuaClasse {
    /**
     * Loads the PDF and writes each page's text to standard output, prefixed with
     * the page number. I/O failures are reported via a stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        // Backslashes must be escaped in Java string literals: an unescaped "\t" is a
        // tab character, not a path separator.
        File file = new File("C:\\testes\\teste.pdf");
        // try-with-resources guarantees the document (and the file behind it) is
        // closed even when extraction fails part-way through.
        try (PDDocument pdDoc = PDDocument.load(file)) {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            int pageCount = pdDoc.getNumberOfPages();
            // Setting start page == end page restricts each getText call to one page.
            for (int i = 1; i <= pageCount; i++) {
                pdfStripper.setStartPage(i);
                pdfStripper.setEndPage(i);
                String parsedText = pdfStripper.getText(pdDoc);
                System.out.println("Página " + i + ": " + parsedText);
            }
        } catch (IOException e) {
            // Handle the exception appropriately.
            e.printStackTrace();
        }
    }
}
    
23.06.2016 / 22:13