Java：Apache POIを使用してMS WordファイルをPDFに変換する方法は？

Question

Apache POIを使用して、MS Wordファイルをpdfに変換する方法

次のコードを使用していますが、エラーが発生して動作しません。間違ったクラスをインポートしているのでしょうか？

import Java.io.File; import Java.io.FileInputStream; import Java.io.FileOutputStream; import Java.io.OutputStream; import org.Apache.poi.hslf.record.Document; import org.Apache.poi.hwpf.HWPFDocument; import org.Apache.poi.hwpf.extractor.WordExtractor; import org.Apache.poi.hwpf.usermodel.Paragraph; import org.Apache.poi.hwpf.usermodel.Range; import org.Apache.poi.poifs.filesystem.POIFSFileSystem; public class TestCon { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub POIFSFileSystem fs = null; Document document = new Document(); try { System.out.println("Starting the test"); fs = new POIFSFileSystem(new FileInputStream("/document/test2.doc")); HWPFDocument doc = new HWPFDocument(fs); WordExtractor we = new WordExtractor(doc); OutputStream file = new FileOutputStream(new File("/document/test.pdf")); PdfWriter writer = PdfWriter.getInstance(document, file); Range range = doc.getRange(); document.open(); writer.setPageEmpty(true); document.newPage(); writer.setPageEmpty(true); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { org.Apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i); // CharacterRun run = pr.getCharacterRun(i); // run.setBold(true); // run.setCapitalized(true); // run.setItalic(true); paragraphs[i] = paragraphs[i].replaceAll("\cM?
?
", ""); System.out.println("Length:" + paragraphs[i].length()); System.out.println("Paragraph" + i + ": " + paragraphs[i].toString()); // add the paragraph to the document document.add(new Paragraph(paragraphs[i])); } System.out.println("Document testing completed"); } catch (Exception e) { System.out.println("Exception during test"); e.printStackTrace(); } finally { // close the document document.close(); } } }

Harinder · Accepted Answer

解決しました

import Java.io.File; import Java.io.FileInputStream; import Java.io.FileOutputStream; import Java.io.OutputStream; import com.lowagie.text.Document; import com.lowagie.text.DocumentException; import com.lowagie.text.Paragraph; import com.lowagie.text.pdf.PdfWriter; import org.Apache.poi.hwpf.HWPFDocument; import org.Apache.poi.hwpf.extractor.WordExtractor; import org.Apache.poi.hwpf.usermodel.Range; import org.Apache.poi.poifs.filesystem.POIFSFileSystem; public class TestCon { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub POIFSFileSystem fs = null; Document document = new Document(); try { System.out.println("Starting the test"); fs = new POIFSFileSystem(new FileInputStream("D:/Resume.doc")); HWPFDocument doc = new HWPFDocument(fs); WordExtractor we = new WordExtractor(doc); OutputStream file = new FileOutputStream(new File("D:/test.pdf")); PdfWriter writer = PdfWriter.getInstance(document, file); Range range = doc.getRange(); document.open(); writer.setPageEmpty(true); document.newPage(); writer.setPageEmpty(true); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { org.Apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i); // CharacterRun run = pr.getCharacterRun(i); // run.setBold(true); // run.setCapitalized(true); // run.setItalic(true); paragraphs[i] = paragraphs[i].replaceAll("\cM?
?
", ""); System.out.println("Length:" + paragraphs[i].length()); System.out.println("Paragraph" + i + ": " + paragraphs[i].toString()); // add the paragraph to the document document.add(new Paragraph(paragraphs[i])); } System.out.println("Document testing completed"); } catch (Exception e) { System.out.println("Exception during test"); e.printStackTrace(); } finally { // close the document document.close(); } } }

Kushagra Sahni · Answer

私の環境では、次の方法でうまく動作しました：

ソース： http://www.programcreek.com/java-api-examples/index.php?api=org.apache.poi.xwpf.converter.pdf.PdfConverter

package pdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Converts a .docx file to PDF with the XWPF PdfConverter.
 */
public class PDF {

    /**
     * Converts the input .docx file to the output PDF file.
     *
     * @param args optional pair {@code <inputFile> <outputFile>}; when exactly two
     *             arguments are not supplied, D:/TEST.docx and D:/TEST.pdf are used
     * @throws Exception if the input cannot be read or the conversion fails
     */
    public static void main(String[] args) throws Exception {
        String inputFile = "D:/TEST.docx";
        String outputFile = "D:/TEST.pdf";
        if (args != null && args.length == 2) {
            inputFile = args[0];
            outputFile = args[1];
        }
        System.out.println("inputFile:" + inputFile + ",outputFile:"+ outputFile);

        // try-with-resources: both streams are closed even when the conversion throws.
        try (FileInputStream in = new FileInputStream(inputFile)) {
            XWPFDocument document = new XWPFDocument(in);
            File outFile = new File(outputFile);
            try (OutputStream out = new FileOutputStream(outFile)) {
                // null is passed as in the original.
                // NOTE(review): presumably the converter falls back to default options - confirm.
                PdfOptions options = null;
                PdfConverter.getInstance().convert(document, out, options);
            }
        }
    }
}

Rohit Dubey · Answer

私の環境では、以下のコードで動作しました：

Public class DocToPdfConverter{ public static void main(String[] args) { String k=null; OutputStream fileForPdf =null; try { String fileName="/document/test2.doc"; //Below Code is for .doc file if(fileName.endsWith(".doc")) { HWPFDocument doc = new HWPFDocument(new FileInputStream( fileName)); WordExtractor we=new WordExtractor(doc); k = we.getText(); fileForPdf = new FileOutputStream(new File( "/document/DocToPdf.pdf")); we.close(); } //Below Code for else if(fileName.endsWith(".docx")) { XWPFDocument docx = new XWPFDocument(new FileInputStream( fileName)); // using XWPFWordExtractor Class XWPFWordExtractor we = new XWPFWordExtractor(docx); k = we.getText(); fileForPdf = new FileOutputStream(new File( "/document/DocxToPdf.pdf")); we.close(); } Document document = new Document(); PdfWriter.getInstance(document, fileForPdf); document.open(); document.add(new Paragraph(k)); document.close(); fileForPdf.close(); } catch (Exception e) { e.printStackTrace(); } } }

theshadow · Answer

補足として、ファイルシステムからコンテンツを読み取ってディスクにシリアル化する代わりに、Word/Excelコンテンツストリームから直接オンザフライでコンテンツを読み取ることもできます。たとえば、CMISリポジトリからコンテンツを取得する場合などです。

例えば.

 //HWPFDocument docx = new HWPFDocument(fs); HWPFDocument docx = new HWPFDocument(doc.getContentStream().getStream());

（ここでdocの型はorg.apache.chemistry.opencmis.client.api.Documentです。この例では、opencmisを使用してAlfrescoリポジトリからWordファイルを取得し、それをPDFに変換するようにコードを調整しました）

HTH

duffymo · Answer

ここにはいくつかのステップがあります：

POIを使用してWord文書を形式にとらわれない形式で読み取る
形式に依存しないフォームをPDFに変換
PDFを書く

POIがステップ2を実行するかどうかはわかりません。 iTextのような何か他のものをお勧めします。

Erich13 · Answer

Kushagraの回答への補足として、更新されたMavenの依存関係は以下のとおりです：

 <!-- xdocreport 2.0.1 converter artifacts -->
 <dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
   <version>2.0.1</version>
 </dependency>
 <dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>fr.opensagres.xdocreport.converter</artifactId>
   <version>2.0.1</version>
 </dependency>
 <dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
   <version>2.0.1</version>
 </dependency>
 <dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
   <version>2.0.1</version>
 </dependency>

ImYuta · Answer

これのおかげで助かりました。URLからdocxファイルを読み込んでPDFに変換します：

pom.xml

<!-- Apache POI core and OOXML support -->
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi</artifactId>
  <version>3.13</version>
</dependency>
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi-ooxml</artifactId>
  <version>3.13</version>
</dependency>
<!-- XWPF to PDF converter -->
<!-- NOTE(review): LATEST makes the build non-reproducible; consider pinning a version. -->
<dependency>
  <groupId>fr.opensagres.xdocreport</groupId>
  <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
  <version>LATEST</version>
</dependency>

main_class

/**
 * Downloads a .docx file from a URL, converts it to PDF in memory and returns
 * the PDF bytes encoded as Base64.
 *
 * @param url location of the .docx file to convert
 * @return the converted PDF, Base64-encoded
 * @throws Exception if the URL cannot be read or the conversion fails
 */
public String wordToPDFPOI(String url) throws Exception {
    // try-with-resources: the network stream is closed even when the conversion throws.
    try (InputStream doc = new URL(url).openStream()) {
        XWPFDocument document = new XWPFDocument(doc);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PdfOptions options = PdfOptions.create();
        PdfConverter.getInstance().convert(document, baos, options);
        // NOTE(review): Base64 is whichever class the original snippet imported
        // (it exposes encodeBytes) - confirm against the project.
        return Base64.encodeBytes(baos.toByteArray());
    }
}