Converting a docx document to a PDF file

Posted by AJW on Sun, 03 May 2020 23:41:37 +0200

Note: only .docx files can be converted here; the legacy .doc format is not supported. A link to the source code is at the end of the article.

The code runs on both Windows and Linux. It also supports placeholder replacement before conversion, because some document content needs to be generated dynamically by code.

The following are the specific operation steps:

  • maven dependency
<!-- docx-to-PDF conversion dependencies (begin) -->
        <!-- Apache POI artifacts; the Java code uses the org.apache.poi.xwpf.usermodel classes to read and write docx content -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.1</version>
        </dependency>
        <!-- NOTE(review): the Java code imports com.lowagie.text.* rather than com.itextpdf.*; confirm whether this dependency is actually required -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.4.3</version>
        </dependency>

        <!-- XDocReport converters: provide PdfConverter/PdfOptions and XHTMLConverter/XHTMLOptions used by the Java code -->
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
            <version>1.0.6</version>
        </dependency>
        <!-- NOTE(review): the Java code also uses org.apache.commons.io (FileUtils, IOUtils), which is not declared here; presumably it arrives transitively, verify -->
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-ImportXHTML</artifactId>
            <version>3.2.0</version>
        </dependency>
        <!-- docx-to-PDF conversion dependencies (end) -->
  • java code
package com.gitee.docx2pdf;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;

import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;

import fr.opensagres.xdocreport.itext.extension.font.IFontProvider;

/**
 * Document utility class: converts docx content to PDF or XHTML and replaces
 * placeholder text inside a docx document before conversion.
 */
public class DocUtil {

    /**
     * Font provider handed to the PDF converter so that Chinese text can be
     * rendered. Every requested font is mapped onto the "STSong-Light" base
     * font while keeping the requested size, style and color.
     */
    private static final IFontProvider CHINESE_FONT_PROVIDER = new IFontProvider() {

        @Override
        public Font getFont(String familyName, String encoding, float size, int style, java.awt.Color color) {
            try {
                BaseFont bfChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
                // Use the size requested by the converter; a fixed size would
                // flatten headings and body text to the same height.
                Font fontChinese = new Font(bfChinese, size, style, color);
                if (familyName != null) {
                    fontChinese.setFamily(familyName);
                }
                return fontChinese;
            } catch (Exception e) {
                // Best effort: report the problem and let the converter
                // handle the missing font.
                e.printStackTrace();
                return null;
            }
        }
    };

    /**
     * Demo entry point: converts the classpath resource {@code 1.docx} to
     * {@code 1.pdf}, then fills the placeholders of {@code 2.docx} and writes
     * the result to {@code data.pdf}.
     *
     * @param args
     *            unused
     * @throws Exception
     *             if a resource is missing or a conversion fails
     */
    public static void main(String[] args) throws Exception {
        String docx = "1.docx";
        String pdf = "1.pdf";

        // Direct conversion
        InputStream docxStream = openResource(docx);
        try {
            byte[] pdfData = docxToPdf(docxStream);
            FileUtils.writeByteArrayToFile(new File(pdf), pdfData);
        } finally {
            IOUtils.closeQuietly(docxStream);
        }

        // Example of converting after replacing content
        InputStream docxStream2 = openResource("2.docx");
        try {
            Map<String, String> data = new HashMap<String, String>();
            data.put("{title}", "Title Content");
            data.put("{username}", "Zhang San");
            byte[] pdfData2 = bindDocxDataAndToPdf(docxStream2, data);
            FileUtils.writeByteArrayToFile(new File("data.pdf"), pdfData2);
        } finally {
            IOUtils.closeQuietly(docxStream2);
        }

        System.out.println("finished.");
    }

    /**
     * Opens a classpath resource and fails with a clear message when it is
     * missing, instead of passing {@code null} on to the converter.
     *
     * @param name
     *            resource name, resolved through this class's class loader
     * @return an open stream; the caller is responsible for closing it
     * @throws IOException
     *             if the resource cannot be found
     */
    private static InputStream openResource(String name) throws IOException {
        InputStream stream = DocUtil.class.getClassLoader().getResourceAsStream(name);
        if (stream == null) {
            throw new IOException("Classpath resource not found: " + name);
        }
        return stream;
    }

    /**
     * Replaces placeholder text in a docx document and converts the result
     * to PDF.
     *
     * @param input
     *            docx input stream (not closed by this method)
     * @param data
     *            placeholder-to-replacement pairs
     * @return the PDF data
     * @throws Exception
     *             if reading, replacing or converting fails
     */
    public static byte[] bindDocxDataAndToPdf(InputStream input, Map<String, String> data) throws Exception {
        byte[] replacedContent = replaceDocxContent(input, data);
        return docxToPdf(new ByteArrayInputStream(replacedContent));
    }

    /**
     * Converts a docx document to PDF.
     *
     * @param docxStream
     *            docx input stream (not closed by this method)
     * @return the PDF data
     * @throws Exception
     *             wrapping any {@link IOException} raised while reading or
     *             converting
     */
    public static byte[] docxToPdf(InputStream docxStream) throws Exception {
        try {
            XWPFDocument doc = new XWPFDocument(docxStream);

            PdfOptions options = PdfOptions.create();
            // Chinese font processing
            options.fontProvider(CHINESE_FONT_PROVIDER);

            ByteArrayOutputStream targetStream = new ByteArrayOutputStream();
            PdfConverter.getInstance().convert(doc, targetStream, options);

            return targetStream.toByteArray();
        } catch (IOException e) {
            throw new Exception(e);
        }
    }

    /**
     * Converts a docx document to XHTML content.
     *
     * @param docxIn
     *            docx input stream (not closed by this method)
     * @return the XHTML data
     * @throws Exception
     *             wrapping any {@link IOException} raised while reading or
     *             converting
     */
    public static byte[] docxToHtml(InputStream docxIn) throws Exception {
        try {
            XWPFDocument document = new XWPFDocument(docxIn);

            XHTMLOptions options = XHTMLOptions.create();

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            XHTMLConverter.getInstance().convert(document, out, options);

            return out.toByteArray();
        } catch (IOException e) {
            throw new Exception(e);
        }
    }

    /**
     * Replaces placeholder text in a docx document.
     * <p>
     * Only the paragraphs returned by {@code getParagraphs()} and the cells
     * of the tables returned by {@code getTables()} are processed.
     *
     * @param in
     *            docx input stream (not closed by this method)
     * @param map
     *            placeholder-to-replacement pairs; a {@code null} or empty map
     *            leaves the text unchanged
     * @return the bytes of the docx document after replacement
     * @throws Exception
     *             if the document cannot be read or written; the original
     *             {@link IOException} is kept as the cause
     */
    public static byte[] replaceDocxContent(InputStream in, Map<String, String> map) throws Exception {
        try {
            // Read word template
            XWPFDocument hdt = new XWPFDocument(in);

            // Replace paragraph content
            replaceParagraphsContent(hdt.getParagraphs(), map);

            // Replace table contents
            for (XWPFTable table : hdt.getTables()) {
                int rcount = table.getNumberOfRows();
                // Traverse rows in table
                for (int i = 0; i < rcount; i++) {
                    XWPFTableRow row = table.getRow(i);
                    // Traverse cells in row
                    for (XWPFTableCell cell : row.getTableCells()) {
                        replaceParagraphsContent(cell.getParagraphs(), map);
                    }
                }
            }

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            hdt.write(out);
            return out.toByteArray();
        } catch (IOException e) {
            // Keep the original message and preserve the cause for diagnosis.
            throw new Exception(e.getMessage(), e);
        }
    }

    /**
     * Applies every replacement in {@code map} to the text of each run of
     * the given paragraphs.
     * <p>
     * Matching is done run by run on the text at position 0, so a placeholder
     * whose characters are spread over several runs is not replaced.
     *
     * @param paragraphs
     *            paragraphs to process; may be {@code null}
     * @param map
     *            placeholder-to-replacement pairs; may be {@code null} or empty
     * @throws RuntimeException
     *             if a matching placeholder is mapped to {@code null}
     */
    private static void replaceParagraphsContent(List<XWPFParagraph> paragraphs, Map<String, String> map) {
        if (paragraphs == null || map == null || map.isEmpty()) {
            return;
        }
        for (XWPFParagraph paragraph : paragraphs) {
            for (XWPFRun run : paragraph.getRuns()) {
                String text = run.getText(0);
                if (text == null) {
                    continue;
                }
                boolean isSetText = false;
                for (Entry<String, String> entry : map.entrySet()) {
                    String key = entry.getKey();
                    if (!text.contains(key)) {
                        continue;
                    }
                    String value = entry.getValue();
                    if (value == null) {
                        throw new RuntimeException(key + "The corresponding value cannot be null");
                    }
                    // Text substitution
                    text = text.replace(key, value);
                    isSetText = true;
                }
                // Only rewrite runs that actually changed.
                if (isSetText) {
                    run.setText(text, 0);
                }
            }
        }
    }
}
  • Add font resource file

Add the font resource files to the project's resources directory. The specific files can be seen in the source code linked below.
The resource files used here were copied directly from itextasian-1.5.2.jar, because itextasian-1.5.2.jar cannot be downloaded from the Maven Central repository.

Finally, run the main method in DocUtil to test.

Source code: Click to go

Topics: Apache Java Windows Linux