本文介绍如何使用 PDFBox 实现 PDF 的骑缝章、关键字定位签章,以及将 PDF 页面转换为图片的功能。PDFBox 基于 Apache License 2.0 开源,可以免费商用;而 iText(5 及以上版本)采用 AGPL 协议,闭源商用需要购买商业授权。

1. 导入 PDFBox 的 Maven 依赖

<!-- PDFBox: pdfbox and fontbox are kept on the same version.
     The originally pinned 2.0.17 predates security fixes shipped in later 2.0.x
     releases, so a newer 2.0.x patch release (same 2.0 API) is used here. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.30</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>fontbox</artifactId>
    <version>2.0.30</version>
</dependency>

2. 定义工具类PdfBoxListener

该类包含骑缝章(pagingSeal)、关键字定位签章(coordinate)以及将 PDF 页面转换为图片(convertPdfTextToImage)的方法。

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Setter;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Stamps seal images onto a PDF with PDFBox: keyword-anchored seals, a paging seal split
 * across facing page edges, and conversion of the pages into images.
 *
 * <p>The class extends {@link PDFTextStripper} so that {@link #writeString(String, List)} is
 * called back with each text run and its glyph positions while {@code writeText} runs; the
 * positions of matching keywords are collected there and consumed by {@link #coordinate(Map)}.
 *
 * <p>An instance owns the document opened by the constructor; call {@link #close()} when done.
 * Instances hold mutable, unsynchronized state.
 */
@EqualsAndHashCode(callSuper = true)
@Setter
public class PdfBoxListener extends PDFTextStripper {

    /** Resolution, in dots per inch, used when rasterizing pages. */
    private static final float RENDER_DPI = 144;
    /** Width of the complete paging seal in page units; each page shows one half of it. */
    private static final float SEAL_WIDTH = 125;
    /** Height of the paging seal in page units. */
    private static final float SEAL_HEIGHT = 125;
    /** Distance subtracted from the page height when placing the paging seal vertically. */
    private static final float SEAL_TOP_OFFSET = 200;
    /** Gap kept between the left seal half and the right page edge. */
    private static final float SEAL_EDGE_MARGIN = 2;

    /** Keywords to search for. */
    private Set<String> keys;
    /** Keyword positions found on the page currently being scanned. */
    private MultiValueMap<String, float[]> position = new LinkedMultiValueMap<>();
    /** Keyword positions found on all pages; element 2 of each array holds the page number. */
    private MultiValueMap<String, float[]> allPosition = new LinkedMultiValueMap<>();

    /**
     * Opens the PDF that will be stamped.
     *
     * @param source path of the PDF file to load
     * @throws IOException if the file cannot be loaded
     */
    public PdfBoxListener(String source) throws IOException {
        super();
        super.setSortByPosition(true);
        this.document = PDDocument.load(new File(source));
    }

    /**
     * Callback invoked by {@code writeText} for each text run. Records, for every keyword
     * contained in the run, an anchor derived from the glyph at the keyword's first occurrence.
     *
     * <p>The anchor is {@code x + fontSize} and {@code pageHeight - y - 4 * fontSize}, so that
     * the seal ends up over the text.
     *
     * @param str           text of the current run
     * @param textPositions position information of the glyphs that make up the run
     */
    @Override
    protected void writeString(String str, List<TextPosition> textPositions) throws IOException {
        // Nothing to match when called outside coordinate() or for an empty run
        if (keys == null || str == null || textPositions == null || textPositions.isEmpty()) {
            return;
        }
        // Every keyword is checked: stopping at the first hit would drop other keywords
        // on the same run, and which one survives would depend on set iteration order.
        for (String key : keys) {
            if (key == null) {
                continue;
            }
            int charIndex = str.indexOf(key);
            if (charIndex < 0) {
                continue;
            }
            TextPosition anchor = positionAt(textPositions, charIndex);
            float[] idx = new float[3];
            idx[0] = anchor.getX() + anchor.getFontSize();
            idx[1] = anchor.getPageHeight() - anchor.getY() - 4 * anchor.getFontSize();
            LoggerUtil.info("str:{},x:{},y:{},index{}", str, idx[0], idx[1], charIndex);
            position.add(key, idx);
        }
    }

    /**
     * Maps a character index of the run text to the glyph position covering it. A glyph may
     * contribute several characters (or none), so the list index is not the character index.
     * Falls back to the last glyph when the index lies beyond the accumulated text.
     */
    private static TextPosition positionAt(List<TextPosition> textPositions, int charIndex) {
        int consumed = 0;
        for (TextPosition candidate : textPositions) {
            String unicode = candidate.getUnicode();
            consumed += unicode == null ? 0 : unicode.length();
            if (charIndex < consumed) {
                return candidate;
            }
        }
        return textPositions.get(textPositions.size() - 1);
    }

    /**
     * Places a seal image at every occurrence of each keyword.
     * To avoid mis-positioning, full-width spaces can be used in the document as anchors.
     *
     * @param images key: keyword to locate; value: image to draw with its size and offsets
     * @throws IOException if text extraction, image loading or drawing fails
     */
    public void coordinate(Map<String, Image> images) throws IOException {
        keys = images.keySet();
        int pages = document.getNumberOfPages();
        // One image object per keyword, embedded on first use and shared by all occurrences
        Map<String, PDImageXObject> stamps = new java.util.HashMap<>();
        for (int pageNo = 1; pageNo <= pages; pageNo++) {
            position.clear();
            super.setSortByPosition(true);
            super.setStartPage(pageNo);
            super.setEndPage(pageNo);
            // The extracted text is discarded; the call only drives the writeString callback
            super.writeText(document, Writer.nullWriter());
            for (Map.Entry<String, Image> entry : images.entrySet()) {
                String key = entry.getKey();
                List<float[]> hits = position.get(key);
                if (CollectionUtils.isEmpty(hits)) {
                    continue;
                }
                addAll(key, hits, pageNo);
                Image image = entry.getValue();
                PDImageXObject stamp = stamps.get(key);
                if (stamp == null) {
                    stamp = PDImageXObject.createFromFile(image.path, document);
                    stamps.put(key, stamp);
                }
                // resetContext = true isolates the drawing from graphics state left behind
                // by the existing page content; try-with-resources closes on failure too
                try (PDPageContentStream contentStream = new PDPageContentStream(document,
                        document.getPage(pageNo - 1), PDPageContentStream.AppendMode.APPEND, true, true)) {
                    for (float[] hit : hits) {
                        contentStream.drawImage(stamp, hit[0] + image.x, hit[1] + image.y,
                                image.width, image.height);
                    }
                }
            }
        }
    }

    /** Tags each position with its page number and adds it to the all-pages collection. */
    private void addAll(String key, List<float[]> floats, int i) {
        floats.forEach(f -> f[2] = i);
        allPosition.addAll(key, floats);
    }

    /** Logs, for every keyword, the page numbers on which it was found. */
    private void print() {
        allPosition.forEach((k, v) -> {
            List<Integer> pageNumbers = v.stream().map(hit -> (int) hit[2]).toList();
            LoggerUtil.info("PDF END ==== keyWord:{},pages:{}", k, StringUtils.join(pageNumbers, ","));
        });
    }

    /**
     * Draws a paging seal: the seal image is split into a left and a right half; pages with an
     * even zero-based index get the left half against the right page edge, the other pages get
     * the right half at x = 0. A transparent image is recommended.
     *
     * @param seal path of the seal image
     * @throws IOException if the seal cannot be read or decoded, or drawing fails
     */
    public void pagingSeal(String seal) throws IOException {
        BufferedImage sealImage;
        try (InputStream in = new FileInputStream(seal)) {
            sealImage = ImageIO.read(in);
        }
        // ImageIO.read returns null when no registered reader understands the data
        if (sealImage == null) {
            throw new IOException("Cannot decode seal image: " + seal);
        }
        BufferedImage[] halves = ImageUtil.splitImage(sealImage, 1, 2);
        // Embed each half once, straight from memory, and reuse it on every page
        PDImageXObject leftHalf = PDImageXObject.createFromByteArray(
                document, bufferedImageToBytes(halves[0], "png"), "left");
        PDImageXObject rightHalf = PDImageXObject.createFromByteArray(
                document, bufferedImageToBytes(halves[1], "png"), "right");
        float halfWidth = SEAL_WIDTH / 2;
        int pages = document.getNumberOfPages();
        for (int i = 0; i < pages; i++) {
            PDPage page = document.getPage(i);
            PDRectangle mediaBox = page.getMediaBox();
            float y = mediaBox.getHeight() - SEAL_HEIGHT / 2 - SEAL_TOP_OFFSET;
            boolean leftPart = i % 2 == 0;
            float x = leftPart ? mediaBox.getWidth() - halfWidth - SEAL_EDGE_MARGIN : 0;
            try (PDPageContentStream contentStream = new PDPageContentStream(document, page,
                    PDPageContentStream.AppendMode.APPEND, false, true)) {
                contentStream.drawImage(leftPart ? leftHalf : rightHalf, x, y, halfWidth, SEAL_HEIGHT);
            }
        }
    }

    /**
     * Builds a new document in which every page of the given PDF is replaced by a JPEG
     * rendering of that page. Each new page takes the size of the source page's crop box
     * (swapped for 90/270 degree rotations), so non-A4 pages are not distorted.
     *
     * <p>The source file is opened and closed inside this method; the document owned by this
     * instance is not touched.
     *
     * @param path path of the PDF to convert
     * @return the image-only document; the caller is responsible for closing it
     * @throws IOException if loading, rendering or writing fails
     */
    public PDDocument convertPdfTextToImage(String path) throws IOException {
        PDDocument newPdf = new PDDocument();
        try (PDDocument source = PDDocument.load(new File(path))) {
            PDFRenderer renderer = new PDFRenderer(source);
            int pageCount = source.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, RENDER_DPI);
                PDPage sourcePage = source.getPage(i);
                PDRectangle cropBox = sourcePage.getCropBox();
                int rotation = sourcePage.getRotation();
                boolean sideways = rotation == 90 || rotation == 270;
                float pageWidth = sideways ? cropBox.getHeight() : cropBox.getWidth();
                float pageHeight = sideways ? cropBox.getWidth() : cropBox.getHeight();
                PDPage page = new PDPage(new PDRectangle(pageWidth, pageHeight));
                byte[] jpgs = bufferedImageToBytes(image, "jpg");
                PDImageXObject xObject = PDImageXObject.createFromByteArray(newPdf, jpgs, null);
                try (PDPageContentStream stream = new PDPageContentStream(newPdf, page)) {
                    stream.drawImage(xObject, 0, 0, pageWidth, pageHeight);
                }
                newPdf.addPage(page);
            }
            return newPdf;
        } catch (IOException | RuntimeException e) {
            // Do not leak the half-built document when conversion fails
            try {
                newPdf.close();
            } catch (IOException closeError) {
                e.addSuppressed(closeError);
            }
            throw e;
        }
    }

    /**
     * Encodes an image into a byte array.
     *
     * @param image  image to encode
     * @param format ImageIO format name such as "jpg", "png" or "gif"
     * @return the encoded bytes
     * @throws IOException if encoding fails
     */
    public byte[] bufferedImageToBytes(BufferedImage image, String format) throws IOException {
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            ImageIO.write(image, format, baos);
            return baos.toByteArray();
        }
    }

    /**
     * Saves the stamped document to the given path, then rewrites that file as an image-only
     * PDF and logs the pages on which keywords were found.
     *
     * @param pdf absolute path of the output file
     * @throws IOException if saving or converting fails
     */
    public void save(String pdf) throws IOException {
        document.save(pdf);
        try (PDDocument toImage = convertPdfTextToImage(pdf)) {
            toImage.save(pdf);
        }
        print();
    }

    /** Closes the document owned by this instance; a failure is logged, not rethrown. */
    public void close() {
        try {
            document.close();
        } catch (IOException e) {
            LoggerUtil.error("关闭文件流异常!", e);
        }
    }

    /** Seal image to draw at a keyword, with its drawn size and offsets from the anchor. */
    @Data
    @AllArgsConstructor
    public static class Image {
        String path; // image file path
        float width; // drawn width
        float height; // drawn height
        float x; // x offset added to the anchor
        float y; // y offset added to the anchor
    }
}

3. 附带的图片处理工具类ImageUtil

该类包含切割图片(splitImage)、合并图片(mergeImage / mergeImageBytes)以及读取图片(readImage)等方法。

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;

/**
 * Image helpers for seal processing: splitting an image into tiles, merging images and
 * encoding images as PNG bytes.
 */
public class ImageUtil {

    /**
     * Splits an image into {@code rows * cols} equally sized tiles, ordered row by row.
     * Tile size is the integer quotient of the image size, so remainder pixels on the right
     * and bottom edges are not included. Tiles have an alpha channel, so transparency is kept.
     *
     * @param image source image
     * @param rows  number of rows, must be positive
     * @param cols  number of columns, must be positive
     * @return the tiles
     * @throws IllegalArgumentException if {@code rows} or {@code cols} is not positive
     */
    public static BufferedImage[] splitImage(BufferedImage image, int rows, int cols) {
        if (rows <= 0 || cols <= 0) {
            throw new IllegalArgumentException(
                    "rows and cols must be positive: rows=" + rows + ", cols=" + cols);
        }
        int chunkWidth = image.getWidth() / cols;
        int chunkHeight = image.getHeight() / rows;
        BufferedImage[] imgs = new BufferedImage[rows * cols];
        int count = 0;
        for (int row = 0; row < rows; row++) {
            for (int col = 0; col < cols; col++) {
                // ARGB target keeps the background transparent
                BufferedImage tile = new BufferedImage(chunkWidth, chunkHeight, BufferedImage.TYPE_INT_ARGB);
                Graphics2D gr = tile.createGraphics();
                try {
                    gr.drawImage(image, 0, 0,
                            chunkWidth, chunkHeight,
                            chunkWidth * col, chunkHeight * row,
                            chunkWidth * col + chunkWidth,
                            chunkHeight * row + chunkHeight, null);
                } finally {
                    gr.dispose();
                }
                imgs[count++] = tile;
            }
        }
        return imgs;
    }

    /**
     * Encodes an image as PNG.
     *
     * @param bufferedImage image to encode
     * @return the PNG bytes
     * @throws IOException if encoding fails
     */
    public static byte[] imageToBytes(BufferedImage bufferedImage) throws IOException {
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            ImageIO.write(bufferedImage, "png", baos);
            return baos.toByteArray();
        }
    }

    /**
     * Merges encoded images, in list order, into one PNG. Null, empty and undecodable entries
     * are skipped; the caller's list is not modified.
     *
     * @param imageMerge how consecutive images are combined
     * @param bytes      encoded images
     * @return the merged PNG bytes, or an empty array when nothing could be decoded or
     *         reading/encoding failed
     */
    public static byte[] mergeImageBytes(ImageMerge imageMerge, List<byte[]> bytes) {
        if (CollectionUtils.isEmpty(bytes)) {
            return new byte[0];
        }
        // Filter into a new list: removing from the argument would alter the caller's data
        // and fails outright on unmodifiable lists
        List<byte[]> payloads = bytes.stream().filter(b -> b != null && b.length > 0).toList();
        try {
            BufferedImage merged = null;
            for (byte[] payload : payloads) {
                BufferedImage next = ImageIO.read(new ByteArrayInputStream(payload));
                // ImageIO.read returns null when no registered reader understands the data
                if (next == null) {
                    continue;
                }
                merged = merged == null ? next : combine(imageMerge, merged, next);
            }
            return merged == null ? new byte[0] : imageToBytes(merged);
        } catch (IOException e) {
            LoggerUtil.error("read image byte error,exception:", e);
            return new byte[0];
        }
    }

    /**
     * Merges image files, in list order, into one PNG.
     *
     * @param merge  how consecutive images are combined
     * @param images image file paths; the first one must exist and be decodable, otherwise an
     *               empty array is returned; later entries that are not files or cannot be
     *               decoded are skipped
     * @return the merged PNG bytes, or an empty array on failure
     */
    public static byte[] mergeImage(ImageMerge merge, List<String> images) {
        if (CollectionUtils.isEmpty(images) || images.get(0) == null) {
            return new byte[0];
        }
        try {
            BufferedImage merged = ImageIO.read(new File(images.get(0)));
            if (merged == null) {
                return new byte[0];
            }
            for (int i = 1; i < images.size(); i++) {
                String path = images.get(i);
                if (path == null) {
                    continue;
                }
                File file = new File(path);
                if (!file.isFile()) {
                    continue;
                }
                BufferedImage next = ImageIO.read(file);
                if (next == null) {
                    continue;
                }
                merged = combine(merge, merged, next);
            }
            return imageToBytes(merged);
        } catch (IOException e) {
            LoggerUtil.error("read image error,exception:", e);
        }
        return new byte[0];
    }

    /** Combines two images according to the merge mode. */
    private static BufferedImage combine(ImageMerge mode, BufferedImage first, BufferedImage second) {
        return switch (mode) {
            case HORIZONTAL -> horizontal(first, second);
            case VERTICAL -> vertical(first, second);
            default -> nested(first, second);
        };
    }

    /** Places {@code image2} to the right of {@code image1}. */
    private static BufferedImage horizontal(BufferedImage image1, BufferedImage image2) {
        BufferedImage image = new BufferedImage(image1.getWidth() + image2.getWidth(),
                Math.max(image1.getHeight(), image2.getHeight()),
                BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = image.createGraphics();
        g.drawImage(image1, 0, 0, null);
        g.drawImage(image2, image1.getWidth(), 0, null);
        g.dispose();
        return image;
    }

    /** Places {@code image2} below {@code image1}; ARGB keeps transparency, as in horizontal. */
    private static BufferedImage vertical(BufferedImage image1, BufferedImage image2) {
        BufferedImage image = new BufferedImage(Math.max(image1.getWidth(), image2.getWidth()),
                image1.getHeight() + image2.getHeight(),
                BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = image.createGraphics();
        g.drawImage(image1, 0, 0, null);
        g.drawImage(image2, 0, image1.getHeight(), null);
        g.dispose();
        return image;
    }

    /** Draws {@code image2} on top of {@code image1}; ARGB keeps transparency, as in horizontal. */
    private static BufferedImage nested(BufferedImage image1, BufferedImage image2) {
        BufferedImage image = new BufferedImage(Math.max(image1.getWidth(), image2.getWidth()),
                Math.max(image1.getHeight(), image2.getHeight()),
                BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = image.createGraphics();
        g.drawImage(image1, 0, 0, null);
        g.drawImage(image2, 0, 0, null);
        g.dispose();
        return image;
    }

    /**
     * Reads an image file and re-encodes it as PNG.
     *
     * @param filePath path of the image file
     * @return the PNG bytes, or {@code null} when the path is blank or the file cannot be
     *         read or decoded
     */
    public static byte[] readImage(String filePath) {
        if (StringUtils.isBlank(filePath)) {
            return null;
        }
        try {
            BufferedImage read = ImageIO.read(new File(filePath));
            // ImageIO.read returns null for data no registered reader understands
            if (read == null) {
                return null;
            }
            return imageToBytes(read);
        } catch (IOException e) {
            LoggerUtil.error("读取图片失败,exception:", e);
            return null;
        }
    }

    /** How two images are combined when merging. */
    public enum ImageMerge {
        HORIZONTAL,
        VERTICAL,
        NESTED
    }
}

4. 使用方式

// Create the stamping helper for the PDF at `path`
PdfBoxListener listener = new PdfBoxListener(path);
try {
    // Paging seal
    listener.pagingSeal(seal);
    // Keyword-anchored seals: key = keyword to locate, value = image with size and offsets
    Map<String, PdfBoxListener.Image> map = new HashMap<>();
    map.put("印章1", new PdfBoxListener.Image(seal, 250, 250, -50, -60));
    map.put("印章2:", new PdfBoxListener.Image(seal, 250, 250, 50, -60));
    listener.coordinate(map);
    // Overwrite the source or pass a new path; save() also converts the pages to images
    listener.save(path);
} finally {
    // Release the document even when one of the steps above throws
    listener.close();
}