diff --git a/community/document-parsers/document-parser-pdf-tables/pom.xml b/community/document-parsers/document-parser-pdf-tables/pom.xml
new file mode 100644
index 00000000..80402948
--- /dev/null
+++ b/community/document-parsers/document-parser-pdf-tables/pom.xml
@@ -0,0 +1,70 @@
+
+
+
+
+ 4.0.0
+
+
+ com.alibaba.cloud.ai
+ spring-ai-alibaba
+ ${revision}
+ ../../../pom.xml
+
+
+ document-parser-pdf-tables
+
+
+ 17
+ 17
+ UTF-8
+
+
+
+
+
+ com.alibaba.cloud.ai
+ spring-ai-alibaba-core
+ ${project.parent.version}
+
+
+
+
+ org.springframework.ai
+ spring-ai-test
+ test
+
+
+
+ fr.neolegal
+ tabula
+ 1.0.12
+
+
+
+
+
+
+ org.springframework.boot
+ spring-boot-maven-plugin
+ ${spring-boot.version}
+
+
+
+
+
diff --git a/community/document-parsers/document-parser-pdf-tables/src/main/java/com/alibaba/cloud/ai/parser/pdf/tables/PdfTablesParser.java b/community/document-parsers/document-parser-pdf-tables/src/main/java/com/alibaba/cloud/ai/parser/pdf/tables/PdfTablesParser.java
new file mode 100644
index 00000000..3731573c
--- /dev/null
+++ b/community/document-parsers/document-parser-pdf-tables/src/main/java/com/alibaba/cloud/ai/parser/pdf/tables/PdfTablesParser.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2024-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.alibaba.cloud.ai.parser.pdf.tables;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import com.alibaba.cloud.ai.document.DocumentParser;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import technology.tabula.ObjectExtractor;
+import technology.tabula.Page;
+import technology.tabula.Table;
+import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
+
+import org.springframework.ai.document.Document;
+
+/**
+ * Extracts tabular data from a page of a PDF file. Compared with plain Apache PDFBox
+ * text extraction, Tabula recognizes table structure more reliably. This parser uses
+ * tabula-java to detect the tables and returns the result as a
+ * List of {@link Document}, one per table row.
+ *
+ * @author yuluo
+ * @author yuluo
+ */
+
+public class PdfTablesParser implements DocumentParser {
+
+	/**
+	 * The 1-based page number of the PDF file to be parsed. Default value is 1.
+	 */
+	private final Integer page;
+
+	/** Metadata entries copied onto every produced document; never {@code null}. */
+	private final Map<String, Object> metadata;
+
+	/** Creates a parser for the first page with no extra metadata. */
+	public PdfTablesParser() {
+		this(1);
+	}
+
+	/**
+	 * Creates a parser for the given page with no extra metadata.
+	 * @param pageNumber the 1-based page to extract tables from
+	 */
+	public PdfTablesParser(Integer pageNumber) {
+		this(pageNumber, Map.of());
+	}
+
+	/**
+	 * Creates a parser for the given page that tags every document with metadata.
+	 * @param pageNumber the 1-based page to extract tables from
+	 * @param metadata entries to add to each document; {@code null} means none
+	 */
+	public PdfTablesParser(Integer pageNumber, Map<String, Object> metadata) {
+		this.page = pageNumber;
+		this.metadata = (metadata != null) ? metadata : Map.of();
+	}
+
+	/**
+	 * Extracts the tables on the configured page and returns one document per row.
+	 * @param inputStream the PDF content
+	 * @return the row documents; empty (never {@code null}) if no table row is found
+	 * @throws RuntimeException wrapping any failure raised while loading or extracting
+	 */
+	@Override
+	public List<Document> parse(InputStream inputStream) {
+		try {
+			return data2Document(parseTables(extraTableData(inputStream)));
+		}
+		catch (Exception e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/**
+	 * Loads the PDF, extracts the tables of the configured page, then closes the document.
+	 * @param in the PDF content
+	 * @return the tables detected on the page
+	 * @throws Exception if the PDF cannot be loaded or the page number is invalid
+	 */
+	protected List<Table> extraTableData(InputStream in) throws Exception {
+		if (page == null || page < 1) {
+			throw new RuntimeException("The page number must be greater than or equal to 1.");
+		}
+		// try-with-resources so the document is closed even when extraction fails
+		try (PDDocument document = PDDocument.load(in)) {
+			int numberOfPages = document.getNumberOfPages();
+			// "<= 0" rather than "< 0": a page count is never negative
+			if (numberOfPages <= 0) {
+				throw new RuntimeException("No page found in the PDF file.");
+			}
+			if (page > numberOfPages) {
+				throw new RuntimeException("The page number is greater than the number of pages in the PDF file.");
+			}
+			Page extract = new ObjectExtractor(document).extract(this.page);
+			return new SpreadsheetExtractionAlgorithm().extract(extract);
+		}
+	}
+
+	/**
+	 * Renders every row of every table as one string: the cell texts joined by
+	 * {@code |} plus a trailing {@code |}.
+	 * @param data the tables to render
+	 * @return one string per row, in encounter order
+	 */
+	protected List<String> parseTables(List<Table> data) {
+		if (data.isEmpty()) {
+			return Collections.emptyList();
+		}
+		return data.stream()
+			.flatMap(table -> table.getRows().stream())
+			.map(cells -> cells.stream()
+				// flatten a cell to one line: drop "\r", turn "\n" into a space
+				.map(content -> content.getText().replace("\r", "").replace("\n", " "))
+				.collect(Collectors.joining("|", "", "|")))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Wraps each row string in a {@link Document} carrying the configured metadata.
+	 * @param data the row strings
+	 * @return the documents; an empty list, never {@code null}, when there are no rows
+	 */
+	private List<Document> data2Document(List<String> data) {
+		List<Document> documents = new ArrayList<>(data.size());
+		for (String datum : data) {
+			Document doc = new Document(datum);
+			doc.getMetadata().putAll(this.metadata);
+			documents.add(doc);
+		}
+		return documents;
+	}
+
+}
diff --git a/community/document-parsers/document-parser-pdf-tables/src/test/java/com/alibaba/cloud/ai/parser/pdf/tables/PdfTablesParserTests.java b/community/document-parsers/document-parser-pdf-tables/src/test/java/com/alibaba/cloud/ai/parser/pdf/tables/PdfTablesParserTests.java
new file mode 100644
index 00000000..cc20bc4c
--- /dev/null
+++ b/community/document-parsers/document-parser-pdf-tables/src/test/java/com/alibaba/cloud/ai/parser/pdf/tables/PdfTablesParserTests.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2024-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.alibaba.cloud.ai.parser.pdf.tables;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.locationtech.jts.util.Assert;
+import technology.tabula.ObjectExtractor;
+import technology.tabula.Page;
+import technology.tabula.PageIterator;
+import technology.tabula.RectangularTextContainer;
+import technology.tabula.Table;
+import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
+
+import org.springframework.ai.document.Document;
+import org.springframework.core.io.DefaultResourceLoader;
+import org.springframework.core.io.Resource;
+
+/**
+ * @author yuluo
+ * @author yuluo
+ */
+
+class PdfTablesParserTests {
+
+ private Resource resource;
+
+ private Resource resource2;
+
+ @BeforeEach
+ void setUp() {
+
+ resource = new DefaultResourceLoader().getResource("classpath:/pdf-tables.pdf");
+ resource2 = new DefaultResourceLoader().getResource("classpath:/sample1.pdf");
+
+ if (!resource.exists()) {
+ throw new RuntimeException("Resource not found: " + resource);
+ }
+ }
+
+ /**
+ * tabula-java use.
+ */
+ @Test
+ void PdfTableTest() throws IOException {
+
+ InputStream in = new FileInputStream(resource.getFile());
+ try (PDDocument document = PDDocument.load(in)) {
+ SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
+ PageIterator pi = new ObjectExtractor(document).extract();
+ while (pi.hasNext()) {
+ // iterate over the pages of the document
+ Page page = pi.next();
+ List table = sea.extract(page);
+ // iterate over the tables of the page
+ for (Table tables : table) {
+ List> rows = tables.getRows();
+ // iterate over the rows of the table
+ for (List cells : rows) {
+ // print all column-cells of the row plus linefeed
+ for (RectangularTextContainer content : cells) {
+ // Note: Cell.getText() uses \r to concat text chunk
+ String text = content.getText().replace("\r", " ");
+ System.out.print(text + "|");
+ }
+ System.out.println();
+ }
+ }
+ }
+ }
+
+ }
+
+ @Test
+ void PdfTablesParseTest() throws IOException {
+
+ String res = """
+ |name|age|sex|
+ |zhangsan|20|m|
+ |lisi|21|w|
+ |wangwu|22|m|
+ |zhangliu|23|w|
+ |songqi|24|w|
+ """;
+
+ InputStream in = new FileInputStream(resource.getFile());
+ PdfTablesParser pdfTablesParser = new PdfTablesParser();
+ List docs = pdfTablesParser.parse(in);
+
+ StringBuilder sb = new StringBuilder();
+ docs.subList(1, docs.size()).forEach(doc -> sb.append(doc.getText() + "\n"));
+
+ Assert.equals(res, sb.toString());
+ }
+
+ @Test
+ void PdfTablesParseTest2() throws IOException {
+
+ String res = """
+ Sample Date:|May 2001|
+ Prepared by:|Accelio Present Applied Technology|
+ Created and Tested Using:|•Accelio Present Central 5.4•Accelio Present Output Designer 5.4|
+ Features Demonstrated:|•Primary bookmarks in a PDF file.•Secondary bookmarks in a PDF file.|
+ """;
+
+ InputStream in = new FileInputStream(resource2.getFile());
+ PdfTablesParser pdfTablesParser = new PdfTablesParser();
+ List docs = pdfTablesParser.parse(in);
+
+ StringBuilder sb = new StringBuilder();
+ docs.forEach(doc -> sb.append(doc.getText() + "\n"));
+
+ Assert.equals(res, sb.toString());
+
+ }
+
+ @Test
+ void PdfTablesParseTest3() throws IOException {
+
+ String res = """
+ |Filename|||escription|escription||
+ |||||||
+ ap_bookmark.IFD|The template design.||||||
+ ap_bookmark.mdf|The template targeted for PDF output.||||||
+ ap_bookmark.dat|A sample data file in DAT format.||||||
+ ap_bookmark.bmk|A sample bookmark file.||||||
+ ap_bookmark.pdf|Sample PDF output.||||||
+ ap_bookmark_doc.pdf|A document describing the sample.||||||
+ |To bookmark by|Use the command line parameter||
+ |Invoices|-abmkap_bookmark.bmk -abmsinvoices||
+ |Type|-abmkap_bookmark.bmk -abmstype||
+ |Amount|-abmkap_bookmark.bmk -abmsamount||
+ """;
+
+ InputStream in = new FileInputStream(resource2.getFile());
+ PdfTablesParser pdfTablesParser = new PdfTablesParser(3);
+ List docs = pdfTablesParser.parse(in);
+
+ StringBuilder sb = new StringBuilder();
+ docs.forEach(doc -> sb.append(doc.getText() + "\n"));
+
+ Assert.equals(res, sb.toString());
+
+ }
+
+}
diff --git a/community/document-parsers/document-parser-pdf-tables/src/test/resources/pdf-tables.pdf b/community/document-parsers/document-parser-pdf-tables/src/test/resources/pdf-tables.pdf
new file mode 100644
index 00000000..830880f2
Binary files /dev/null and b/community/document-parsers/document-parser-pdf-tables/src/test/resources/pdf-tables.pdf differ
diff --git a/community/document-parsers/document-parser-pdf-tables/src/test/resources/sample1.pdf b/community/document-parsers/document-parser-pdf-tables/src/test/resources/sample1.pdf
new file mode 100644
index 00000000..8efd05c3
Binary files /dev/null and b/community/document-parsers/document-parser-pdf-tables/src/test/resources/sample1.pdf differ
diff --git a/community/document-readers/github-document-reader/pom.xml b/community/document-readers/github-document-reader/pom.xml
index 3b9e3fba..759fddd3 100644
--- a/community/document-readers/github-document-reader/pom.xml
+++ b/community/document-readers/github-document-reader/pom.xml
@@ -74,6 +74,7 @@
org.springframework.boot
spring-boot-maven-plugin
+ ${spring-boot.version}
org.apache.maven.plugins
diff --git a/community/document-readers/yuque-document-reader/pom.xml b/community/document-readers/yuque-document-reader/pom.xml
index 0fc0388a..8d978ab7 100644
--- a/community/document-readers/yuque-document-reader/pom.xml
+++ b/community/document-readers/yuque-document-reader/pom.xml
@@ -74,6 +74,7 @@
org.springframework.boot
spring-boot-maven-plugin
+ ${spring-boot.version}
org.apache.maven.plugins
diff --git a/pom.xml b/pom.xml
index b14bad14..d87a0b22 100644
--- a/pom.xml
+++ b/pom.xml
@@ -69,6 +69,7 @@
community/document-parsers/document-parser-apache-pdfbox
community/document-parsers/document-parser-markdown
community/document-parsers/document-parser-tika
+ community/document-parsers/document-parser-pdf-tables