From 3412fb7cd1721948aa93ee6eeff0b3410c4459b1 Mon Sep 17 00:00:00 2001 From: quyixiao <2621048238@qq.com> Date: Mon, 15 Sep 2025 21:04:31 +0800 Subject: [PATCH] jruqwhnt --- .../java/com/heyu/api/alibaba/LLMUtils.java | 9 +- .../request/common/text/AIOcrRequest.java | 35 +----- .../api/controller/ocr/AIOcrController.java | 115 +++++++++++++++--- 3 files changed, 112 insertions(+), 47 deletions(-) diff --git a/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java b/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java index 58f0439..080ab49 100644 --- a/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java +++ b/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java @@ -11,6 +11,7 @@ import com.alibaba.dashscope.common.Role; import com.alibaba.fastjson.JSON; import com.heyu.api.alibaba.bailian.AlibabaOCREnums; import com.heyu.api.alibaba.resp.ModelResult; +import com.heyu.api.data.utils.StringUtils; import lombok.extern.slf4j.Slf4j; import java.util.*; @@ -94,13 +95,17 @@ public class LLMUtils { /** * https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg */ - public static ModelResult callOcr(String image, AlibabaOCREnums alibabaOCREnums) { + public static ModelResult callOcr(String image,String imageBase64, AlibabaOCREnums alibabaOCREnums) { ModelResult modelResult = new ModelResult(); try { Date startDate = new Date(); MultiModalConversation conv = new MultiModalConversation(); Map map = new HashMap<>(); - map.put("image", image); + if(StringUtils.isNotEmpty(image)){ + map.put("image", image); + }else{ + map.put("image", "data:image/jpeg;base64," + imageBase64); + } // 输入图像的最大像素阈值,超过该值图像会按原比例缩小,直到总像素低于max_pixels map.put("max_pixels", "6422528"); // 输入图像的最小像素阈值,小于该值图像会按原比例放大,直到总像素大于min_pixels diff --git a/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java b/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java index b2f8b64..3d05b33 100644 --- 
a/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java +++ b/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java @@ -6,21 +6,18 @@ import lombok.Data; @Data public class AIOcrRequest { - /*** * 图片url */ private String imageUrl; - - - - // 1. advanced_recognition : 高精识别 , /*** - * 仅qwen-vl-ocr-2025-08-28模型支持,具有以下特性: - * 识别文本内容(提取文字) - * 检测文本位置(定位文本行、获取坐标) + * Base64-encoded image + * + * Provide either this or imageUrl (one of the two) */ + private String imageBase64; + // 2. info_draw: 信息抽取 @@ -32,23 +29,6 @@ public class AIOcrRequest { - // 3. table_parsing: 表格解析 - /** - * 模型会对图像中的表格元素进行解析,以带有HTML格式的文本返回识别结果。 - */ - - - // 4. document_parsing : 文档解析 - /** - * 模型支持解析以图像形式存储的扫描件或PDF文档,能识别文件中的标题、摘要、标签等,以带有LaTeX格式的文本返回识别结果。 - */ - - - // 5. formula_recognition: 公式识别 - /** - * 模型支持解析图像中的公式,以带有LaTeX格式的文本返回识别结果。 - */ - // text_recognition: 通用文字识别 /** @@ -56,11 +36,6 @@ public class AIOcrRequest { */ - - // multi_lan_recognition : 多语言识别 - /** - * 多语言识别适用于针对中英文之外的小语种场景,支持的小语种有:阿拉伯语、法语、德语、意大利语、日语、韩语、葡萄牙语、俄语、西班牙语、越南语,以纯文本格式返回识别结果。 - */ private String type ; diff --git a/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java b/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java index e9c2e35..d0168ce 100644 --- a/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java +++ b/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java @@ -20,7 +20,7 @@ import java.util.Map; @Slf4j @RestController -@RequestMapping("/ai/precision/orc/") +@RequestMapping("/ai/orc/") public class AIOcrController { @@ -30,27 +30,112 @@ public class AIOcrController { // http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232 // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + + // 1. 
advanced_recognition : 高精识别 , + /*** + * Supported only by the qwen-vl-ocr-2025-08-28 model, with the following features: + * recognizes text content (extracts text) + * detects text position (locates text lines, returns coordinates). NOTE(review): the alibabaOCREnums local retained below is no longer passed to callOcr, so the request type looks unused here; confirm and remove. + */ @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) - @RequestMapping("/recognize") - public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { - - + @RequestMapping("/advanced/recognition") + public R advanced_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType()); - if(alibabaOCREnums == null){ - - return R.error("请输入正确的类型"); - } - - - ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), alibabaOCREnums); - - Map data = new HashMap<>(); + ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(), + AlibabaOCREnums.advanced_recognition); + Map data = new HashMap<>(); data.put("content", modelResult.getResult()); - return R.ok().setData(data); } + // http://localhost:8888/ai/orc/info/draw + // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + // 2. info_draw: information extraction + /*** + * The model extracts information from receipts, certificates and forms and returns it as JSON-formatted text. + * + * result_schema may be any JSON structure, with JSON objects nested at most 3 levels deep. Only the keys of the JSON object need to be filled in; leave the values empty. + */ + @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) + @RequestMapping("/info/draw") + public R info_draw(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { + ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(), + AlibabaOCREnums.key_information_extraction); + Map data = new HashMap<>(); + data.put("content", modelResult.getResult()); + return R.ok().setData(data); + } + // http://localhost:8888/ai/orc/table/parsing + // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + + // 3. 
table_parsing: 表格解析 + /** + * The model parses table elements in the image and returns the recognition result as HTML-formatted text. + */ + @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) + @RequestMapping("/table/parsing") + public R table_parsing(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { + ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(), + AlibabaOCREnums.table_parsing); + Map data = new HashMap<>(); + data.put("content", modelResult.getResult()); + return R.ok().setData(data); + } + + + // http://localhost:8888/ai/orc/document/parsing + // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + + /*** + * 4. document_parsing: document parsing + * + * The model parses scanned documents or PDF documents stored as images, recognizing titles, abstracts, tags, etc. in the file, + * and returns the recognition result as LaTeX-formatted text. + */ + @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) + @RequestMapping("/document/parsing") + public R document_parsing(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { + ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(), + AlibabaOCREnums.document_parsing); + Map data = new HashMap<>(); + data.put("content", modelResult.getResult()); + return R.ok().setData(data); + } + + + // http://localhost:8888/ai/orc/formula/recognition + // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + // 5. 
formula_recognition: 公式识别 + /** + * The model parses formulas in the image and returns the recognition result as LaTeX-formatted text. + */ + @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) + @RequestMapping("/formula/recognition") + public R formula_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { + ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(), + AlibabaOCREnums.formula_recognition); + Map data = new HashMap<>(); + data.put("content", modelResult.getResult()); + return R.ok().setData(data); + } + + // http://localhost:8888/ai/orc/multi/lan/recognition + // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + // multi_lan_recognition: multi-language recognition + /** + * Multi-language recognition targets languages other than Chinese and English. Supported languages: Arabic, French, German, Italian, Japanese, Korean, Portuguese, Russian, Spanish and Vietnamese. The recognition result is returned as plain text. + */ + @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) + @RequestMapping("/multi/lan/recognition") + public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { + ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), + aiOcrRequest.getImageBase64(), AlibabaOCREnums.multi_lan); + Map data = new HashMap<>(); + data.put("content", modelResult.getResult()); + return R.ok().setData(data); + } + }