This commit is contained in:
quyixiao 2025-09-15 21:04:31 +08:00
parent 3e81040869
commit 3412fb7cd1
3 changed files with 112 additions and 47 deletions

View File

@ -11,6 +11,7 @@ import com.alibaba.dashscope.common.Role;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.heyu.api.alibaba.bailian.AlibabaOCREnums; import com.heyu.api.alibaba.bailian.AlibabaOCREnums;
import com.heyu.api.alibaba.resp.ModelResult; import com.heyu.api.alibaba.resp.ModelResult;
import com.heyu.api.data.utils.StringUtils;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import java.util.*; import java.util.*;
@ -94,13 +95,17 @@ public class LLMUtils {
/** /**
* https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg * https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg
*/ */
public static ModelResult callOcr(String image, AlibabaOCREnums alibabaOCREnums) { public static ModelResult callOcr(String image,String imageBase64, AlibabaOCREnums alibabaOCREnums) {
ModelResult modelResult = new ModelResult(); ModelResult modelResult = new ModelResult();
try { try {
Date startDate = new Date(); Date startDate = new Date();
MultiModalConversation conv = new MultiModalConversation(); MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>(); Map<String, Object> map = new HashMap<>();
map.put("image", image); if(StringUtils.isNotEmpty(image)){
map.put("image", image);
}else{
map.put("image", "data:image/jpeg;base64," + imageBase64);
}
// 输入图像的最大像素阈值超过该值图像会按原比例缩小直到总像素低于max_pixels // 输入图像的最大像素阈值超过该值图像会按原比例缩小直到总像素低于max_pixels
map.put("max_pixels", "6422528"); map.put("max_pixels", "6422528");
// 输入图像的最小像素阈值小于该值图像会按原比例放大直到总像素大于min_pixels // 输入图像的最小像素阈值小于该值图像会按原比例放大直到总像素大于min_pixels

View File

@ -6,21 +6,18 @@ import lombok.Data;
@Data @Data
public class AIOcrRequest { public class AIOcrRequest {
/*** /***
* 图片url * 图片url
*/ */
private String imageUrl; private String imageUrl;
// 1. advanced_recognition : 高精识别 ,
/*** /***
* 仅qwen-vl-ocr-2025-08-28模型支持具有以下特性 * base64 编码的图片
* 识别文本内容提取文字 *
* 检测文本位置定位文本行获取坐标 * 与 imageUrl 二选一
*/ */
private String imageBase64;
// 2. info_draw: 信息抽取 // 2. info_draw: 信息抽取
@ -32,23 +29,6 @@ public class AIOcrRequest {
// 3. table_parsing: 表格解析
/**
* 模型会对图像中的表格元素进行解析以带有HTML格式的文本返回识别结果
*/
// 4. document_parsing : 文档解析
/**
* 模型支持解析以图像形式存储的扫描件或PDF文档能识别文件中的标题摘要标签等以带有LaTeX格式的文本返回识别结果
*/
// 5. formula_recognition: 公式识别
/**
* 模型支持解析图像中的公式以带有LaTeX格式的文本返回识别结果
*/
// text_recognition: 通用文字识别 // text_recognition: 通用文字识别
/** /**
@ -56,11 +36,6 @@ public class AIOcrRequest {
*/ */
// multi_lan_recognition : 多语言识别
/**
* 多语言识别适用于针对中英文之外的小语种场景支持的小语种有阿拉伯语法语德语意大利语日语韩语葡萄牙语俄语西班牙语越南语以纯文本格式返回识别结果
*/
private String type ; private String type ;

View File

@ -20,7 +20,7 @@ import java.util.Map;
@Slf4j @Slf4j
@RestController @RestController
@RequestMapping("/ai/precision/orc/") @RequestMapping("/ai/orc/")
public class AIOcrController { public class AIOcrController {
@ -30,27 +30,112 @@ public class AIOcrController {
// http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232 // http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// 1. advanced_recognition : 高精识别 ,
/***
* 仅qwen-vl-ocr-2025-08-28模型支持具有以下特性
* 识别文本内容提取文字
* 检测文本位置定位文本行获取坐标
*/
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/recognize") @RequestMapping("/advanced/recognition")
public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { public R advanced_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType()); AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType());
if(alibabaOCREnums == null){ ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(),
AlibabaOCREnums.advanced_recognition);
return R.error("请输入正确的类型"); Map<String, Object> data = new HashMap<>();
}
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), alibabaOCREnums);
Map<String,Object> data = new HashMap<>();
data.put("content", modelResult.getResult()); data.put("content", modelResult.getResult());
return R.ok().setData(data); return R.ok().setData(data);
} }
// Endpoint path: /ai/orc/info/draw (request body carries imageUrl or imageBase64)
// Sample image: https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// 2. info_draw: key information extraction
/**
 * Runs the key-information-extraction OCR task on the supplied image.
 *
 * <p>Per the upstream model description: the model extracts information from
 * receipts, certificates and forms and returns it as JSON-formatted text.
 * {@code result_schema} may be any JSON structure nested up to 3 levels; only the
 * keys need to be filled in and the values are left empty.
 * NOTE(review): this handler does not forward a {@code result_schema} to
 * {@code callOcr} -- confirm where it is supplied.
 *
 * @param aiOcrRequest request body; the image is taken from {@code imageUrl} or,
 *                     when that is empty, from {@code imageBase64}
 * @return {@code R.ok()} whose data map holds the model output under
 *         {@code "content"}, or {@code R.error(...)} when no image was supplied
 * @throws Exception kept on the signature for compatibility
 *                   (NOTE(review): confirm whether callOcr can actually throw)
 */
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/info/draw")
public R info_draw(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
    String imageUrl = aiOcrRequest.getImageUrl();
    String imageBase64 = aiOcrRequest.getImageBase64();
    // callOcr falls back to the base64 field when the URL is empty, so with both
    // missing it would send a data URI without a real image payload to the model.
    boolean noUrl = imageUrl == null || imageUrl.isEmpty();
    boolean noBase64 = imageBase64 == null || imageBase64.isEmpty();
    if (noUrl && noBase64) {
        return R.error("imageUrl 和 imageBase64 不能同时为空");
    }
    ModelResult modelResult = LLMUtils.callOcr(imageUrl, imageBase64,
            AlibabaOCREnums.key_information_extraction);
    Map<String, Object> data = new HashMap<>();
    data.put("content", modelResult.getResult());
    return R.ok().setData(data);
}
// Endpoint path: /ai/orc/table/parsing (request body carries imageUrl or imageBase64)
// Sample image: https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// 3. table_parsing: table parsing
/**
 * Runs the table-parsing OCR task on the supplied image.
 *
 * <p>Per the upstream model description: the model parses the table elements in
 * the image and returns the recognition result as HTML-formatted text.
 *
 * @param aiOcrRequest request body; the image is taken from {@code imageUrl} or,
 *                     when that is empty, from {@code imageBase64}
 * @return {@code R.ok()} whose data map holds the model output under
 *         {@code "content"}, or {@code R.error(...)} when no image was supplied
 * @throws Exception kept on the signature for compatibility
 *                   (NOTE(review): confirm whether callOcr can actually throw)
 */
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/table/parsing")
public R table_parsing(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
    String imageUrl = aiOcrRequest.getImageUrl();
    String imageBase64 = aiOcrRequest.getImageBase64();
    // callOcr falls back to the base64 field when the URL is empty, so with both
    // missing it would send a data URI without a real image payload to the model.
    boolean noUrl = imageUrl == null || imageUrl.isEmpty();
    boolean noBase64 = imageBase64 == null || imageBase64.isEmpty();
    if (noUrl && noBase64) {
        return R.error("imageUrl 和 imageBase64 不能同时为空");
    }
    ModelResult modelResult = LLMUtils.callOcr(imageUrl, imageBase64,
            AlibabaOCREnums.table_parsing);
    Map<String, Object> data = new HashMap<>();
    data.put("content", modelResult.getResult());
    return R.ok().setData(data);
}
// Endpoint path: /ai/orc/document/parsing (request body carries imageUrl or imageBase64)
// Sample image: https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// 4. document_parsing: document parsing
/**
 * Runs the document-parsing OCR task on the supplied image.
 *
 * <p>Per the upstream model description: the model parses scanned documents or
 * PDF pages stored as images, recognises elements such as titles, abstracts and
 * labels, and returns the recognition result as LaTeX-formatted text.
 *
 * @param aiOcrRequest request body; the image is taken from {@code imageUrl} or,
 *                     when that is empty, from {@code imageBase64}
 * @return {@code R.ok()} whose data map holds the model output under
 *         {@code "content"}, or {@code R.error(...)} when no image was supplied
 * @throws Exception kept on the signature for compatibility
 *                   (NOTE(review): confirm whether callOcr can actually throw)
 */
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/document/parsing")
public R document_parsing(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
    String imageUrl = aiOcrRequest.getImageUrl();
    String imageBase64 = aiOcrRequest.getImageBase64();
    // callOcr falls back to the base64 field when the URL is empty, so with both
    // missing it would send a data URI without a real image payload to the model.
    boolean noUrl = imageUrl == null || imageUrl.isEmpty();
    boolean noBase64 = imageBase64 == null || imageBase64.isEmpty();
    if (noUrl && noBase64) {
        return R.error("imageUrl 和 imageBase64 不能同时为空");
    }
    ModelResult modelResult = LLMUtils.callOcr(imageUrl, imageBase64,
            AlibabaOCREnums.document_parsing);
    Map<String, Object> data = new HashMap<>();
    data.put("content", modelResult.getResult());
    return R.ok().setData(data);
}
// Endpoint path: /ai/orc/formula/recognition (request body carries imageUrl or imageBase64)
// Sample image: https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// 5. formula_recognition: formula recognition
/**
 * Runs the formula-recognition OCR task on the supplied image.
 *
 * <p>Per the upstream model description: the model parses the formulas in the
 * image and returns the recognition result as LaTeX-formatted text.
 *
 * @param aiOcrRequest request body; the image is taken from {@code imageUrl} or,
 *                     when that is empty, from {@code imageBase64}
 * @return {@code R.ok()} whose data map holds the model output under
 *         {@code "content"}, or {@code R.error(...)} when no image was supplied
 * @throws Exception kept on the signature for compatibility
 *                   (NOTE(review): confirm whether callOcr can actually throw)
 */
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/formula/recognition")
public R formula_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
    String imageUrl = aiOcrRequest.getImageUrl();
    String imageBase64 = aiOcrRequest.getImageBase64();
    // callOcr falls back to the base64 field when the URL is empty, so with both
    // missing it would send a data URI without a real image payload to the model.
    boolean noUrl = imageUrl == null || imageUrl.isEmpty();
    boolean noBase64 = imageBase64 == null || imageBase64.isEmpty();
    if (noUrl && noBase64) {
        return R.error("imageUrl 和 imageBase64 不能同时为空");
    }
    ModelResult modelResult = LLMUtils.callOcr(imageUrl, imageBase64,
            AlibabaOCREnums.formula_recognition);
    Map<String, Object> data = new HashMap<>();
    data.put("content", modelResult.getResult());
    return R.ok().setData(data);
}
// Endpoint path: /ai/orc/multi/lan/recognition (request body carries imageUrl or imageBase64)
// Sample image: https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// multi_lan_recognition: multi-language recognition
/**
 * Runs the multi-language OCR task on the supplied image.
 *
 * <p>Per the upstream model description: intended for languages other than
 * Chinese and English. Supported languages are Arabic, French, German, Italian,
 * Japanese, Korean, Portuguese, Russian, Spanish and Vietnamese; the recognition
 * result is returned as plain text.
 *
 * @param aiOcrRequest request body; the image is taken from {@code imageUrl} or,
 *                     when that is empty, from {@code imageBase64}
 * @return {@code R.ok()} whose data map holds the model output under
 *         {@code "content"}, or {@code R.error(...)} when no image was supplied
 * @throws Exception kept on the signature for compatibility
 *                   (NOTE(review): confirm whether callOcr can actually throw)
 */
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/multi/lan/recognition")
public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
    String imageUrl = aiOcrRequest.getImageUrl();
    String imageBase64 = aiOcrRequest.getImageBase64();
    // callOcr falls back to the base64 field when the URL is empty, so with both
    // missing it would send a data URI without a real image payload to the model.
    boolean noUrl = imageUrl == null || imageUrl.isEmpty();
    boolean noBase64 = imageBase64 == null || imageBase64.isEmpty();
    if (noUrl && noBase64) {
        return R.error("imageUrl 和 imageBase64 不能同时为空");
    }
    ModelResult modelResult = LLMUtils.callOcr(imageUrl, imageBase64,
            AlibabaOCREnums.multi_lan);
    Map<String, Object> data = new HashMap<>();
    data.put("content", modelResult.getResult());
    return R.ok().setData(data);
}
} }