jruqwhnt
This commit is contained in:
parent
3e81040869
commit
3412fb7cd1
@ -11,6 +11,7 @@ import com.alibaba.dashscope.common.Role;
|
|||||||
import com.alibaba.fastjson.JSON;
|
import com.alibaba.fastjson.JSON;
|
||||||
import com.heyu.api.alibaba.bailian.AlibabaOCREnums;
|
import com.heyu.api.alibaba.bailian.AlibabaOCREnums;
|
||||||
import com.heyu.api.alibaba.resp.ModelResult;
|
import com.heyu.api.alibaba.resp.ModelResult;
|
||||||
|
import com.heyu.api.data.utils.StringUtils;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
@ -94,13 +95,17 @@ public class LLMUtils {
|
|||||||
/**
|
/**
|
||||||
* https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg
|
* https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg
|
||||||
*/
|
*/
|
||||||
public static ModelResult callOcr(String image, AlibabaOCREnums alibabaOCREnums) {
|
public static ModelResult callOcr(String image,String imageBase64, AlibabaOCREnums alibabaOCREnums) {
|
||||||
ModelResult modelResult = new ModelResult();
|
ModelResult modelResult = new ModelResult();
|
||||||
try {
|
try {
|
||||||
Date startDate = new Date();
|
Date startDate = new Date();
|
||||||
MultiModalConversation conv = new MultiModalConversation();
|
MultiModalConversation conv = new MultiModalConversation();
|
||||||
Map<String, Object> map = new HashMap<>();
|
Map<String, Object> map = new HashMap<>();
|
||||||
map.put("image", image);
|
if(StringUtils.isNotEmpty(image)){
|
||||||
|
map.put("image", image);
|
||||||
|
}else{
|
||||||
|
map.put("image", "data:image/jpeg;base64," + imageBase64);
|
||||||
|
}
|
||||||
// 输入图像的最大像素阈值,超过该值图像会按原比例缩小,直到总像素低于max_pixels
|
// 输入图像的最大像素阈值,超过该值图像会按原比例缩小,直到总像素低于max_pixels
|
||||||
map.put("max_pixels", "6422528");
|
map.put("max_pixels", "6422528");
|
||||||
// 输入图像的最小像素阈值,小于该值图像会按原比例放大,直到总像素大于min_pixels
|
// 输入图像的最小像素阈值,小于该值图像会按原比例放大,直到总像素大于min_pixels
|
||||||
|
|||||||
@ -6,21 +6,18 @@ import lombok.Data;
|
|||||||
@Data
|
@Data
|
||||||
public class AIOcrRequest {
|
public class AIOcrRequest {
|
||||||
|
|
||||||
|
|
||||||
/***
|
/***
|
||||||
* 图片url
|
* 图片url
|
||||||
*/
|
*/
|
||||||
private String imageUrl;
|
private String imageUrl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// 1. advanced_recognition : 高精识别 ,
|
|
||||||
/***
|
/***
|
||||||
* 仅qwen-vl-ocr-2025-08-28模型支持,具有以下特性:
|
* base64 编码的图片(不含 "data:image/...;base64," 前缀)
|
||||||
* 识别文本内容(提取文字)
|
*
|
||||||
* 检测文本位置(定位文本行、获取坐标)
|
* 和 imageUrl 二选一(两者只需传其中一个)
|
||||||
*/
|
*/
|
||||||
|
private String imageBase64;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// 2. info_draw: 信息抽取
|
// 2. info_draw: 信息抽取
|
||||||
@ -32,23 +29,6 @@ public class AIOcrRequest {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
// 3. table_parsing: 表格解析
|
|
||||||
/**
|
|
||||||
* 模型会对图像中的表格元素进行解析,以带有HTML格式的文本返回识别结果。
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
// 4. document_parsing : 文档解析
|
|
||||||
/**
|
|
||||||
* 模型支持解析以图像形式存储的扫描件或PDF文档,能识别文件中的标题、摘要、标签等,以带有LaTeX格式的文本返回识别结果。
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
// 5. formula_recognition: 公式识别
|
|
||||||
/**
|
|
||||||
* 模型支持解析图像中的公式,以带有LaTeX格式的文本返回识别结果。
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
// text_recognition: 通用文字识别
|
// text_recognition: 通用文字识别
|
||||||
/**
|
/**
|
||||||
@ -56,11 +36,6 @@ public class AIOcrRequest {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// multi_lan_recognition : 多语言识别
|
|
||||||
/**
|
|
||||||
* 多语言识别适用于针对中英文之外的小语种场景,支持的小语种有:阿拉伯语、法语、德语、意大利语、日语、韩语、葡萄牙语、俄语、西班牙语、越南语,以纯文本格式返回识别结果。
|
|
||||||
*/
|
|
||||||
private String type ;
|
private String type ;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -20,7 +20,7 @@ import java.util.Map;
|
|||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@RestController
|
@RestController
|
||||||
@RequestMapping("/ai/precision/orc/")
|
@RequestMapping("/ai/orc/")
|
||||||
public class AIOcrController {
|
public class AIOcrController {
|
||||||
|
|
||||||
|
|
||||||
@ -30,27 +30,112 @@ public class AIOcrController {
|
|||||||
|
|
||||||
// http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232
|
// http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232
|
||||||
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
||||||
|
|
||||||
|
// 1. advanced_recognition : 高精识别 ,
|
||||||
|
/***
|
||||||
|
* 仅qwen-vl-ocr-2025-08-28模型支持,具有以下特性:
|
||||||
|
* 识别文本内容(提取文字)
|
||||||
|
* 检测文本位置(定位文本行、获取坐标)
|
||||||
|
*/
|
||||||
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
||||||
@RequestMapping("/recognize")
|
@RequestMapping("/advanced/recognition")
|
||||||
public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
public R advanced_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
||||||
|
|
||||||
|
|
||||||
AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType());
|
AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType());
|
||||||
if(alibabaOCREnums == null){
|
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(),
|
||||||
|
AlibabaOCREnums.advanced_recognition);
|
||||||
return R.error("请输入正确的类型");
|
Map<String, Object> data = new HashMap<>();
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), alibabaOCREnums);
|
|
||||||
|
|
||||||
Map<String,Object> data = new HashMap<>();
|
|
||||||
data.put("content", modelResult.getResult());
|
data.put("content", modelResult.getResult());
|
||||||
|
|
||||||
return R.ok().setData(data);
|
return R.ok().setData(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// http://localhost:8888/ai/orc/info/draw
|
||||||
|
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
||||||
|
// 2. info_draw: 信息抽取
|
||||||
|
/***
|
||||||
|
* 模型支持对票据、证件、表单中的信息进行抽取,并以带有JSON格式的文本返回。
|
||||||
|
*
|
||||||
|
* result_schema可以是任意形式的JSON结构,最多可嵌套3层JSON 对象。您只需要填写JSON对象的key,value保持为空即可。
|
||||||
|
*/
|
||||||
|
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
||||||
|
@RequestMapping("/info/draw")
|
||||||
|
public R info_draw(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
||||||
|
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(),
|
||||||
|
AlibabaOCREnums.key_information_extraction);
|
||||||
|
Map<String, Object> data = new HashMap<>();
|
||||||
|
data.put("content", modelResult.getResult());
|
||||||
|
return R.ok().setData(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// http://localhost:8888/ai/orc/table/parsing
|
||||||
|
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
||||||
|
|
||||||
|
// 3. table_parsing: 表格解析
|
||||||
|
/**
|
||||||
|
* 模型会对图像中的表格元素进行解析,以带有HTML格式的文本返回识别结果。
|
||||||
|
*/
|
||||||
|
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
||||||
|
@RequestMapping("/table/parsing")
|
||||||
|
public R table_parsing(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
||||||
|
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(),
|
||||||
|
AlibabaOCREnums.table_parsing);
|
||||||
|
Map<String, Object> data = new HashMap<>();
|
||||||
|
data.put("content", modelResult.getResult());
|
||||||
|
return R.ok().setData(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// http://localhost:8888/ai/orc/document/parsing
|
||||||
|
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
||||||
|
|
||||||
|
/***
|
||||||
|
*
|
||||||
|
* 4. document_parsing : 文档解析
|
||||||
|
/**
|
||||||
|
* 模型支持解析以图像形式存储的扫描件或PDF文档,能识别文件中的标题、摘要、标签等,以带有LaTeX格式的文本返回识别结果。
|
||||||
|
*/
|
||||||
|
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
||||||
|
@RequestMapping("/document/parsing")
|
||||||
|
public R document_parsing(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
||||||
|
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(),
|
||||||
|
AlibabaOCREnums.document_parsing);
|
||||||
|
Map<String, Object> data = new HashMap<>();
|
||||||
|
data.put("content", modelResult.getResult());
|
||||||
|
return R.ok().setData(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// http://localhost:8888/ai/orc/formula/recognition
|
||||||
|
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
||||||
|
// 5. formula_recognition: 公式识别
|
||||||
|
/**
|
||||||
|
* 模型支持解析图像中的公式,以带有LaTeX格式的文本返回识别结果。
|
||||||
|
*/
|
||||||
|
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
||||||
|
@RequestMapping("/formula/recognition")
|
||||||
|
public R formula_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
||||||
|
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(),
|
||||||
|
AlibabaOCREnums.formula_recognition);
|
||||||
|
Map<String, Object> data = new HashMap<>();
|
||||||
|
data.put("content", modelResult.getResult());
|
||||||
|
return R.ok().setData(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
// http://localhost:8888/ai/orc/multi/lan/recognition
|
||||||
|
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
||||||
|
// multi_lan_recognition : 多语言识别
|
||||||
|
/**
|
||||||
|
* 多语言识别适用于针对中英文之外的小语种场景,支持的小语种有:阿拉伯语、法语、德语、意大利语、日语、韩语、葡萄牙语、俄语、西班牙语、越南语,以纯文本格式返回识别结果。
|
||||||
|
*/
|
||||||
|
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
||||||
|
@RequestMapping("/multi/lan/recognition")
|
||||||
|
public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
||||||
|
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(),
|
||||||
|
aiOcrRequest.getImageBase64(), AlibabaOCREnums.multi_lan);
|
||||||
|
Map<String, Object> data = new HashMap<>();
|
||||||
|
data.put("content", modelResult.getResult());
|
||||||
|
return R.ok().setData(data);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user