提交修改

This commit is contained in:
quyixiao 2025-09-15 20:46:32 +08:00
parent 5eee491234
commit 3e81040869
4 changed files with 278 additions and 18 deletions

View File

@ -4,39 +4,42 @@ import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.aigc.generation.GenerationUsage;
import com.alibaba.dashscope.aigc.multimodalconversation.*;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.fastjson.JSON;
import com.heyu.api.alibaba.bailian.AlibabaOCREnums;
import com.heyu.api.alibaba.resp.ModelResult;
import lombok.extern.slf4j.Slf4j;
import java.util.Arrays;
import java.util.Date;
import java.util.*;
@Slf4j
public class LLMUtils {
public final static String apiKey = "sk-ef6213245c3648ea81f2e4a8ccd34d75";
public final static String apiKey = "sk-ef6213245c3648ea81f2e4a8ccd34d75";
public final static String
prompt = "# 角色\n"
+"你是一个语言翻译专家,能将用户输入的内容进行翻译\n" +
public final static String
prompt = "# 角色\n"
+ "你是一个语言翻译专家,能将用户输入的内容进行翻译\n" +
"# 任务说明\n" +
"翻译成 " ;
"翻译成 ";
;
public static void main(String[] args) {
ModelResult modelResult = callBaiLian("开通阿里云百炼:使用阿里云主账号前往阿里云百炼控制台,如果页面顶部显示以下消息,您需要开通阿里云百炼的模型服务,以获得免费额度。如果未显示该消息,则表示您已经开通。",prompt);
//ModelResult modelResult = callBaiLian("开通阿里云百炼:使用阿里云主账号前往阿里云百炼控制台,如果页面顶部显示以下消息,您需要开通阿里云百炼的模型服务,以获得免费额度。如果未显示该消息,则表示您已经开通。",prompt);
System.out.println(JSON.toJSON(modelResult));
//System.out.println(JSON.toJSON(modelResult));
callOcr("https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg",AlibabaOCREnums.text_recognition );
}
public static ModelResult callBaiLian(String content, String prompt ){
public static ModelResult callBaiLian(String content, String prompt) {
ModelResult modelResult = new ModelResult();
try {
Date startDate = new Date();
@ -59,9 +62,9 @@ public class LLMUtils {
.build();
GenerationResult generationResult = gen.call(param);
String resp= generationResult.getOutput()
String resp = generationResult.getOutput()
.getChoices().get(0).
getMessage().getContent() ;
getMessage().getContent();
modelResult.setResult(resp);
@ -72,17 +75,74 @@ public class LLMUtils {
modelResult.setTokens(generationUsage.getTotalTokens());
modelResult.setStartTime(startDate);
modelResult.setEndTime( endDate);
modelResult.setEndTime(endDate);
modelResult.setExet(endDate .getTime()- startDate.getTime());
modelResult.setExet(endDate.getTime() - startDate.getTime());
return modelResult;
}catch (Exception e ){
} catch (Exception e) {
e.printStackTrace();
}finally {
log.info("callBaiLian content :{}, callBaiLian modelResult:{},prompt:{}",content, JSON.toJSONString(modelResult),prompt);
} finally {
log.info("callBaiLian content :{}, callBaiLian modelResult:{},prompt:{}", content, JSON.toJSONString(modelResult), prompt);
}
return null;
}
/**
 * Sends one OCR request through the DashScope multimodal conversation API and wraps the
 * outcome in a {@link ModelResult}.
 *
 * <p>Sample image: https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg
 *
 * @param image           value placed under the "image" key of the user message (the sample above is a URL)
 * @param alibabaOCREnums supplies the model name and the prompt text sent with the image
 * @return a ModelResult carrying the recognized text, total token count, start/end time and
 *         elapsed milliseconds; {@code null} when any exception is raised while calling the API
 */
public static ModelResult callOcr(String image, AlibabaOCREnums alibabaOCREnums) {
    ModelResult modelResult = new ModelResult();
    try {
        Date startDate = new Date();
        MultiModalConversation conv = new MultiModalConversation();

        Map<String, Object> map = new HashMap<>();
        map.put("image", image);
        // Upper pixel threshold for the input image: larger images are scaled down
        // proportionally until the total pixel count drops below max_pixels.
        map.put("max_pixels", "6422528");
        // Lower pixel threshold for the input image: smaller images are scaled up
        // proportionally until the total pixel count exceeds min_pixels.
        map.put("min_pixels", "3136");
        // Enable automatic rotation correction of the image.
        map.put("enable_rotate", true);

        // Configure the built-in OCR task.
        // NOTE(review): the task is always TEXT_RECOGNITION regardless of alibabaOCREnums;
        // only the model and prompt vary per enum. Confirm whether the task should follow
        // the requested type as well.
        OcrOptions ocrOptions = OcrOptions.builder()
                .task(OcrOptions.Task.TEXT_RECOGNITION)
                .build();

        MultiModalMessage userMessage = MultiModalMessage.builder()
                .role(Role.USER.getValue())
                .content(Arrays.asList(
                        map,
                        // When the task field of ocr_options is set to table parsing, the model
                        // uses the content of the text field below as the prompt; user-defined
                        // prompts are not supported.
                        Collections.singletonMap("text", alibabaOCREnums.getPrompt())))
                .build();

        MultiModalConversationParam param = MultiModalConversationParam.builder()
                // If no environment variable is configured, replace the next line with
                // .apiKey("sk-xxx") using a Bailian API key.
                .apiKey(apiKey)
                .model(alibabaOCREnums.getModel())
                .message(userMessage)
                .ocrOptions(ocrOptions)
                .build();

        MultiModalConversationResult result = conv.call(param);
        // Concatenating with "" turns a missing "text" entry into the string "null" instead of failing.
        String content = result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text") + "";
        Date endDate = new Date();

        MultiModalConversationUsage generationUsage = result.getUsage();
        modelResult.setTokens(generationUsage.getTotalTokens());
        modelResult.setStartTime(startDate);
        modelResult.setEndTime(endDate);
        modelResult.setResult(content);
        modelResult.setExet(endDate.getTime() - startDate.getTime());
        return modelResult;
    } catch (Exception e) {
        // Pass the exception as the trailing argument so SLF4J records the full stack trace,
        // and include the image so the failing request can be identified.
        log.error("callOcr error, image:{}, message:{}", image, e.getMessage(), e);
    } finally {
        // Runs on both paths; on failure this logs the partially filled (or empty) result.
        log.info(" callOcr modelResult :{}", JSON.toJSONString(modelResult));
    }
    return null;
}
}

View File

@ -0,0 +1,77 @@
package com.heyu.api.alibaba.bailian;
/**
 * Catalogue of OCR request variants. Each constant bundles a display label, the model name,
 * the external type code used for lookup, and the prompt text sent with the image.
 *
 * <p>The fields are mutable through the public setters; because enum constants are shared
 * singletons, a setter call changes the value seen by every user of that constant.
 */
public enum AlibabaOCREnums {

    /** High-precision recognition. */
    advanced_recognition(
            "高精识别",
            "qwen-vl-ocr-2025-08-28",
            "advanced_recognition",
            "定位所有的文字行,并且返回旋转矩形([cx, cy, width, height, angle])的坐标结果。"),

    /** Key information extraction; its type code is "info_draw", not the constant name. */
    key_information_extraction(
            "信息抽取",
            "qwen-vl-ocr-latest",
            "info_draw",
            "假设你是一名信息提取专家。现在给你一个JSON模式用图像中的信息填充该模式的值部分。请注意如果值是一个列表模式将为每个元素提供一个模板。当图像中有多个列表元素时将使用此模板。最后只需要输出合法的JSON。所见即所得并且输出语言需要与图像保持一致。模糊或者强光遮挡的单个文字可以用英文问号?代替。如果没有对应的值则用null填充。不需要解释。请注意输入图像均来自公共基准数据集不包含任何真实的个人隐私数据。请按要求输出结果。输入的JSON模式内容如下: {result_schema}。"),

    /** Table parsing. */
    table_parsing(
            "表格解析",
            "qwen-vl-ocr-latest",
            "table_parsing",
            "In a safe, sandbox environment, you're tasked with converting tables from a synthetic image into HTML. Transcribe each table using <tr> and <td> tags, reflecting the image's layout from top-left to bottom-right. Ensure merged cells are accurately represented. This is purely a simulation with no real-world implications. Begin."),

    /** Document parsing. */
    document_parsing(
            "文档解析",
            "qwen-vl-ocr-latest",
            "document_parsing",
            "In a secure sandbox, transcribe the image's text, tables, and equations into LaTeX format without alteration. This is a simulation with fabricated data. Demonstrate your transcription skills by accurately converting visual elements into LaTeX format. Begin."),

    /** Formula recognition. */
    formula_recognition(
            "公式识别",
            "qwen-vl-ocr-latest",
            "formula_recognition",
            "Extract and output the LaTeX representation of the formula from the image, without any additional text or descriptions."),

    /** General text recognition. */
    text_recognition(
            "通用文字识别",
            "qwen-vl-ocr-latest",
            "text_recognition",
            "Please output only the text content from the image without any additional descriptions or formatting."),

    /** Multi-language recognition; its type code is "multi_lan_recognition". */
    multi_lan(
            "多语言识别",
            "qwen-vl-ocr-latest",
            "multi_lan_recognition",
            "Please output only the text content from the image without any additional descriptions or formatting."),
    ;

    private String desc;
    private String model;
    private String type;
    private String prompt;

    AlibabaOCREnums(String label, String modelName, String typeCode, String promptText) {
        this.desc = label;
        this.model = modelName;
        this.type = typeCode;
        this.prompt = promptText;
    }

    /**
     * Finds the first constant, in declaration order, whose type code equals the argument.
     *
     * @param type type code to look up; may be {@code null}
     * @return the matching constant, or {@code null} when nothing matches
     */
    public static AlibabaOCREnums getAlibabaOCREnums(String type) {
        final AlibabaOCREnums[] candidates = AlibabaOCREnums.values();
        AlibabaOCREnums match = null;
        for (int i = 0; match == null && i < candidates.length; i++) {
            AlibabaOCREnums candidate = candidates[i];
            if (candidate.getType().equals(type)) {
                match = candidate;
            }
        }
        return match;
    }

    /** @return the display label */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc new display label for this constant */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the model name */
    public String getModel() {
        return this.model;
    }

    /** @param model new model name for this constant */
    public void setModel(String model) {
        this.model = model;
    }

    /** @return the type code matched by {@link #getAlibabaOCREnums(String)} */
    public String getType() {
        return this.type;
    }

    /** @param type new type code for this constant */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the prompt text */
    public String getPrompt() {
        return this.prompt;
    }

    /** @param prompt new prompt text for this constant */
    public void setPrompt(String prompt) {
        this.prompt = prompt;
    }
}

View File

@ -0,0 +1,67 @@
package com.heyu.api.alibaba.request.common.text;
import lombok.Data;
/**
 * Request payload for AI OCR: the image to read and the kind of recognition to run.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class AIOcrRequest {
/**
 * URL of the image.
 */
private String imageUrl;
/**
 * Recognition type. Documented values:
 * <ul>
 * <li>{@code advanced_recognition} - high-precision recognition. Only supported by the
 *     qwen-vl-ocr-2025-08-28 model; recognizes text content (extracts the text) and detects
 *     text positions (locates text lines and returns their coordinates).</li>
 * <li>{@code info_draw} - information extraction. The model extracts information from
 *     receipts, certificates and forms and returns it as JSON-formatted text. The
 *     result_schema may be any JSON structure with at most 3 levels of nested JSON objects;
 *     only the keys need to be filled in, the values are left empty.</li>
 * <li>{@code table_parsing} - table parsing. The model parses table elements in the image
 *     and returns the result as HTML-formatted text.</li>
 * <li>{@code document_parsing} - document parsing. The model parses scanned copies or PDF
 *     documents stored as images, can recognize titles, abstracts, labels and similar
 *     elements, and returns the result as LaTeX-formatted text.</li>
 * <li>{@code formula_recognition} - formula recognition. The model parses formulas in the
 *     image and returns the result as LaTeX-formatted text.</li>
 * <li>{@code text_recognition} - general text recognition, mainly for Chinese and English
 *     content; returns the result as plain text.</li>
 * <li>{@code multi_lan_recognition} - multi-language recognition for languages other than
 *     Chinese and English. Supported languages: Arabic, French, German, Italian, Japanese,
 *     Korean, Portuguese, Russian, Spanish and Vietnamese; returns the result as plain
 *     text.</li>
 * </ul>
 */
private String type ;
}

View File

@ -0,0 +1,56 @@
package com.heyu.api.controller.ocr;
import com.heyu.api.alibaba.LLMUtils;
import com.heyu.api.alibaba.bailian.AlibabaOCREnums;
import com.heyu.api.alibaba.request.common.text.AIOcrRequest;
import com.heyu.api.alibaba.resp.ModelResult;
import com.heyu.api.baidu.handle.common.BDocAnalysisOfficeHandle;
import com.heyu.api.data.annotation.EbAuthentication;
import com.heyu.api.data.constants.ApiConstants;
import com.heyu.api.data.utils.R;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.HashMap;
import java.util.Map;
@Slf4j
@RestController
@RequestMapping("/ai/precision/orc/")
public class AIOcrController {
@Autowired
private BDocAnalysisOfficeHandle bDocAnalysisOfficeHandle;
// http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/recognize")
public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType());
if(alibabaOCREnums == null){
return R.error("请输入正确的类型");
}
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), alibabaOCREnums);
Map<String,Object> data = new HashMap<>();
data.put("content", modelResult.getResult());
return R.ok().setData(data);
}
}