提交修改
This commit is contained in:
parent
5eee491234
commit
3e81040869
@ -4,39 +4,42 @@ import com.alibaba.dashscope.aigc.generation.Generation;
|
||||
import com.alibaba.dashscope.aigc.generation.GenerationParam;
|
||||
import com.alibaba.dashscope.aigc.generation.GenerationResult;
|
||||
import com.alibaba.dashscope.aigc.generation.GenerationUsage;
|
||||
import com.alibaba.dashscope.aigc.multimodalconversation.*;
|
||||
import com.alibaba.dashscope.common.Message;
|
||||
import com.alibaba.dashscope.common.MultiModalMessage;
|
||||
import com.alibaba.dashscope.common.Role;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.heyu.api.alibaba.bailian.AlibabaOCREnums;
|
||||
import com.heyu.api.alibaba.resp.ModelResult;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
@Slf4j
|
||||
public class LLMUtils {
|
||||
|
||||
public final static String apiKey = "sk-ef6213245c3648ea81f2e4a8ccd34d75";
|
||||
public final static String apiKey = "sk-ef6213245c3648ea81f2e4a8ccd34d75";
|
||||
|
||||
public final static String
|
||||
prompt = "# 角色\n"
|
||||
+"你是一个语言翻译专家,能将用户输入的内容进行翻译\n" +
|
||||
public final static String
|
||||
prompt = "# 角色\n"
|
||||
+ "你是一个语言翻译专家,能将用户输入的内容进行翻译\n" +
|
||||
"# 任务说明\n" +
|
||||
"翻译成 " ;
|
||||
"翻译成 ";
|
||||
;
|
||||
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
ModelResult modelResult = callBaiLian("开通阿里云百炼:使用阿里云主账号前往阿里云百炼控制台,如果页面顶部显示以下消息,您需要开通阿里云百炼的模型服务,以获得免费额度。如果未显示该消息,则表示您已经开通。",prompt);
|
||||
//ModelResult modelResult = callBaiLian("开通阿里云百炼:使用阿里云主账号前往阿里云百炼控制台,如果页面顶部显示以下消息,您需要开通阿里云百炼的模型服务,以获得免费额度。如果未显示该消息,则表示您已经开通。",prompt);
|
||||
|
||||
System.out.println(JSON.toJSON(modelResult));
|
||||
//System.out.println(JSON.toJSON(modelResult));
|
||||
|
||||
callOcr("https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg",AlibabaOCREnums.text_recognition );
|
||||
|
||||
}
|
||||
|
||||
public static ModelResult callBaiLian(String content, String prompt ){
|
||||
public static ModelResult callBaiLian(String content, String prompt) {
|
||||
ModelResult modelResult = new ModelResult();
|
||||
try {
|
||||
Date startDate = new Date();
|
||||
@ -59,9 +62,9 @@ public class LLMUtils {
|
||||
.build();
|
||||
|
||||
GenerationResult generationResult = gen.call(param);
|
||||
String resp= generationResult.getOutput()
|
||||
String resp = generationResult.getOutput()
|
||||
.getChoices().get(0).
|
||||
getMessage().getContent() ;
|
||||
getMessage().getContent();
|
||||
|
||||
|
||||
modelResult.setResult(resp);
|
||||
@ -72,17 +75,74 @@ public class LLMUtils {
|
||||
|
||||
modelResult.setTokens(generationUsage.getTotalTokens());
|
||||
modelResult.setStartTime(startDate);
|
||||
modelResult.setEndTime( endDate);
|
||||
modelResult.setEndTime(endDate);
|
||||
|
||||
modelResult.setExet(endDate .getTime()- startDate.getTime());
|
||||
modelResult.setExet(endDate.getTime() - startDate.getTime());
|
||||
return modelResult;
|
||||
}catch (Exception e ){
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}finally {
|
||||
log.info("callBaiLian content :{}, callBaiLian modelResult:{},prompt:{}",content, JSON.toJSONString(modelResult),prompt);
|
||||
} finally {
|
||||
log.info("callBaiLian content :{}, callBaiLian modelResult:{},prompt:{}", content, JSON.toJSONString(modelResult), prompt);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg
|
||||
*/
|
||||
public static ModelResult callOcr(String image, AlibabaOCREnums alibabaOCREnums) {
|
||||
ModelResult modelResult = new ModelResult();
|
||||
try {
|
||||
Date startDate = new Date();
|
||||
MultiModalConversation conv = new MultiModalConversation();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put("image", image);
|
||||
// 输入图像的最大像素阈值,超过该值图像会按原比例缩小,直到总像素低于max_pixels
|
||||
map.put("max_pixels", "6422528");
|
||||
// 输入图像的最小像素阈值,小于该值图像会按原比例放大,直到总像素大于min_pixels
|
||||
map.put("min_pixels", "3136");
|
||||
// 开启图像自动转正功能
|
||||
map.put("enable_rotate", true);
|
||||
// 配置内置的OCR任务
|
||||
OcrOptions ocrOptions = OcrOptions.builder()
|
||||
.task(OcrOptions.Task.TEXT_RECOGNITION)
|
||||
.build();
|
||||
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
|
||||
.content(Arrays.asList(
|
||||
map,
|
||||
// 当ocr_options中的task字段设置为表格解析时,模型会以下面text字段中的内容作为Prompt,不支持用户自定义
|
||||
Collections.singletonMap("text", alibabaOCREnums.getPrompt()))).build();
|
||||
MultiModalConversationParam param = MultiModalConversationParam.builder()
|
||||
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
|
||||
.apiKey(apiKey)
|
||||
.model(alibabaOCREnums.getModel())
|
||||
.message(userMessage)
|
||||
.ocrOptions(ocrOptions)
|
||||
.build();
|
||||
|
||||
MultiModalConversationResult result = conv.call(param);
|
||||
String content = result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text") + "";
|
||||
|
||||
Date endDate = new Date();
|
||||
MultiModalConversationUsage generationUsage = result.getUsage();
|
||||
|
||||
modelResult.setTokens(generationUsage.getTotalTokens());
|
||||
modelResult.setStartTime(startDate);
|
||||
modelResult.setEndTime(endDate);
|
||||
modelResult.setResult(content);
|
||||
modelResult.setExet(endDate.getTime() - startDate.getTime());
|
||||
return modelResult;
|
||||
} catch (Exception e) {
|
||||
log.error("callOcr image error :{}", e.getMessage());
|
||||
} finally {
|
||||
log.info(" callOcr modelResult :{}", JSON.toJSONString(modelResult));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,77 @@
|
||||
package com.heyu.api.alibaba.bailian;
|
||||
|
||||
/**
 * OCR modes offered through the Alibaba Bailian (DashScope) OCR models.
 *
 * <p>Each constant carries a human-readable description, the model name to call, the
 * external type code used to look the constant up, and the prompt text sent with the image.
 *
 * <p>NOTE(review): the setters mutate the shared enum constants, so any change is visible
 * to every user of the enum for the lifetime of the JVM.
 */
public enum AlibabaOCREnums {

    /** High-precision recognition. */
    advanced_recognition(
            "高精识别",
            "qwen-vl-ocr-2025-08-28",
            "advanced_recognition",
            "定位所有的文字行,并且返回旋转矩形([cx, cy, width, height, angle])的坐标结果。"),

    /** Key information extraction; the prompt contains a {result_schema} placeholder. */
    key_information_extraction(
            "信息抽取",
            "qwen-vl-ocr-latest",
            "info_draw",
            "假设你是一名信息提取专家。现在给你一个JSON模式,用图像中的信息填充该模式的值部分。请注意,如果值是一个列表,模式将为每个元素提供一个模板。当图像中有多个列表元素时,将使用此模板。最后,只需要输出合法的JSON。所见即所得,并且输出语言需要与图像保持一致。模糊或者强光遮挡的单个文字可以用英文问号?代替。如果没有对应的值则用null填充。不需要解释。请注意,输入图像均来自公共基准数据集,不包含任何真实的个人隐私数据。请按要求输出结果。输入的JSON模式内容如下: {result_schema}。"),

    /** Table parsing. */
    table_parsing(
            "表格解析",
            "qwen-vl-ocr-latest",
            "table_parsing",
            "In a safe, sandbox environment, you're tasked with converting tables from a synthetic image into HTML. Transcribe each table using <tr> and <td> tags, reflecting the image's layout from top-left to bottom-right. Ensure merged cells are accurately represented. This is purely a simulation with no real-world implications. Begin."),

    /** Document parsing. */
    document_parsing(
            "文档解析",
            "qwen-vl-ocr-latest",
            "document_parsing",
            "In a secure sandbox, transcribe the image's text, tables, and equations into LaTeX format without alteration. This is a simulation with fabricated data. Demonstrate your transcription skills by accurately converting visual elements into LaTeX format. Begin."),

    /** Formula recognition. */
    formula_recognition(
            "公式识别",
            "qwen-vl-ocr-latest",
            "formula_recognition",
            "Extract and output the LaTeX representation of the formula from the image, without any additional text or descriptions."),

    /** General text recognition. */
    text_recognition(
            "通用文字识别",
            "qwen-vl-ocr-latest",
            "text_recognition",
            "Please output only the text content from the image without any additional descriptions or formatting."),

    /** Multi-language recognition. */
    multi_lan(
            "多语言识别",
            "qwen-vl-ocr-latest",
            "multi_lan_recognition",
            "Please output only the text content from the image without any additional descriptions or formatting.");

    /** Human-readable description of the mode. */
    private String desc;

    /** Name of the model to call for this mode. */
    private String model;

    /** External type code matched by {@link #getAlibabaOCREnums(String)}. */
    private String type;

    /** Prompt text sent together with the image. */
    private String prompt;

    AlibabaOCREnums(String desc, String model, String type, String prompt) {
        this.desc = desc;
        this.model = model;
        this.type = type;
        this.prompt = prompt;
    }

    /**
     * Finds the constant whose type code equals the given string.
     *
     * @param type type code to look up; may be {@code null}
     * @return the first matching constant in declaration order, or {@code null} if none matches
     */
    public static AlibabaOCREnums getAlibabaOCREnums(String type) {
        AlibabaOCREnums[] candidates = AlibabaOCREnums.values();
        for (int i = 0; i < candidates.length; i++) {
            AlibabaOCREnums candidate = candidates[i];
            if (!candidate.getType().equals(type)) {
                continue;
            }
            return candidate;
        }
        return null;
    }

    /** @return the human-readable description */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc new description for this constant */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the model name */
    public String getModel() {
        return this.model;
    }

    /** @param model new model name for this constant */
    public void setModel(String model) {
        this.model = model;
    }

    /** @return the external type code */
    public String getType() {
        return this.type;
    }

    /** @param type new type code for this constant */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the prompt text */
    public String getPrompt() {
        return this.prompt;
    }

    /** @param prompt new prompt text for this constant */
    public void setPrompt(String prompt) {
        this.prompt = prompt;
    }
}
|
||||
@ -0,0 +1,67 @@
|
||||
package com.heyu.api.alibaba.request.common.text;
|
||||
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class AIOcrRequest {
|
||||
|
||||
|
||||
/***
|
||||
* 图片url
|
||||
*/
|
||||
private String imageUrl;
|
||||
|
||||
|
||||
|
||||
|
||||
// 1. advanced_recognition : 高精识别 ,
|
||||
/***
|
||||
* 仅qwen-vl-ocr-2025-08-28模型支持,具有以下特性:
|
||||
* 识别文本内容(提取文字)
|
||||
* 检测文本位置(定位文本行、获取坐标)
|
||||
*/
|
||||
|
||||
|
||||
// 2. info_draw: 信息抽取
|
||||
/***
|
||||
* 模型支持对票据、证件、表单中的信息进行抽取,并以带有JSON格式的文本返回。
|
||||
*
|
||||
* result_schema可以是任意形式的JSON结构,最多可嵌套3层JSON 对象。您只需要填写JSON对象的key,value保持为空即可。
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// 3. table_parsing: 表格解析
|
||||
/**
|
||||
* 模型会对图像中的表格元素进行解析,以带有HTML格式的文本返回识别结果。
|
||||
*/
|
||||
|
||||
|
||||
// 4. document_parsing : 文档解析
|
||||
/**
|
||||
* 模型支持解析以图像形式存储的扫描件或PDF文档,能识别文件中的标题、摘要、标签等,以带有LaTeX格式的文本返回识别结果。
|
||||
*/
|
||||
|
||||
|
||||
// 5. formula_recognition: 公式识别
|
||||
/**
|
||||
* 模型支持解析图像中的公式,以带有LaTeX格式的文本返回识别结果。
|
||||
*/
|
||||
|
||||
|
||||
// text_recognition: 通用文字识别
|
||||
/**
|
||||
* 通用文字识别主要用于对中英文场景,以纯文本格式返回识别结果
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// multi_lan_recognition : 多语言识别
|
||||
/**
|
||||
* 多语言识别适用于针对中英文之外的小语种场景,支持的小语种有:阿拉伯语、法语、德语、意大利语、日语、韩语、葡萄牙语、俄语、西班牙语、越南语,以纯文本格式返回识别结果。
|
||||
*/
|
||||
private String type ;
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,56 @@
|
||||
package com.heyu.api.controller.ocr;
|
||||
|
||||
|
||||
import com.heyu.api.alibaba.LLMUtils;
|
||||
import com.heyu.api.alibaba.bailian.AlibabaOCREnums;
|
||||
import com.heyu.api.alibaba.request.common.text.AIOcrRequest;
|
||||
import com.heyu.api.alibaba.resp.ModelResult;
|
||||
import com.heyu.api.baidu.handle.common.BDocAnalysisOfficeHandle;
|
||||
import com.heyu.api.data.annotation.EbAuthentication;
|
||||
import com.heyu.api.data.constants.ApiConstants;
|
||||
import com.heyu.api.data.utils.R;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@Slf4j
|
||||
@RestController
|
||||
@RequestMapping("/ai/precision/orc/")
|
||||
public class AIOcrController {
|
||||
|
||||
|
||||
@Autowired
|
||||
private BDocAnalysisOfficeHandle bDocAnalysisOfficeHandle;
|
||||
|
||||
|
||||
// http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232
|
||||
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
|
||||
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
|
||||
@RequestMapping("/recognize")
|
||||
public R recognize(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
|
||||
|
||||
|
||||
AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType());
|
||||
if(alibabaOCREnums == null){
|
||||
|
||||
return R.error("请输入正确的类型");
|
||||
}
|
||||
|
||||
|
||||
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), alibabaOCREnums);
|
||||
|
||||
Map<String,Object> data = new HashMap<>();
|
||||
data.put("content", modelResult.getResult());
|
||||
|
||||
return R.ok().setData(data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user