Merge branch 'master' into zhenzhen_version_1.0.0

This commit is contained in:
quyixiao 2025-10-07 09:58:57 +08:00
commit b6be118797
5 changed files with 100 additions and 22 deletions

View File

@ -36,7 +36,7 @@ public class LLMUtils {
//System.out.println(JSON.toJSON(modelResult));
callOcr("https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg",AlibabaOCREnums.text_recognition );
callOcr("https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg","",AlibabaOCREnums.text_recognition );
}
@ -107,7 +107,7 @@ public class LLMUtils {
map.put("image", "data:image/jpeg;base64," + imageBase64);
}
// 输入图像的最大像素阈值超过该值图像会按原比例缩小直到总像素低于max_pixels
map.put("max_pixels", "6422528");
map.put("max_pixels", "2073600");//限制图片大小 1920x1080
// 输入图像的最小像素阈值小于该值图像会按原比例放大直到总像素大于min_pixels
map.put("min_pixels", "3136");
// 开启图像自动转正功能

View File

@ -0,0 +1,72 @@
package com.heyu.api.alibaba;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;
import com.alibaba.dashscope.utils.JsonUtils;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
 * Small demo client for the DashScope {@code qwen-image-edit} model.
 *
 * <p>It sends one user message containing an image URL and a text instruction through
 * {@link MultiModalConversation} and reads the URL of the edited image from the first
 * choice of the response.
 */
public class QwenImageEdit {

    static {
        // Endpoint of the China (Beijing) region. For models in the Singapore region,
        // replace it with https://dashscope-intl.aliyuncs.com/api/v1
        Constants.baseHttpApiUrl = "https://dashscope.aliyuncs.com/api/v1";
    }

    // API key shared with LLMUtils. Keys for the Singapore and Beijing regions differ;
    // see https://help.aliyun.com/zh/model-studio/get-api-key for how to obtain one.
    static String apiKey = LLMUtils.apiKey;

    /** Model name sent with every request. */
    private static final String MODEL = "qwen-image-edit";

    /** Image used by the no-argument demo {@link #call()}. */
    private static final String DEFAULT_IMAGE_URL = "https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fcbu01.alicdn.com%2Fimg%2Fibank%2F2019%2F790%2F571%2F12184175097_276213056.jpg&refer=http%3A%2F%2Fcbu01.alicdn.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=auto?sec=1761752391&t=9d85136483c31600c81d6dfc20595979";

    /** Edit instruction used by the no-argument demo {@link #call()}. */
    private static final String DEFAULT_PROMPT = "将珍珠戴在美女模特身上";

    /**
     * Runs the demo edit with the built-in image and prompt, printing the raw JSON response
     * and the URL of the edited image to standard output.
     *
     * @throws ApiException          if the DashScope call fails
     * @throws NoApiKeyException     if no API key is available
     * @throws UploadFileException   if the SDK fails to upload an input file
     * @throws IOException           on I/O failure
     * @throws IllegalStateException if the response carries no edited image
     */
    public static void call() throws ApiException, NoApiKeyException, UploadFileException, IOException {
        MultiModalConversationResult result = requestEdit(DEFAULT_IMAGE_URL, DEFAULT_PROMPT);
        System.out.println(JsonUtils.toJson(result));
        System.out.println("输出图像的URL" + extractImageUrl(result));
    }

    /**
     * Edits a single image according to a text instruction.
     *
     * @param imageUrl URL of the image to edit; must not be null or empty
     * @param prompt   text instruction describing the edit; must not be null or empty
     * @return the URL of the edited image as reported by the service
     * @throws IllegalArgumentException if {@code imageUrl} or {@code prompt} is null or empty
     * @throws IllegalStateException    if the response carries no edited image
     * @throws ApiException             if the DashScope call fails
     * @throws NoApiKeyException        if no API key is available
     * @throws UploadFileException      if the SDK fails to upload an input file
     * @throws IOException              on I/O failure
     */
    public static String call(String imageUrl, String prompt)
            throws ApiException, NoApiKeyException, UploadFileException, IOException {
        return extractImageUrl(requestEdit(imageUrl, prompt));
    }

    /** Builds the request for one image plus one instruction and sends it to the model. */
    private static MultiModalConversationResult requestEdit(String imageUrl, String prompt)
            throws ApiException, NoApiKeyException, UploadFileException, IOException {
        if (imageUrl == null || imageUrl.isEmpty()) {
            throw new IllegalArgumentException("imageUrl must not be null or empty");
        }
        if (prompt == null || prompt.isEmpty()) {
            throw new IllegalArgumentException("prompt must not be null or empty");
        }

        MultiModalConversation conv = new MultiModalConversation();
        MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
                .content(Arrays.asList(
                        Collections.singletonMap("image", imageUrl),
                        Collections.singletonMap("text", prompt)
                )).build();
        // Multi-image editing example: add one "image" entry per input image, e.g.
        // MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
        //         .content(Arrays.asList(
        //                 Collections.singletonMap("image", "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250923/znhvuj/shoes1.webp"),
        //                 Collections.singletonMap("image", "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250923/alubtv/shoes2.webp"),
        //                 Collections.singletonMap("text", "用图中黄色的鞋替换图中白色的鞋")
        //         )).build();

        Map<String, Object> parameters = new HashMap<>();
        parameters.put("watermark", false);
        parameters.put("negative_prompt", "");

        MultiModalConversationParam param = MultiModalConversationParam.builder()
                .apiKey(apiKey)
                .model(MODEL)
                .messages(Collections.singletonList(userMessage))
                .parameters(parameters)
                .build();
        return conv.call(param);
    }

    /**
     * Reads the "image" entry from the first content item of the first choice, failing with a
     * descriptive message instead of a bare NullPointerException/IndexOutOfBoundsException
     * when the response does not have the expected shape.
     */
    private static String extractImageUrl(MultiModalConversationResult result) {
        if (result == null
                || result.getOutput() == null
                || result.getOutput().getChoices() == null
                || result.getOutput().getChoices().isEmpty()) {
            throw new IllegalStateException("qwen-image-edit response contains no choices");
        }
        if (result.getOutput().getChoices().get(0).getMessage() == null
                || result.getOutput().getChoices().get(0).getMessage().getContent() == null
                || result.getOutput().getChoices().get(0).getMessage().getContent().isEmpty()) {
            throw new IllegalStateException("qwen-image-edit response contains no message content");
        }
        Object image = result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("image");
        if (image == null) {
            throw new IllegalStateException("qwen-image-edit response contains no \"image\" entry");
        }
        return String.valueOf(image);
    }

    /** Command-line entry point: runs the demo edit and reports failures on standard error. */
    public static void main(String[] args) {
        try {
            call();
        } catch (ApiException | NoApiKeyException | UploadFileException | IOException e) {
            // Keep the exception type and stack trace; getMessage() alone may be null.
            System.err.println("qwen-image-edit call failed: " + e);
            e.printStackTrace();
        }
    }
}

View File

@ -20,23 +20,4 @@ public class AIOcrRequest {
// 2. info_draw: 信息抽取
/***
* 模型支持对票据证件表单中的信息进行抽取并以带有JSON格式的文本返回
*
* result_schema可以是任意形式的JSON结构最多可嵌套3层JSON 对象您只需要填写JSON对象的keyvalue保持为空即可
*/
// text_recognition: 通用文字识别
/**
* 通用文字识别主要用于对中英文场景以纯文本格式返回识别结果
*/
private String type ;
}

View File

@ -40,7 +40,6 @@ public class AIOcrController {
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/advanced/recognition")
public R advanced_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType());
ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(),
AlibabaOCREnums.advanced_recognition);
Map<String, Object> data = new HashMap<>();
@ -106,6 +105,27 @@ public class AIOcrController {
}
// http://localhost:8888/ai/orc/text/recognition?imageUrl=https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
/**
 * 4. text_recognition: general text recognition.
 *
 * <p>Forwards the image supplied in the request (URL and/or Base64 payload) to
 * {@code LLMUtils.callOcr} in {@code AlibabaOCREnums.text_recognition} mode and returns
 * the model result under the {@code "content"} key of the response data.
 *
 * @param aiOcrRequest request body carrying the image URL and/or Base64 image
 * @return a success response whose data map holds the recognition result as {@code "content"}
 * @throws Exception if the OCR call fails
 */
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/text/recognition")
public R text_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception {
    Map<String, Object> payload = new HashMap<>();
    payload.put(
            "content",
            LLMUtils.callOcr(
                    aiOcrRequest.getImageUrl(),
                    aiOcrRequest.getImageBase64(),
                    AlibabaOCREnums.text_recognition
            ).getResult());
    return R.ok().setData(payload);
}
// http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232
// https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg
// 5. formula_recognition: 公式识别

View File

@ -38,6 +38,11 @@ public class GeneralBasicOcrController extends BaseController {
private BGeneralBasicHandle bGeneralBasicHandle;
/**
 * Alias endpoint mapped to {@code /recognize1} that hands the request unchanged to
 * {@code recognize} and returns its response.
 *
 * @param generalBasicOcrRequest request body passed through to {@code recognize}
 * @return whatever {@code recognize} returns for the same request
 * @throws Exception if {@code recognize} fails
 */
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/recognize1")
public R recognize1(@RequestBody GeneralBasicOcrRequest generalBasicOcrRequest) throws Exception {
    R delegated = recognize(generalBasicOcrRequest);
    return delegated;
}
// http://localhost:8888/general/text/orc/recognize?imageBase64=3232
@EbAuthentication(tencent = ApiConstants.TENCENT_AUTH)
@RequestMapping("/recognize")