diff --git a/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java b/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java index 080ab49..96d3cda 100644 --- a/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java +++ b/api-third/src/main/java/com/heyu/api/alibaba/LLMUtils.java @@ -36,7 +36,7 @@ public class LLMUtils { //System.out.println(JSON.toJSON(modelResult)); - callOcr("https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg",AlibabaOCREnums.text_recognition ); + callOcr("https://heyuoss.oss-cn-shanghai.aliyuncs.com/test/ccc.jpg","",AlibabaOCREnums.text_recognition ); } @@ -107,7 +107,7 @@ public class LLMUtils { map.put("image", "data:image/jpeg;base64," + imageBase64); } // 输入图像的最大像素阈值,超过该值图像会按原比例缩小,直到总像素低于max_pixels - map.put("max_pixels", "6422528"); + map.put("max_pixels", "2073600");//限制图片大小 1920x1080 // 输入图像的最小像素阈值,小于该值图像会按原比例放大,直到总像素大于min_pixels map.put("min_pixels", "3136"); // 开启图像自动转正功能 diff --git a/api-third/src/main/java/com/heyu/api/alibaba/QwenImageEdit.java b/api-third/src/main/java/com/heyu/api/alibaba/QwenImageEdit.java new file mode 100644 index 0000000..f91e61d --- /dev/null +++ b/api-third/src/main/java/com/heyu/api/alibaba/QwenImageEdit.java @@ -0,0 +1,72 @@ +package com.heyu.api.alibaba; + +import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation; +import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam; +import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult; +import com.alibaba.dashscope.common.MultiModalMessage; +import com.alibaba.dashscope.common.Role; +import com.alibaba.dashscope.exception.ApiException; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.alibaba.dashscope.exception.UploadFileException; +import com.alibaba.dashscope.utils.Constants; +import com.alibaba.dashscope.utils.JsonUtils; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class QwenImageEdit { + + static { + // 以下为中国(北京)地域url,若使用新加坡地域的模型,需将url替换为:https://dashscope-intl.aliyuncs.com/api/v1 + Constants.baseHttpApiUrl = "https://dashscope.aliyuncs.com/api/v1"; + } + + // 若没有配置环境变量,请用百炼 API Key 将下行替换为:apiKey="sk-xxx" + // 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key + static String apiKey = LLMUtils.apiKey; + + public static void call() throws ApiException, NoApiKeyException, UploadFileException, IOException { + + MultiModalConversation conv = new MultiModalConversation(); + + MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue()) + .content(Arrays.asList( + Collections.singletonMap("image", "https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fcbu01.alicdn.com%2Fimg%2Fibank%2F2019%2F790%2F571%2F12184175097_276213056.jpg&refer=http%3A%2F%2Fcbu01.alicdn.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=auto?sec=1761752391&t=9d85136483c31600c81d6dfc20595979"), + Collections.singletonMap("text", "将珍珠戴在美女模特身上") + )).build(); + + // 多图编辑示例 + // MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue()) + // .content(Arrays.asList( + // Collections.singletonMap("image", "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250923/znhvuj/shoes1.webp"), + // Collections.singletonMap("image", "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250923/alubtv/shoes2.webp"), + // Collections.singletonMap("text", "用图中黄色的鞋替换图中白色的鞋") + // )).build(); + + Map parameters = new HashMap<>(); + parameters.put("watermark", false); + parameters.put("negative_prompt", ""); + + MultiModalConversationParam param = MultiModalConversationParam.builder() + .apiKey(apiKey) + .model("qwen-image-edit") + .messages(Collections.singletonList(userMessage)) + .parameters(parameters) + .build(); + + MultiModalConversationResult result = conv.call(param); + System.out.println(JsonUtils.toJson(result)); + System.out.println("输出图像的URL:" + result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("image")); + } + + public static void main(String[] args) { + try { + call(); + } catch (ApiException | NoApiKeyException | UploadFileException | IOException e) { + System.out.println(e.getMessage()); + } + } +} \ No newline at end of file diff --git a/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java b/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java index 3d05b33..53c6b03 100644 --- a/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java +++ b/api-third/src/main/java/com/heyu/api/alibaba/request/common/text/AIOcrRequest.java @@ -20,23 +20,4 @@ public class AIOcrRequest { - // 2. info_draw: 信息抽取 - /*** - * 模型支持对票据、证件、表单中的信息进行抽取,并以带有JSON格式的文本返回。 - * - * result_schema可以是任意形式的JSON结构,最多可嵌套3层JSON 对象。您只需要填写JSON对象的key,value保持为空即可。 - */ - - - - - // text_recognition: 通用文字识别 - /** - * 通用文字识别主要用于对中英文场景,以纯文本格式返回识别结果 - */ - - - private String type ; - - } diff --git a/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java b/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java index d0168ce..20764c2 100644 --- a/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java +++ b/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/AIOcrController.java @@ -40,7 +40,6 @@ public class AIOcrController { @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) @RequestMapping("/advanced/recognition") public R advanced_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { - AlibabaOCREnums alibabaOCREnums = AlibabaOCREnums.getAlibabaOCREnums(aiOcrRequest.getType()); ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(), AlibabaOCREnums.advanced_recognition); Map data = new HashMap<>(); @@ -106,6 +105,27 @@ public class AIOcrController { } + // http://localhost:8888/ai/orc/text/recognition?imageUrl=https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg + + /*** + * + / 4. text_recognition : 通用文字识别 + /** + * 模型支持解析以图像形式存储的扫描件或PDF文档,能识别文件中的标题、摘要、标签等,以带有LaTeX格式的文本返回识别结果。 + */ + @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) + @RequestMapping("/text/recognition") + public R text_recognition(@RequestBody AIOcrRequest aiOcrRequest) throws Exception { + ModelResult modelResult = LLMUtils.callOcr(aiOcrRequest.getImageUrl(), aiOcrRequest.getImageBase64(), + AlibabaOCREnums.text_recognition); + Map data = new HashMap<>(); + data.put("content", modelResult.getResult()); + return R.ok().setData(data); + } + + + // http://localhost:8888/ai/precision/orc/recognize?imageBase64=3232 // https://heyuoss.oss-cn-shanghai.aliyuncs.com/prd/testxxx.jpg // 5. formula_recognition: 公式识别 diff --git a/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/GeneralBasicOcrController.java b/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/GeneralBasicOcrController.java index dddc7d9..8413c8d 100644 --- a/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/GeneralBasicOcrController.java +++ b/api-web/api-interface/src/main/java/com/heyu/api/controller/ocr/GeneralBasicOcrController.java @@ -38,6 +38,11 @@ public class GeneralBasicOcrController extends BaseController { private BGeneralBasicHandle bGeneralBasicHandle; + @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) + @RequestMapping("/recognize1") + public R recognize1(@RequestBody GeneralBasicOcrRequest generalBasicOcrRequest) throws Exception { + return recognize(generalBasicOcrRequest); + } // http://localhost:8888/general/text/orc/recognize?imageBase64=3232 @EbAuthentication(tencent = ApiConstants.TENCENT_AUTH) @RequestMapping("/recognize")