package com.accounting.util;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;

import java.math.BigDecimal;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts transaction records (amount, merchant text, date/time) from the JSON
 * returned by an OCR service.
 *
 * <p>The entry point is {@link #parse(String)}. It reads {@code data.content} from
 * the JSON, splits it on single spaces, and treats every token that starts with
 * {@code +} or {@code -} as the amount of one transaction. The token immediately
 * before the amount is used as the merchant text, and the first of the next
 * three tokens that looks like a "month/day" string is used as the date.
 */
public class OcrAmountParser {

    private static final Logger LOG = Logger.getLogger(OcrAmountParser.class.getName());

    /** How many tokens after an amount token are inspected when looking for its date. */
    private static final int DATE_LOOKAHEAD = 3;

    /**
     * Amount pattern: an optional yen/yuan sign, up to ten integer digits, an optional
     * fraction of one or two digits, and an optional trailing "元".
     * Group 1 is the bare number. Both the half-width (U+00A5) and the full-width
     * (U+FFE5) currency signs are accepted; they are written as escapes so the two
     * look-alike characters stay distinguishable in source.
     */
    private static final Pattern AMOUNT_PATTERN = Pattern.compile(
            "[\u00A5\uFFE5]?\\s*(\\d{1,10}(\\.\\d{1,2})?)\\s*[元]?"
    );

    /** Loose date detector: the whole string must contain "月" followed, somewhere later, by "日". */
    private static final Pattern UNION_DATE_PATTERN = Pattern.compile(".*月.*日.*");

    /**
     * Date/time pattern for strings such as "12月2日13:14".
     * Groups: 1 = month, 2 = day, 3 = hour, 4 = minute. The separator between hour
     * and minute may be an ASCII colon or a full-width colon (U+FF1A).
     */
    private static final Pattern DATE_PATTERN = Pattern.compile(
            "(\\d{1,2})月(\\d{1,2})日\\s*(\\d{1,2})[:\uFF1A](\\d{1,2})"
    );

    /** Keywords (payment platforms, shop types) that are stripped from a line to obtain the merchant name. */
    private static final String[] MERCHANT_KEYWORDS = {
        "微信支付", "支付宝", "收款", "付款", "商户", "商家", "店铺", "超市", "餐厅", "饭店"
    };

    /**
     * One parsed transaction. Any field may be {@code null} when the corresponding
     * piece of text could not be recognised.
     */
    public static class ParseResult {
        private BigDecimal amount;
        private String merchant;
        /** Date and time of the transaction (a {@link LocalDateTime} so the time of day is kept). */
        private LocalDateTime date;
        private BigDecimal confidence;

        public ParseResult(BigDecimal amount, String merchant, LocalDateTime date, BigDecimal confidence) {
            this.amount = amount;
            this.merchant = merchant;
            this.date = date;
            this.confidence = confidence;
        }

        public ParseResult() {
        }

        public void setAmount(BigDecimal amount) {
            this.amount = amount;
        }

        public void setMerchant(String merchant) {
            this.merchant = merchant;
        }

        public void setDate(LocalDateTime date) {
            this.date = date;
        }

        public void setConfidence(BigDecimal confidence) {
            this.confidence = confidence;
        }

        public BigDecimal getAmount() {
            return amount;
        }

        public String getMerchant() {
            return merchant;
        }

        public LocalDateTime getDate() {
            return date;
        }

        public BigDecimal getConfidence() {
            return confidence;
        }
    }

    /**
     * Tells whether a token looks like a date, i.e. contains "月" followed by "日".
     *
     * @param dateStr candidate token, may be {@code null}
     * @return {@code true} if the trimmed token matches {@link #UNION_DATE_PATTERN}
     */
    private static boolean isValidDate(String dateStr) {
        if (dateStr == null || dateStr.trim().isEmpty()) {
            return false;
        }
        return UNION_DATE_PATTERN.matcher(dateStr.trim()).matches();
    }

    /**
     * Converts an amount token such as "-12.50", "+100" or "-¥8.00元" to a {@link BigDecimal}.
     *
     * @param moneyStr amount text, optionally prefixed with {@code +} or {@code -}
     * @return the signed amount, or {@code null} if the text is blank or contains no number
     */
    private static BigDecimal parseMoneyString(String moneyStr) {
        if (moneyStr == null) {
            return null;
        }
        String cleaned = moneyStr.trim();
        if (cleaned.isEmpty()) {
            return null;
        }

        // Remember the sign, then drop a single leading '+' or '-'.
        boolean negative = cleaned.startsWith("-");
        if (negative || cleaned.startsWith("+")) {
            cleaned = cleaned.substring(1);
        }

        Matcher matcher = AMOUNT_PATTERN.matcher(cleaned);
        if (!matcher.find()) {
            return null;
        }
        // Group 1 is digits with an optional 1-2 digit fraction, so the BigDecimal
        // constructor cannot reject it; no exception handling is needed here.
        BigDecimal amount = new BigDecimal(matcher.group(1));
        return negative ? amount.negate() : amount;
    }

    /**
     * Parses a string such as "12月2日13:14" into a {@link LocalDateTime}.
     * The text carries no year, so the current year is assumed.
     *
     * @param dateStr date text
     * @return the parsed date-time, or {@code null} if the text is blank, does not match
     *         {@link #DATE_PATTERN}, or denotes an impossible date/time (e.g. month 13)
     */
    private static LocalDateTime parseDateTimeString(String dateStr) {
        if (dateStr == null || dateStr.trim().isEmpty()) {
            return null;
        }
        Matcher matcher = DATE_PATTERN.matcher(dateStr.trim());
        if (!matcher.find()) {
            return null;
        }
        // Each group is one or two digits, so Integer.parseInt cannot fail.
        int month = Integer.parseInt(matcher.group(1));
        int day = Integer.parseInt(matcher.group(2));
        int hour = Integer.parseInt(matcher.group(3));
        int minute = Integer.parseInt(matcher.group(4));
        int year = LocalDate.now().getYear();
        try {
            return LocalDateTime.of(year, month, day, hour, minute);
        } catch (DateTimeException e) {
            // OCR noise can yield out-of-range values; report it and treat the date as unknown.
            LOG.log(Level.WARNING, "Unparseable date/time: " + dateStr, e);
            return null;
        }
    }

    /**
     * Parses an OCR response and extracts one {@link ParseResult} per recognised amount.
     *
     * <p>The JSON is expected to hold a {@code data} member which itself is (or contains)
     * a JSON object with a {@code content} string. The content is split on single spaces.
     * A token starting with {@code +} or {@code -} is an amount; scanning starts at the
     * second token because the token before the amount supplies the merchant text.
     *
     * @param ocrResultJson raw JSON text returned by the OCR service
     * @return the parsed transactions (possibly empty); when {@code data} or its
     *         {@code content} is missing, a single placeholder result whose fields are
     *         {@code null} and whose confidence is zero
     * @throws RuntimeException if the JSON cannot be processed; the original exception
     *         is attached as the cause
     */
    public static List<ParseResult> parse(String ocrResultJson) {
        try {
            JSONObject root = JSON.parseObject(ocrResultJson);
            String data = root.getString("data");
            if (data == null) {
                return noDataResult();
            }
            JSONObject dataObject = JSON.parseObject(data);
            String content = dataObject == null ? null : dataObject.getString("content");
            if (content == null) {
                // Same outcome as a missing "data" member instead of a NullPointerException.
                return noDataResult();
            }
            return parseTokens(content.split(" "));
        } catch (Exception e) {
            throw new RuntimeException("解析过程中出错,请重试或联系管理员,报错信息:" + e, e);
        }
    }

    /** Builds the placeholder list returned when the OCR response carries no usable text. */
    private static List<ParseResult> noDataResult() {
        return List.of(new ParseResult(null, null, null, BigDecimal.ZERO));
    }

    /** Walks the space-separated tokens and builds one result per token that starts with '+' or '-'. */
    private static List<ParseResult> parseTokens(String[] tokens) {
        List<ParseResult> results = new ArrayList<>();
        // Start at 1: the token before an amount is needed as the merchant text.
        for (int i = 1; i < tokens.length; i++) {
            String current = tokens[i].trim();
            if (!current.startsWith("+") && !current.startsWith("-")) {
                continue;
            }
            ParseResult result = new ParseResult();
            result.setAmount(parseMoneyString(current));
            result.setMerchant(parseMerchant(tokens[i - 1].trim()));
            String dateToken = findDateToken(tokens, i);
            if (dateToken != null) {
                result.setDate(parseDateTimeString(dateToken));
            }
            results.add(result);
        }
        return results;
    }

    /** Returns the first of the (up to) three tokens after {@code index} that looks like a date, or {@code null}. */
    private static String findDateToken(String[] tokens, int index) {
        int last = Math.min(index + DATE_LOOKAHEAD, tokens.length - 1);
        for (int j = index + 1; j <= last; j++) {
            String candidate = tokens[j].trim();
            if (isValidDate(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Collects the recognised text from an OCR {@code data} object.
     * The words of the {@code prism_wordsInfo} array are joined with spaces; if that
     * yields nothing, the plain {@code content} string is used instead.
     *
     * @param data OCR data object
     * @return the trimmed text, or an empty string if neither field is present
     */
    private static String extractContent(JSONObject data) {
        StringBuilder content = new StringBuilder();

        JSONArray wordsInfo = data.getJSONArray("prism_wordsInfo");
        if (wordsInfo != null) {
            for (int i = 0; i < wordsInfo.size(); i++) {
                String word = wordsInfo.getJSONObject(i).getString("word");
                if (word != null) {
                    content.append(word).append(" ");
                }
            }
        }

        // Fall back to the plain "content" field when no words were collected.
        if (content.length() == 0) {
            String plain = data.getString("content");
            if (plain != null) {
                content.append(plain);
            }
        }
        return content.toString().trim();
    }

    /**
     * Finds every amount in the text and returns the largest one.
     *
     * @param content text to scan
     * @return the largest amount found, or {@code null} if the text contains none
     */
    private static BigDecimal parseAmount(String content) {
        Matcher matcher = AMOUNT_PATTERN.matcher(content);
        BigDecimal maxAmount = null;
        while (matcher.find()) {
            // Group 1 is always a well-formed decimal number (see AMOUNT_PATTERN).
            BigDecimal amount = new BigDecimal(matcher.group(1));
            if (maxAmount == null || amount.compareTo(maxAmount) > 0) {
                maxAmount = amount;
            }
        }
        return maxAmount;
    }

    /**
     * Extracts the calendar date from text such as "12月2日13:14".
     * {@link #DATE_PATTERN} captures month and day only (groups 1 and 2), so the
     * current year is assumed.
     *
     * @param content text to scan
     * @return the date, or {@code null} if no date is found or it is not a valid day
     */
    private static LocalDate parseDate(String content) {
        Matcher matcher = DATE_PATTERN.matcher(content);
        if (!matcher.find()) {
            return null;
        }
        int month = Integer.parseInt(matcher.group(1));
        int day = Integer.parseInt(matcher.group(2));
        try {
            return LocalDate.of(LocalDate.now().getYear(), month, day);
        } catch (DateTimeException ignored) {
            // Out-of-range month/day from OCR noise: treat as "no date".
            return null;
        }
    }

    /**
     * Derives a merchant name from a piece of text.
     *
     * <p>First pass: for each line that contains one of {@link #MERCHANT_KEYWORDS}, the
     * keyword is removed and the remainder is returned if it is non-empty and shorter
     * than 50 characters. Second pass: the first non-empty line shorter than 50
     * characters in which neither {@link #AMOUNT_PATTERN} nor {@link #DATE_PATTERN}
     * finds a match is returned. Because the amount pattern matches any run of digits,
     * the second pass skips every line that contains a digit.
     *
     * @param content text to inspect (lines separated by {@code \n})
     * @return the merchant name, or {@code null} if nothing suitable is found
     */
    private static String parseMerchant(String content) {
        String[] lines = content.split("\n");

        for (String line : lines) {
            for (String keyword : MERCHANT_KEYWORDS) {
                if (!line.contains(keyword)) {
                    continue;
                }
                String merchant = line.replace(keyword, "").trim();
                if (!merchant.isEmpty() && merchant.length() < 50) {
                    return merchant;
                }
            }
        }

        for (String rawLine : lines) {
            String line = rawLine.trim();
            if (!line.isEmpty()
                    && line.length() < 50
                    && !AMOUNT_PATTERN.matcher(line).find()
                    && !DATE_PATTERN.matcher(line).find()) {
                return line;
            }
        }
        return null;
    }
}