处理韦总地址结构化代码
parent
1b0876d735
commit
3de061a843
|
|
@ -2,14 +2,18 @@ package org.dromara.extract.controller;
|
|||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.dromara.common.core.domain.R;
|
||||
import org.dromara.common.core.utils.StringUtils;
|
||||
import org.dromara.common.web.core.BaseController;
|
||||
import org.dromara.extract.service.impl.DataMigrationService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -52,6 +56,41 @@ public class AddressController extends BaseController {
|
|||
return ResponseEntity.ok("✅ 迁移任务已提交,正在后台执行...");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@GetMapping("/startsh")
|
||||
public ResponseEntity<String> startshMigration() {
|
||||
try {
|
||||
log.info("🚀 开始执行涉黄数据迁移任务...");
|
||||
dataMigrationService.startShMigration();
|
||||
} catch (Exception e) {
|
||||
log.error("❌ 迁移任务执行异常", e);
|
||||
} finally {
|
||||
isRunning = false;
|
||||
}
|
||||
return ResponseEntity.ok("✅ 迁移任务已提交,正在后台执行...");
|
||||
}
|
||||
|
||||
|
||||
@GetMapping("/startsd")
|
||||
public ResponseEntity<String> startsdMigration() {
|
||||
|
||||
try {
|
||||
log.info("🚀 开始执行涉赌数据迁移任务...");
|
||||
dataMigrationService.startSdMigration();
|
||||
} catch (Exception e) {
|
||||
log.error("❌ 迁移任务执行异常", e);
|
||||
} finally {
|
||||
isRunning = false;
|
||||
}
|
||||
|
||||
return ResponseEntity.ok("✅ 迁移任务已提交,正在后台执行...");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@GetMapping("/status")
|
||||
public ResponseEntity<Map<String, Object>> getStatus() {
|
||||
Map<String, Object> status = new HashMap<>();
|
||||
|
|
@ -60,17 +99,6 @@ public class AddressController extends BaseController {
|
|||
return ResponseEntity.ok(status);
|
||||
}
|
||||
|
||||
@GetMapping("/stop")
|
||||
public ResponseEntity<String> stopMigration() {
|
||||
// 注意:当前线程池没有实现优雅关闭
|
||||
// 这里只是一个提示,实际需要在 Service 中支持中断
|
||||
if (!isRunning) {
|
||||
return ResponseEntity.ok("任务未运行,无需停止");
|
||||
}
|
||||
|
||||
// ⚠️ 提示用户:无法强制停止,建议手动关闭应用或增强 Service
|
||||
return ResponseEntity.ok("⚠️ 停止功能暂不支持。请重启应用或增强线程控制逻辑。");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,354 @@
|
|||
package org.dromara.extract.controller;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.dromara.extract.service.impl.LargeExcelAddressParseService;
|
||||
import org.dromara.extract.util.ExcelProcessTask;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ByteArrayResource;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* Excel地址解析控制器 - 大文件并发处理
|
||||
* 支持多请求并发处理,保留预览50条功能
|
||||
*/
|
||||
|
||||
@Slf4j
|
||||
@RestController
|
||||
@RequestMapping("/api/excel")
|
||||
public class ExcelPreviewController {
|
||||
|
||||
@Autowired
|
||||
private LargeExcelAddressParseService excelService;
|
||||
|
||||
/**
|
||||
* 上传并预览处理(前50行) - 核心功能
|
||||
*/
|
||||
@PostMapping("/upload-preview")
|
||||
public ResponseEntity<?> uploadAndPreview(@RequestParam("file") MultipartFile file) {
|
||||
try {
|
||||
validateLargeExcelFile(file);
|
||||
|
||||
String requestId = UUID.randomUUID().toString();
|
||||
log.info("收到大文件预览请求 [{}]: {},大小: {} MB",
|
||||
requestId, file.getOriginalFilename(),
|
||||
file.getSize() / (1024.0 * 1024.0));
|
||||
|
||||
// 异步处理预览50行
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
return excelService.processLargeExcelFile(file, requestId, true); // previewOnly=true
|
||||
});
|
||||
|
||||
// 立即返回任务信息,用户可轮询状态
|
||||
Map<String, Object> response = createPreviewResponse(requestId, file);
|
||||
|
||||
return ResponseEntity.accepted().body(response);
|
||||
|
||||
} catch (IllegalArgumentException e) {
|
||||
return ResponseEntity.badRequest().body(createErrorResponse(e.getMessage()));
|
||||
} catch (Exception e) {
|
||||
log.error("大文件预览请求失败", e);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
|
||||
.body(createErrorResponse("预览处理失败: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 完整处理(一次性处理所有行,跳过预览)
|
||||
*/
|
||||
@PostMapping("/upload-full")
|
||||
public ResponseEntity<?> uploadAndFullProcess(@RequestParam("file") MultipartFile file) {
|
||||
try {
|
||||
validateLargeExcelFile(file);
|
||||
|
||||
String requestId = UUID.randomUUID().toString();
|
||||
log.info("收到大文件完整处理请求 [{}]: {},大小: {} MB",
|
||||
requestId, file.getOriginalFilename(),
|
||||
file.getSize() / (1024.0 * 1024.0));
|
||||
|
||||
// 异步完整处理
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
return excelService.processLargeExcelFile(file, requestId, false); // previewOnly=false
|
||||
});
|
||||
|
||||
Map<String, Object> response = new HashMap<>();
|
||||
response.put("requestId", requestId);
|
||||
response.put("filename", file.getOriginalFilename());
|
||||
response.put("fileSize", formatFileSize(file.getSize()));
|
||||
response.put("status", "FULL_PROCESSING");
|
||||
response.put("message", "正在完整处理文件...");
|
||||
response.put("estimatedTime", estimateFullProcessTime(file.getSize()));
|
||||
response.put("statusUrl", "/api/excel/status/" + requestId);
|
||||
response.put("isPreview", false);
|
||||
|
||||
return ResponseEntity.accepted().body(response);
|
||||
|
||||
} catch (IllegalArgumentException e) {
|
||||
return ResponseEntity.badRequest().body(createErrorResponse(e.getMessage()));
|
||||
} catch (Exception e) {
|
||||
log.error("大文件完整处理请求失败", e);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
|
||||
.body(createErrorResponse("完整处理失败: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 下载完整处理文件
|
||||
*/
|
||||
@GetMapping("/download/{requestId}")
|
||||
public ResponseEntity<Resource> downloadFile(@PathVariable String requestId) {
|
||||
try {
|
||||
// 获取任务信息
|
||||
ExcelProcessTask task = excelService.getTaskStatus(requestId);
|
||||
if (task == null) {
|
||||
return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
|
||||
}
|
||||
|
||||
// 从任务信息中获取文件路径
|
||||
// 注意:LargeExcelAddressParseService需要提供获取输出文件的方法
|
||||
File outputFile = getOutputFileFromTask(task);
|
||||
|
||||
if (outputFile == null || !outputFile.exists()) {
|
||||
log.warn("输出文件不存在 [{}]", requestId);
|
||||
return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
|
||||
}
|
||||
|
||||
byte[] content = Files.readAllBytes(outputFile.toPath());
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
|
||||
headers.setContentDispositionFormData("attachment",
|
||||
"processed_" + task.getFilename());
|
||||
headers.setContentLength(content.length);
|
||||
|
||||
if (task.isPreviewMode()) {
|
||||
headers.add("X-Processing-Type", "preview-and-continue");
|
||||
headers.add("X-Preview-Completed", String.valueOf(task.getPreviewCompletedRows()));
|
||||
} else {
|
||||
headers.add("X-Processing-Type", "full-process");
|
||||
}
|
||||
|
||||
return new ResponseEntity<>(new ByteArrayResource(content), headers, HttpStatus.OK);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("下载文件失败 [{}]", requestId, e);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 取消任务
|
||||
*/
|
||||
@PostMapping("/cancel/{requestId}")
|
||||
public ResponseEntity<?> cancelTask(@PathVariable String requestId) {
|
||||
try {
|
||||
// 这里需要服务层支持取消任务的功能
|
||||
// excelService.cancelTask(requestId);
|
||||
|
||||
Map<String, Object> response = new HashMap<>();
|
||||
response.put("requestId", requestId);
|
||||
response.put("status", "CANCELLED");
|
||||
response.put("message", "任务取消请求已提交");
|
||||
response.put("timestamp", System.currentTimeMillis());
|
||||
|
||||
return ResponseEntity.ok(response);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("取消任务失败 [{}]", requestId, e);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
|
||||
.body(createErrorResponse("取消任务失败: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 清理过期任务(管理员接口)
|
||||
*/
|
||||
@PostMapping("/admin/cleanup")
|
||||
public ResponseEntity<?> cleanupOldTasks(@RequestHeader(value = "X-Admin-Token", required = false) String adminToken) {
|
||||
try {
|
||||
// 简单的管理员验证
|
||||
if (!"your-admin-token".equals(adminToken)) {
|
||||
return ResponseEntity.status(HttpStatus.UNAUTHORIZED)
|
||||
.body(createErrorResponse("未授权"));
|
||||
}
|
||||
|
||||
// 获取清理前的任务数量
|
||||
int beforeCount = excelService.getAllActiveTasks().size();
|
||||
|
||||
// 清理操作
|
||||
// excelService.cleanupExpiredTasks();
|
||||
|
||||
Map<String, Object> response = new HashMap<>();
|
||||
response.put("action", "cleanup");
|
||||
response.put("beforeCount", beforeCount);
|
||||
response.put("afterCount", excelService.getAllActiveTasks().size());
|
||||
response.put("timestamp", System.currentTimeMillis());
|
||||
|
||||
log.info("管理员清理过期任务,清理前: {},清理后: {}", beforeCount, response.get("afterCount"));
|
||||
|
||||
return ResponseEntity.ok(response);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("清理任务失败", e);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
|
||||
.body(createErrorResponse("清理失败: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
// ============ 辅助方法 ============
|
||||
|
||||
/**
|
||||
* 创建预览响应
|
||||
*/
|
||||
private Map<String, Object> createPreviewResponse(String requestId, MultipartFile file) {
|
||||
Map<String, Object> response = new HashMap<>();
|
||||
response.put("requestId", requestId);
|
||||
response.put("filename", file.getOriginalFilename());
|
||||
response.put("fileSize", formatFileSize(file.getSize()));
|
||||
response.put("status", "PREVIEW_PROCESSING");
|
||||
response.put("message", "正在处理前50行预览...");
|
||||
response.put("previewRows", 50); // 明确显示预览行数
|
||||
response.put("previewStatusUrl", "/api/excel/preview/status/" + requestId);
|
||||
response.put("fullStatusUrl", "/api/excel/status/" + requestId);
|
||||
response.put("estimatedTime", estimatePreviewTime(file.getSize()));
|
||||
response.put("isPreview", true);
|
||||
response.put("timestamp", System.currentTimeMillis());
|
||||
return response;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 创建错误响应
|
||||
*/
|
||||
private Map<String, Object> createErrorResponse(String error) {
|
||||
Map<String, Object> response = new HashMap<>();
|
||||
response.put("error", error);
|
||||
response.put("timestamp", System.currentTimeMillis());
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证大文件
|
||||
*/
|
||||
private void validateLargeExcelFile(MultipartFile file) {
|
||||
if (file.isEmpty()) {
|
||||
throw new IllegalArgumentException("文件不能为空");
|
||||
}
|
||||
|
||||
String filename = file.getOriginalFilename();
|
||||
if (filename == null) {
|
||||
throw new IllegalArgumentException("文件名不能为空");
|
||||
}
|
||||
|
||||
if (!filename.toLowerCase().endsWith(".xlsx") &&
|
||||
!filename.toLowerCase().endsWith(".xls")) {
|
||||
throw new IllegalArgumentException("只支持Excel文件(.xlsx, .xls)");
|
||||
}
|
||||
|
||||
// 大文件大小限制(100MB)
|
||||
if (file.getSize() > 100 * 1024 * 1024) {
|
||||
throw new IllegalArgumentException("文件大小不能超过100MB");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化文件大小
|
||||
*/
|
||||
private String formatFileSize(long size) {
|
||||
if (size < 1024) {
|
||||
return size + " B";
|
||||
} else if (size < 1024 * 1024) {
|
||||
return String.format("%.1f KB", size / 1024.0);
|
||||
} else {
|
||||
return String.format("%.1f MB", size / (1024.0 * 1024.0));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 估算预览时间
|
||||
*/
|
||||
private String estimatePreviewTime(long fileSize) {
|
||||
// 简单估算:每MB约1秒,最小5秒,最大60秒
|
||||
long estimatedSeconds = Math.min(60, Math.max(5, fileSize / (1024 * 1024)));
|
||||
return estimatedSeconds + "秒";
|
||||
}
|
||||
|
||||
/**
|
||||
* 估算继续处理时间
|
||||
*/
|
||||
private String estimateContinueTime(int remainingRows) {
|
||||
// 简单估算:每行约2秒,最小30秒
|
||||
long estimatedSeconds = Math.max(30, remainingRows * 2);
|
||||
long minutes = estimatedSeconds / 60;
|
||||
long seconds = estimatedSeconds % 60;
|
||||
|
||||
if (minutes > 0) {
|
||||
return String.format("%d分%d秒", minutes, seconds);
|
||||
} else {
|
||||
return estimatedSeconds + "秒";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 估算完整处理时间
|
||||
*/
|
||||
private String estimateFullProcessTime(long fileSize) {
|
||||
// 简单估算:每MB约3秒,最小30秒
|
||||
long estimatedSeconds = Math.max(30, (fileSize / (1024 * 1024)) * 3);
|
||||
long minutes = estimatedSeconds / 60;
|
||||
long seconds = estimatedSeconds % 60;
|
||||
|
||||
if (minutes > 60) {
|
||||
long hours = minutes / 60;
|
||||
minutes = minutes % 60;
|
||||
return String.format("%d小时%d分", hours, minutes);
|
||||
} else if (minutes > 0) {
|
||||
return String.format("%d分%d秒", minutes, seconds);
|
||||
} else {
|
||||
return estimatedSeconds + "秒";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取任务总行数
|
||||
*/
|
||||
private int getTaskTotalRows(String requestId) {
|
||||
try {
|
||||
ExcelProcessTask task = excelService.getTaskStatus(requestId);
|
||||
return task != null ? task.getTotalRows() : 0;
|
||||
} catch (Exception e) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从任务获取输出文件
|
||||
*/
|
||||
private File getOutputFileFromTask(ExcelProcessTask task) {
|
||||
// 这里需要根据你的实际实现来获取文件
|
||||
// 假设文件保存在固定目录下
|
||||
String outputDir = "/home/rsoft/excel_output";
|
||||
String filename = task.getRequestId() + "_processed.xlsx";
|
||||
return new File(outputDir, filename);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
// 新增解析结果类
|
||||
@Data
|
||||
public class AddressParseResult {
|
||||
private String aoi;
|
||||
private String aoiType;
|
||||
private String poi;
|
||||
private String poiType;
|
||||
private String street;
|
||||
private String community;
|
||||
private String address;
|
||||
|
||||
|
||||
|
||||
public AddressParseResult(String aoi, String aoiType, String poi, String poiType,
|
||||
String street, String community, String address) {
|
||||
this.aoi = aoi;
|
||||
this.aoiType = aoiType;
|
||||
this.poi = poi;
|
||||
this.poiType = poiType;
|
||||
this.street = street;
|
||||
this.community = community;
|
||||
this.address = address;
|
||||
}
|
||||
|
||||
// getter和setter方法
|
||||
public String getAoi() { return aoi; }
|
||||
public void setAoi(String aoi) { this.aoi = aoi; }
|
||||
|
||||
public String getAoiType() { return aoiType; }
|
||||
public void setAoiType(String aoiType) { this.aoiType = aoiType; }
|
||||
|
||||
public String getPoi() { return poi; }
|
||||
public void setPoi(String poi) { this.poi = poi; }
|
||||
|
||||
public String getPoiType() { return poiType; }
|
||||
public void setPoiType(String poiType) { this.poiType = poiType; }
|
||||
|
||||
public String getStreet() { return street; }
|
||||
public void setStreet(String street) { this.street = street; }
|
||||
|
||||
public String getCommunity() { return community; }
|
||||
public void setCommunity(String community) { this.community = community; }
|
||||
|
||||
public String getAddress() { return address; }
|
||||
public void setAddress(String address) { this.address = address; }
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 列索引容器类
|
||||
*/
|
||||
@Data
|
||||
public class ColumnIndices {
|
||||
public int addressColIndex;
|
||||
public int poiColIndex;
|
||||
public int poiTypeColIndex;
|
||||
public int aoiColIndex;
|
||||
public int aoiTypeColIndex;
|
||||
public int streetColIndex;
|
||||
public int communityColIndex;
|
||||
|
||||
public ColumnIndices() {}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("地址列:%d, POI:%d, POI类型:%d, AOI:%d, AOI类型:%d, 街道:%d, 社区:%d",
|
||||
addressColIndex, poiColIndex, poiTypeColIndex, aoiColIndex,
|
||||
aoiTypeColIndex, streetColIndex, communityColIndex);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class ExcelProcessResult {
|
||||
private String requestId;
|
||||
private String filename;
|
||||
private String outputPath;
|
||||
private int successCount;
|
||||
private int failCount;
|
||||
private int totalRows;
|
||||
private String status;
|
||||
private long elapsedTime;
|
||||
private boolean isPreview;
|
||||
private int previewRows;
|
||||
private int previewCompletedRows;
|
||||
private String downloadUrl;
|
||||
private String continueUrl;
|
||||
|
||||
public ExcelProcessResult(String requestId, String filename, String outputPath,
|
||||
int successCount, int failCount, int totalRows,
|
||||
String status, long elapsedTime,
|
||||
boolean isPreview, int previewRows, int previewCompletedRows) {
|
||||
this.requestId = requestId;
|
||||
this.filename = filename;
|
||||
this.outputPath = outputPath;
|
||||
this.successCount = successCount;
|
||||
this.failCount = failCount;
|
||||
this.totalRows = totalRows;
|
||||
this.status = status;
|
||||
this.elapsedTime = elapsedTime;
|
||||
this.isPreview = isPreview;
|
||||
this.previewRows = previewRows;
|
||||
this.previewCompletedRows = previewCompletedRows;
|
||||
|
||||
// 自动生成URL
|
||||
if (outputPath != null) {
|
||||
this.downloadUrl = "/api/excel/download/" + requestId;
|
||||
}
|
||||
|
||||
if ("PREVIEW_COMPLETED".equals(status)) {
|
||||
this.continueUrl = "/api/excel/continue/" + requestId;
|
||||
}
|
||||
}
|
||||
|
||||
// Getters
|
||||
public String getRequestId() { return requestId; }
|
||||
public String getFilename() { return filename; }
|
||||
public String getOutputPath() { return outputPath; }
|
||||
public int getSuccessCount() { return successCount; }
|
||||
public int getFailCount() { return failCount; }
|
||||
public int getTotalRows() { return totalRows; }
|
||||
public String getStatus() { return status; }
|
||||
public long getElapsedTime() { return elapsedTime; }
|
||||
public boolean isPreview() { return isPreview; }
|
||||
public int getPreviewRows() { return previewRows; }
|
||||
public int getPreviewCompletedRows() { return previewCompletedRows; }
|
||||
public String getDownloadUrl() { return downloadUrl; }
|
||||
public String getContinueUrl() { return continueUrl; }
|
||||
|
||||
public double getSuccessRate() {
|
||||
int total = successCount + failCount;
|
||||
return total > 0 ? (successCount * 100.0 / total) : 0;
|
||||
}
|
||||
|
||||
public int getRemainingRows() {
|
||||
return totalRows - previewCompletedRows;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
/**
|
||||
* 处理策略枚举
|
||||
*/
|
||||
public enum ProcessingStrategy {
|
||||
FAST("快速模式", 1000), // 适合小文件
|
||||
BALANCED("平衡模式", 5000), // 适合中等文件
|
||||
STREAMING("流式模式", -1); // 适合大文件
|
||||
|
||||
private final String name;
|
||||
private final int batchSize; // 批处理大小
|
||||
|
||||
ProcessingStrategy(String name, int batchSize) {
|
||||
this.name = name;
|
||||
this.batchSize = batchSize;
|
||||
}
|
||||
|
||||
public String getName() { return name; }
|
||||
public int getBatchSize() { return batchSize; }
|
||||
|
||||
/**
|
||||
* 获取推荐的API并发数
|
||||
*/
|
||||
public int getApiConcurrency() {
|
||||
switch (this) {
|
||||
case FAST: return 20;
|
||||
case BALANCED: return 15;
|
||||
case STREAMING: return 10;
|
||||
default: return 10;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取内存中的行数限制
|
||||
*/
|
||||
public int getMemoryRowLimit() {
|
||||
switch (this) {
|
||||
case FAST: return 100000; // 内存保持10万行
|
||||
case BALANCED: return 50000; // 内存保持5万行
|
||||
case STREAMING: return 1000; // 内存只保持1000行
|
||||
default: return 10000;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
/**
|
||||
* 行处理结果类
|
||||
*/
|
||||
public class RowProcessResult {
|
||||
private final int rowIndex;
|
||||
private final boolean success;
|
||||
private final String errorMessage;
|
||||
|
||||
private RowProcessResult(int rowIndex, boolean success, String errorMessage) {
|
||||
this.rowIndex = rowIndex;
|
||||
this.success = success;
|
||||
this.errorMessage = errorMessage;
|
||||
}
|
||||
|
||||
public static RowProcessResult success(int rowIndex) {
|
||||
return new RowProcessResult(rowIndex, true, null);
|
||||
}
|
||||
|
||||
public static RowProcessResult failed(int rowIndex, String errorMessage) {
|
||||
return new RowProcessResult(rowIndex, false, errorMessage);
|
||||
}
|
||||
|
||||
public boolean isSuccess() { return success; }
|
||||
public String getErrorMessage() { return errorMessage; }
|
||||
public int getRowIndex() { return rowIndex; }
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
import lombok.Data;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
|
||||
/**
|
||||
* 行处理任务包装类
|
||||
*/
|
||||
public class RowProcessingTask {
|
||||
public final Row row;
|
||||
public final int rowIndex;
|
||||
|
||||
public RowProcessingTask(Row row, int rowIndex) {
|
||||
this.row = row;
|
||||
this.rowIndex = rowIndex;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import lombok.Data;
|
||||
import org.springframework.data.annotation.Id;
|
||||
|
||||
import java.io.Serial;
|
||||
import java.io.Serializable;
|
||||
|
||||
@Data
|
||||
public class SdJqd implements Serializable {
|
||||
|
||||
@Serial
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@TableId(type = IdType.AUTO)
|
||||
private String jqd;
|
||||
|
||||
private String sfdz;
|
||||
|
||||
private String scbjsj;
|
||||
|
||||
private String poi;
|
||||
|
||||
private String aoi;
|
||||
|
||||
private String street;
|
||||
|
||||
private String aoiType;
|
||||
|
||||
private String poiType;
|
||||
|
||||
private String community;
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
package org.dromara.extract.domain;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import lombok.Data;
|
||||
import org.springframework.data.annotation.Id;
|
||||
|
||||
import java.io.Serial;
|
||||
import java.io.Serializable;
|
||||
|
||||
@Data
|
||||
public class ShJqd implements Serializable {
|
||||
|
||||
@Serial
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@TableId(type = IdType.AUTO)
|
||||
private String jqd;
|
||||
|
||||
private String sfdz;
|
||||
|
||||
private String scbjsj;
|
||||
|
||||
private String poi;
|
||||
|
||||
private String aoi;
|
||||
|
||||
private String street;
|
||||
|
||||
private String aoiType;
|
||||
|
||||
private String poiType;
|
||||
|
||||
private String community;
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
package org.dromara.extract.mapper;
|
||||
|
||||
import org.dromara.common.mybatis.core.mapper.BaseMapperPlus;
|
||||
import org.dromara.extract.domain.SdJqd;
|
||||
import org.dromara.extract.domain.ShJqd;
|
||||
|
||||
|
||||
public interface SdJqdMapper extends BaseMapperPlus<SdJqd,SdJqd> {
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
package org.dromara.extract.mapper;
|
||||
|
||||
import com.baomidou.dynamic.datasource.annotation.DS;
|
||||
import com.baomidou.mybatisplus.core.conditions.Wrapper;
|
||||
import com.baomidou.mybatisplus.core.toolkit.Constants;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.dromara.common.mybatis.core.mapper.BaseMapperPlus;
|
||||
import org.dromara.extract.domain.Jqd;
|
||||
import org.dromara.extract.domain.ShJqd;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public interface ShJqdMapper extends BaseMapperPlus<ShJqd,ShJqd> {
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -10,22 +10,22 @@ import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
|
|||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import jakarta.annotation.PreDestroy;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.poi.ss.usermodel.*;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.dromara.common.mybatis.core.page.PageQuery;
|
||||
import org.dromara.extract.domain.Jqd;
|
||||
import org.dromara.extract.domain.PoiAddress;
|
||||
import org.dromara.extract.mapper.AoiAddressMapper;
|
||||
import org.dromara.extract.mapper.JqdMapper;
|
||||
import org.dromara.extract.mapper.PoiAddressMapper;
|
||||
import org.dromara.extract.domain.*;
|
||||
import org.dromara.extract.mapper.*;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.dao.DuplicateKeyException;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
|
@ -45,6 +45,12 @@ public class DataMigrationService {
|
|||
@Autowired
|
||||
private PoiAddressMapper poiAddressMapper;
|
||||
|
||||
@Autowired
|
||||
private ShJqdMapper shJqdMapper;
|
||||
|
||||
@Autowired
|
||||
private SdJqdMapper sdJqdMapper;
|
||||
|
||||
@Autowired
|
||||
private AoiAddressMapper aoiAddressMapper;
|
||||
|
||||
|
|
@ -58,7 +64,7 @@ public class DataMigrationService {
|
|||
private static final int BATCH_SIZE = 1000;
|
||||
|
||||
// 分页大小
|
||||
private static final int PAGE_SIZE = 500;
|
||||
private static final int PAGE_SIZE = 200;
|
||||
|
||||
private static final String SYSTEM_PROMPT = """
|
||||
你是一个地址解析专家,必须严格、逐条遵守以下规则处理输入地址。直接输出JSON,不思考、不解释。注意:你的任务是执行规则,不是依赖常识或猜测。以下规则优先级高于任何通用知识。
|
||||
|
|
@ -114,6 +120,74 @@ public class DataMigrationService {
|
|||
注意:输出必须是纯JSON格式,不要有任何额外的解释、思考过程或markdown代码块标记。
|
||||
""";
|
||||
|
||||
|
||||
private static final String SYSTEM_PROMPT_TYPE = """
|
||||
你是一个地址解析专家,必须严格、逐条遵守以下规则处理输入地址。直接输出JSON,不思考、不解释。注意:你的任务是执行规则,不是依赖常识或猜测。以下规则优先级高于任何通用知识。
|
||||
|
||||
1. AOI定义:指区域状地理实体,如学校、居民小区、商场、广场、大厦、产业园区、公园、市场等具有明确边界和名称的非行政区划实体。
|
||||
✅示例:世纪新都、安徽农业大学教学基地、万达广场、高新银泰、凤凰城二期、九溪江南小区、大摩广场。
|
||||
⚠️必须完整保留"期数"或"分区"信息:如"二期"、"B区"、"南区"、"东苑"等,不得简化为母体名称;
|
||||
❌严格排除:上级行政区划(如省、市、县、区、镇、乡、街道);
|
||||
类行政区划或功能区:`政务区`、`新区`、`高新区`、`经开区`、`工业区`、`示范区`、`片区`、`功能区`、`园区`(无具体名称时)等;
|
||||
无明确边界的泛称区域;学校、医院、政府机关、公司等机构内部的分区(如"西区""南楼""B座"),即使带"区""期"字眼,也不视为AOI;
|
||||
任何包含"社区""片区"等字眼但无独立命名的区域。
|
||||
|
||||
2. POI定义:指具体的、可独立定位的地理点位。
|
||||
允许以下类型:具体商户("庐江百大超市");
|
||||
连锁品牌(如"蜜雪冰城""雅韵皖酒厂");
|
||||
机构营业网点("中国人寿保险(江淮路营业部)"、"工商银行XX支行");
|
||||
具体建筑("邮政大楼"、"科技馆"、"中医院康复楼");
|
||||
基层机构(如"杨楼中队""XX派出所""XX卫生院");
|
||||
基层地理实体(如"宋庄""葛庄""大王庄");
|
||||
交通枢纽(如"孙湾火车站""合肥客运中心""汽车站");
|
||||
政府及公共服务机构(如"观堂镇政府""张村老镇政府""车管所""政务服务中心");
|
||||
道路交口:格式为"[道路A]与[道路B]交口"或"[道路A]与[道路B]交叉口",且道路名称具体;
|
||||
红绿灯:仅当命名为"`[地标]红绿灯`"(如"县政府红绿灯")或"`[交口]红绿灯`"(如"南一环与长江路交口红绿灯")时,可作为POI;
|
||||
功能设施:`加油站`、`公交站`、`地铁站`、`停车场`,但必须依附于一个可定位的主体,且有具体名称(如"亚珠加油站""中石化加油站")。
|
||||
严格排除(以下情况 POI 必须为空字符串):任何孤立的内部设施:`广场`、`花园`、`凉亭`、`健身区`;任何泛称区域词;加油站等设施无具体品牌名(如"南加油站""加油站")。
|
||||
⚠️ 方位/模糊词处理原则:如果地址仅由排除词构成(如"门口""旁边"),或POI完全依赖排除词成立(如"大摩广场对面"),则POI="";
|
||||
但如果地址中包含一个独立、可定位的命名实体(如"三阳路小学""观堂镇政府""客运中心""孙湾火车站""车管所"),即使后接"门口""院里""出站口""院内""后面""东北角"等词,仍应提取该实体作为POI;
|
||||
"出站口""进站口""候车厅""院内""门口"等属于内部位置描述,应在address中去除,但不影响主体POI提取。
|
||||
|
||||
3. 语义优先原则:若地址中包含"AOI + 方位词"(如"大摩广场对面"),则AOI = 主体名称,POI = "";
|
||||
若地址为"道路交口"且无任何方位词,则POI = 交口名称;
|
||||
若地址为"XX街道红绿灯",则POI = ""。
|
||||
|
||||
4. address清洗规则:目标:输出一个最简但具备地理可定位性的核心地址;
|
||||
保留:上一级行政区划;道路交口;可定位设施或地标(如"高新银泰"、"亚珠加油站");
|
||||
小区"期数"或"分区"信息;
|
||||
基层地名(如"宋庄""葛庄");
|
||||
去除:模糊方位词本身(包括方言表达如`北头`、`南头`、`桥头`、`靠近`、`旁边`、`附近`、`对面`、`里面`、`内`、`东南角`、`东北角`、`门口`、`院里`、`出站口`、`院内`、`往北`等),但不删除这些词所修饰的有效实体;
|
||||
✅正确示例:"祁门路与庐州大道路口大摩广场对面" → address: "祁门路与庐州大道路口大摩广场";"三阳路小学门口(西区)" → address: "三阳路小学(西区)";
|
||||
"红星美凯龙东红绿灯" → address: "红星美凯龙";"城西大市场南加油站" → address: "城西大市场";"亚珠加油站桥南" → address: "亚珠加油站";
|
||||
"张寨汽贸城南门雅韵皖酒厂后面" → aoi: "张寨汽贸城", poi: "雅韵皖酒厂", address: "张寨汽贸城雅韵皖酒厂";
|
||||
"高铁南站南边交控集团" → poi: "交控集团", address: "交控集团";"店集镇蜜雪冰城门口" → poi: "蜜雪冰城", address: "店集镇蜜雪冰城";
|
||||
"二桥北头加油站" → aoi: "", poi: "", address: "";"中医院康复楼(庄周西区)" → aoi: "", poi: "中医院康复楼", address: "中医院康复楼(庄周西区)";
|
||||
"吉峰农机大市场西门(庄周西区)" → aoi: "吉峰农机大市场", poi: "", address: "吉峰农机大市场(庄周西区)";"陈大镇杨楼中队后面" → poi: "杨楼中队", address: "陈大镇杨楼中队";
|
||||
"芦庙镇宋庄" → poi: "宋庄", address: "芦庙镇宋庄";"涡南镇葛庄东北角" → poi: "葛庄", address: "涡南镇葛庄";
|
||||
"板桥双陆大王庄" → poi: "大王庄", address: "板桥双陆大王庄";"观堂镇政府门口" → poi: "观堂镇政府", address: "观堂镇政府";
|
||||
"张村老镇政府院里" → poi: "张村老镇政府", address: "张村老镇政府";"客运中心门口" → poi: "客运中心", address: "客运中心";
|
||||
"孙湾火车站出站口" → poi: "孙湾火车站", address: "孙湾火车站";"车管所院内" → poi: "车管所", address: "车管所";
|
||||
"火车站西路口往北" → poi: "火车站西路口", address: "火车站西路口";
|
||||
❌错误示例:address为"桥南"、"东红绿灯"、"北头",poi为"南加油站"或"加油站",aoi为"庄周西区"或"西区"(当主体是学校或行政区时)。
|
||||
|
||||
5. 新增字段提取规则:街道 (street):提取以"街道"结尾的行政区划名称(如"蜀山街道"、"三里庵街道")。当地址中明确包含此类街道信息时提取,否则为空字符串。
|
||||
社区 (community):仅当地址中明确包含"社区"一词(如"XX社区")时提取该完整社区名称,否则为空字符串。注意区分"社区"(行政区划)与"小区"(AOI)。
|
||||
POI类型:根据POI名称推断其业态或类别(如"超市"、"加油站"、"银行"、"学校"、"医院"、"政府"、"火车站"、"小区"、"民宿"等)。若POI为空,则类型也为空字符串。
|
||||
AOI类型:根据AOI名称推断其类别(如"小区"、"商场"、"园区"、"市场"、"广场"、"大厦"等)。若AOI为空,则类型也为空字符串。
|
||||
|
||||
6. 输出格式:仅输出JSON,结构如下:{
|
||||
"aoi": "匹配的AOI名称,无则为空字符串",
|
||||
"aoi_type": "AOI类型,无则为空字符串",
|
||||
"poi": "最可能的POI名称,无有效POI则为空字符串",
|
||||
"poi_type": "POI类型,无则为空字符串",
|
||||
"street": "街道名称(行政区划),无则为空字符串",
|
||||
"community": "社区名称,无则为空字符串",
|
||||
"address": "清洗后的核心地址,若无有效定位信息则为空字符串"
|
||||
} 注意:输出必须是纯JSON格式,不要有任何额外的解释、思考过程或markdown代码块标记。
|
||||
""";
|
||||
|
||||
|
||||
// 启动迁移
|
||||
public void startMigration(String xzqh, String startTime) {
|
||||
log.info("参数xzqh={},startTime={}",xzqh,startTime);
|
||||
|
|
@ -163,6 +237,99 @@ public class DataMigrationService {
|
|||
log.info("✅ 数据迁移任务全部提交完成");
|
||||
}
|
||||
|
||||
|
||||
//涉黄警情
|
||||
public void startShMigration() {
|
||||
log.info("涉黄警情开始迁移数据...");
|
||||
|
||||
|
||||
// 2. 分页读取源数据
|
||||
|
||||
LambdaQueryWrapper<ShJqd> jqlqw = new LambdaQueryWrapper<>();
|
||||
jqlqw.le(ShJqd::getScbjsj,"2025-01-03 15:30:10")
|
||||
.isNotNull(ShJqd::getSfdz)
|
||||
.ne(ShJqd::getSfdz,"地址不详")
|
||||
.ne(ShJqd::getSfdz,"地址未知");
|
||||
long totalCount = shJqdMapper.selectCount(jqlqw);
|
||||
log.info("涉黄查询数据大小={}",totalCount);
|
||||
long totalPages = (totalCount + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
PageQuery pageQuery = new PageQuery();
|
||||
jqlqw.orderByDesc(ShJqd::getScbjsj);
|
||||
pageQuery.setPageSize(PAGE_SIZE);
|
||||
for (int pageNum = 0; pageNum < totalPages; pageNum++) {
|
||||
long offset = pageNum * PAGE_SIZE;
|
||||
pageQuery.setPageNum(pageNum);
|
||||
|
||||
|
||||
log.info("正在处理涉黄第 {}/{} 页, offset={}", pageNum + 1, totalPages, offset);
|
||||
|
||||
List<ShJqd> pageData;
|
||||
try {
|
||||
IPage jqPage = shJqdMapper.selectPage(pageQuery.build(), jqlqw);
|
||||
pageData = jqPage.getRecords();
|
||||
} catch (Exception e) {
|
||||
log.error("读取涉黄第 {} 页失败", pageNum, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pageData.isEmpty()) continue;
|
||||
|
||||
// 并发处理当前页
|
||||
processShPageAsync(pageData);
|
||||
}
|
||||
|
||||
log.info("✅ 涉黄数据迁移任务全部提交完成");
|
||||
}
|
||||
|
||||
//涉赌警情
|
||||
public void startSdMigration() {
|
||||
log.info("涉赌警情开始迁移数据...");
|
||||
|
||||
|
||||
// 2. 分页读取源数据
|
||||
|
||||
LambdaQueryWrapper<SdJqd> jqlqw = new LambdaQueryWrapper<>();
|
||||
jqlqw.le(SdJqd::getScbjsj,"2025-01-01 14:01:41")
|
||||
.isNotNull(SdJqd::getSfdz)
|
||||
.ne(SdJqd::getSfdz,"地址不详")
|
||||
.ne(SdJqd::getSfdz,"地址未知");
|
||||
long totalCount = sdJqdMapper.selectCount(jqlqw);
|
||||
log.info("涉赌查询数据大小={}",totalCount);
|
||||
long totalPages = (totalCount + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
PageQuery pageQuery = new PageQuery();
|
||||
jqlqw.orderByDesc(SdJqd::getScbjsj);
|
||||
pageQuery.setPageSize(PAGE_SIZE);
|
||||
for (int pageNum = 0; pageNum < totalPages; pageNum++) {
|
||||
long offset = pageNum * PAGE_SIZE;
|
||||
pageQuery.setPageNum(pageNum);
|
||||
|
||||
|
||||
log.info("正在处理涉赌第 {}/{} 页, offset={}", pageNum + 1, totalPages, offset);
|
||||
|
||||
List<SdJqd> pageData;
|
||||
try {
|
||||
IPage jqPage = sdJqdMapper.selectPage(pageQuery.build(), jqlqw);
|
||||
pageData = jqPage.getRecords();
|
||||
} catch (Exception e) {
|
||||
log.error("读取涉赌第 {} 页失败", pageNum, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pageData.isEmpty()) continue;
|
||||
|
||||
// 并发处理当前页
|
||||
processSdPageAsync(pageData);
|
||||
}
|
||||
|
||||
log.info("✅ 涉赌数据迁移任务全部提交完成");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// 加载目标库中已存在的 name 到内存(避免重复插入)
|
||||
private void loadExistingNamesToMemory() {
|
||||
Long count = poiAddressMapper.selectCount(null);
|
||||
|
|
@ -321,6 +488,96 @@ public class DataMigrationService {
|
|||
});
|
||||
}
|
||||
|
||||
//涉黄
|
||||
private void processShPageAsync(List<ShJqd> pageData) {
|
||||
log.info("✅ 提交异步任务,页数据量: {}", pageData.size());
|
||||
CompletableFuture.runAsync(() -> {
|
||||
try {
|
||||
List<ShJqd> batch = new ArrayList<>();
|
||||
|
||||
for (ShJqd src : pageData) {
|
||||
try {
|
||||
String query = src.getSfdz();
|
||||
if (StrUtil.isBlank(query)) continue;
|
||||
log.debug("📡 正在调用第三方 API 查询: {}", query); // ✅ 加日志
|
||||
// 调用接口获取解析结果
|
||||
AddressParseResult result = parseAddressWithLLM(query);
|
||||
if (result == null ) {
|
||||
continue;
|
||||
}
|
||||
src.setAoi(result.getAoi());
|
||||
src.setPoi(result.getPoi());
|
||||
src.setAoiType(result.getAoiType());
|
||||
src.setPoiType(result.getPoiType());
|
||||
src.setStreet(result.getStreet());
|
||||
src.setCommunity(result.getCommunity());
|
||||
batch.add(src);
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("处理地址失败: {}", src.getSfdz(), e);
|
||||
}
|
||||
}
|
||||
|
||||
shJqdMapper.updateBatchById(batch);
|
||||
log.info("涉黄调用LLM模型修改数据: {}", batch.size());
|
||||
|
||||
}catch (Throwable t) {
|
||||
// 🚨 捕获所有错误(包括 Error)
|
||||
log.error("❌ processPageAsync 任务内部发生致命错误", t);
|
||||
}
|
||||
|
||||
}, executorService).exceptionally(throwable -> {
|
||||
// 🚨 捕获 CompletableFuture 本身的异常
|
||||
log.error("❌ CompletableFuture 执行失败", throwable);
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
private void processSdPageAsync(List<SdJqd> pageData) {
|
||||
log.info("✅ 提交异步任务,页数据量: {}", pageData.size());
|
||||
CompletableFuture.runAsync(() -> {
|
||||
try {
|
||||
List<SdJqd> batch = new ArrayList<>();
|
||||
|
||||
for (SdJqd src : pageData) {
|
||||
try {
|
||||
String query = src.getSfdz();
|
||||
if (StrUtil.isBlank(query)) continue;
|
||||
log.debug("📡 正在调用第三方 API 查询: {}", query); // ✅ 加日志
|
||||
// 调用接口获取解析结果
|
||||
AddressParseResult result = parseAddressWithLLM(query);
|
||||
if (result == null ) {
|
||||
continue;
|
||||
}
|
||||
src.setAoi(result.getAoi());
|
||||
src.setPoi(result.getPoi());
|
||||
src.setAoiType(result.getAoiType());
|
||||
src.setPoiType(result.getPoiType());
|
||||
src.setStreet(result.getStreet());
|
||||
src.setCommunity(result.getCommunity());
|
||||
batch.add(src);
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("处理地址失败: {}", src.getSfdz(), e);
|
||||
}
|
||||
}
|
||||
sdJqdMapper.updateBatchById(batch);
|
||||
log.info("涉赌调用LLM模型修改数据: {}", batch.size());
|
||||
}catch (Throwable t) {
|
||||
// 🚨 捕获所有错误(包括 Error)
|
||||
log.error("❌ processPageAsync 任务内部发生致命错误", t);
|
||||
}
|
||||
|
||||
}, executorService).exceptionally(throwable -> {
|
||||
// 🚨 捕获 CompletableFuture 本身的异常
|
||||
log.error("❌ CompletableFuture 执行失败", throwable);
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// 调用第三方接口
|
||||
private ParsedResult callThirdPartyApi(String query) {
|
||||
try {
|
||||
|
|
@ -521,11 +778,204 @@ public class DataMigrationService {
|
|||
batch.clear();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@PreDestroy
|
||||
public void shutdown() {
|
||||
executorService.shutdown();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 处理列名别名
|
||||
*/
|
||||
private void handleColumnAliases(Map<String, Integer> columnMap, String mainName, List<String> aliases) {
|
||||
if (!columnMap.containsKey(mainName)) {
|
||||
for (String alias : aliases) {
|
||||
if (columnMap.containsKey(alias)) {
|
||||
columnMap.put(mainName, columnMap.get(alias));
|
||||
log.info("使用别名 '{}' 作为 '{}'", alias, mainName);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 调用大模型解析地址
|
||||
*/
|
||||
private AddressParseResult parseAddressWithLLM(String address) {
|
||||
try {
|
||||
// 构造消息
|
||||
JSONArray messages = new JSONArray();
|
||||
messages.add(new JSONObject().set("role", "system").set("content", SYSTEM_PROMPT_TYPE));
|
||||
messages.add(new JSONObject().set("role", "user").set("content", address));
|
||||
|
||||
// 构建请求体
|
||||
JSONObject requestBody = new JSONObject()
|
||||
.set("model", "Qwen3-32B")
|
||||
.set("messages", messages)
|
||||
.set("max_tokens", 3000)
|
||||
.set("temperature", 0)
|
||||
.set("seed",42)
|
||||
.set("top_p", 0.9);
|
||||
|
||||
// 发送请求
|
||||
String response = HttpUtil.createPost(apiUrl)
|
||||
.header("Authorization", "Bearer " + bearerToken)
|
||||
.header("Content-Type", "application/json")
|
||||
.timeout(30000) // 增加超时时间
|
||||
.body(requestBody.toString())
|
||||
.execute()
|
||||
.body();
|
||||
|
||||
log.debug("大模型响应: {}", response);
|
||||
|
||||
// 解析响应
|
||||
JSONObject jsonResponse = JSONUtil.parseObj(response);
|
||||
JSONArray choices = jsonResponse.getJSONArray("choices");
|
||||
if (choices == null || choices.isEmpty()) {
|
||||
log.warn("API返回无choices");
|
||||
return null;
|
||||
}
|
||||
|
||||
String content = choices.get(0, JSONObject.class)
|
||||
.getJSONObject("message")
|
||||
.getStr("content");
|
||||
|
||||
if (StrUtil.isBlank(content)) {
|
||||
log.warn("content为空");
|
||||
return null;
|
||||
}
|
||||
|
||||
// 提取JSON
|
||||
String cleanJson = extractPureJsonFromContentForExcel(content);
|
||||
if (StrUtil.isBlank(cleanJson)) {
|
||||
log.warn("无法提取有效JSON");
|
||||
return null;
|
||||
}
|
||||
|
||||
JSONObject resultJson = JSONUtil.parseObj(cleanJson);
|
||||
|
||||
return new AddressParseResult(
|
||||
resultJson.getStr("aoi", ""),
|
||||
resultJson.getStr("aoi_type", ""),
|
||||
resultJson.getStr("poi", ""),
|
||||
resultJson.getStr("poi_type", ""),
|
||||
resultJson.getStr("street", ""),
|
||||
resultJson.getStr("community", ""),
|
||||
resultJson.getStr("address", "")
|
||||
);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("调用大模型解析地址失败: {}", address, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 专为Excel处理优化的JSON提取方法
|
||||
*/
|
||||
private String extractPureJsonFromContentForExcel(String content) {
|
||||
if (StrUtil.isBlank(content)) return null;
|
||||
|
||||
// 1. 移除 <think>...</think>
|
||||
content = content.replaceAll("(?s)<think>.*?</think>", "");
|
||||
content = content.trim();
|
||||
|
||||
// 2. 尝试直接解析
|
||||
if (JSONUtil.isJson(content)) {
|
||||
return content;
|
||||
}
|
||||
|
||||
// 3. 查找第一个 { 和最后一个 }
|
||||
int start = content.indexOf('{');
|
||||
int end = content.lastIndexOf('}');
|
||||
|
||||
if (start >= 0 && end > start) {
|
||||
String candidate = content.substring(start, end + 1);
|
||||
if (JSONUtil.isJson(candidate)) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 尝试提取JSON对象
|
||||
Pattern pattern = Pattern.compile("\\{(?:[^{}]|\\{(?:[^{}]|\\{[^{}]*\\})*\\})*\\}", Pattern.DOTALL);
|
||||
Matcher matcher = pattern.matcher(content);
|
||||
|
||||
while (matcher.find()) {
|
||||
String candidate = matcher.group();
|
||||
if (JSONUtil.isJson(candidate) &&
|
||||
(candidate.contains("\"aoi\"") || candidate.contains("\"poi\"") ||
|
||||
candidate.contains("\"aoi_type\"") || candidate.contains("\"poi_type\""))) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 获取或创建列
|
||||
*/
|
||||
private int getOrCreateColumn(Sheet sheet, Map<String, Integer> columnMap,
|
||||
String columnName, String defaultColumn) {
|
||||
if (columnMap.containsKey(columnName)) {
|
||||
return columnMap.get(columnName);
|
||||
}
|
||||
|
||||
// 创建新列
|
||||
Row headerRow = sheet.getRow(0);
|
||||
int newColIndex = headerRow.getLastCellNum();
|
||||
|
||||
Cell newHeaderCell = headerRow.createCell(newColIndex);
|
||||
newHeaderCell.setCellValue(columnName);
|
||||
|
||||
columnMap.put(columnName, newColIndex);
|
||||
return newColIndex;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 提取纯JSON内容(复用原有方法)
|
||||
*/
|
||||
private String extractPureJsonFromContentnew(String content) {
|
||||
// 使用原有的 extractPureJsonFromContent 方法
|
||||
// 或者可以稍作修改以适应新的JSON结构
|
||||
if (StrUtil.isBlank(content)) return null;
|
||||
|
||||
// 移除 <think>...</think>
|
||||
content = content.replaceAll("(?s)<think>.*?</think>", "");
|
||||
content = content.trim();
|
||||
|
||||
// 尝试直接解析
|
||||
if (JSONUtil.isJson(content)) {
|
||||
return content;
|
||||
}
|
||||
|
||||
// 尝试提取 {...} 模式
|
||||
Pattern pattern = Pattern.compile("\\{.*?\\}", Pattern.DOTALL);
|
||||
Matcher matcher = pattern.matcher(content);
|
||||
|
||||
while (matcher.find()) {
|
||||
String candidate = matcher.group();
|
||||
if (JSONUtil.isJson(candidate) &&
|
||||
(candidate.contains("\"aoi\"") || candidate.contains("\"poi\""))) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// 辅助类
|
||||
private static class ParsedResult {
|
||||
private final String name, aoi, poi, address;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,73 @@
|
|||
package org.dromara.extract.util;
|
||||
|
||||
import lombok.Data;
|
||||
import org.dromara.extract.domain.ExcelProcessResult;
|
||||
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
@Data
|
||||
public class ExcelProcessTask {
|
||||
private String requestId;
|
||||
private String filename;
|
||||
private volatile String status; // 使用 volatile 确保多线程可见性
|
||||
private volatile int progress; // 使用 volatile
|
||||
private AtomicInteger totalRows = new AtomicInteger(0);
|
||||
private AtomicInteger processedRows = new AtomicInteger(0); // 实际处理的行数(不含表头)
|
||||
private AtomicInteger successCount = new AtomicInteger(0);
|
||||
private AtomicInteger failCount = new AtomicInteger(0);
|
||||
private String errorMessage;
|
||||
private long startTime;
|
||||
private long endTime;
|
||||
private CompletableFuture<ExcelProcessResult> processFuture;
|
||||
private boolean previewMode;
|
||||
private boolean previewCompleted = false; // 预览是否已完成
|
||||
private int previewRows; // 预览的行数限制
|
||||
private int previewCompletedRows = 0; // 实际完成预览的行数
|
||||
|
||||
public ExcelProcessTask(String requestId, String filename) {
|
||||
this.requestId = requestId;
|
||||
this.filename = filename;
|
||||
this.startTime = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public long getElapsedTime() {
|
||||
if (endTime == 0) {
|
||||
return System.currentTimeMillis() - startTime;
|
||||
}
|
||||
return endTime - startTime;
|
||||
}
|
||||
|
||||
// 提供原子操作来更新计数器
|
||||
public void incrementProcessedRows(int count) {
|
||||
this.processedRows.getAndAdd(count);
|
||||
}
|
||||
|
||||
public void incrementSuccessCount(int count) {
|
||||
this.successCount.getAndAdd(count);
|
||||
}
|
||||
|
||||
public void incrementFailCount(int count) {
|
||||
this.failCount.getAndAdd(count);
|
||||
}
|
||||
|
||||
public int getProcessedRows() {
|
||||
return this.processedRows.get();
|
||||
}
|
||||
|
||||
public int getSuccessCount() {
|
||||
return this.successCount.get();
|
||||
}
|
||||
|
||||
public int getFailCount() {
|
||||
return this.failCount.get();
|
||||
}
|
||||
|
||||
public int getTotalRows() {
|
||||
return this.totalRows.get();
|
||||
}
|
||||
|
||||
public void setTotalRows(int totalRows) {
|
||||
this.totalRows.set(totalRows);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE mapper
|
||||
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
|
||||
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
||||
<mapper namespace="org.dromara.extract.mapper.SdJqdMapper">
|
||||
|
||||
<resultMap id="sdjqdResult" type="org.dromara.extract.domain.SdJqd">
|
||||
|
||||
</resultMap>
|
||||
|
||||
|
||||
|
||||
</mapper>
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE mapper
|
||||
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
|
||||
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
||||
<mapper namespace="org.dromara.extract.mapper.ShJqdMapper">
|
||||
|
||||
<resultMap id="shjqdResult" type="org.dromara.extract.domain.ShJqd">
|
||||
|
||||
</resultMap>
|
||||
|
||||
|
||||
|
||||
</mapper>
|
||||
Loading…
Reference in New Issue