处理韦总地址结构化代码

stwzhj
luyya 2026-01-16 16:23:37 +08:00
parent 1b0876d735
commit 3de061a843
17 changed files with 2466 additions and 21 deletions

View File

@ -2,14 +2,18 @@ package org.dromara.extract.controller;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.dromara.common.core.domain.R;
import org.dromara.common.core.utils.StringUtils;
import org.dromara.common.web.core.BaseController;
import org.dromara.extract.service.impl.DataMigrationService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import java.util.HashMap;
import java.util.Map;
@ -52,6 +56,41 @@ public class AddressController extends BaseController {
return ResponseEntity.ok("✅ 迁移任务已提交,正在后台执行...");
}
@GetMapping("/startsh")
public ResponseEntity<String> startshMigration() {
try {
log.info("🚀 开始执行涉黄数据迁移任务...");
dataMigrationService.startShMigration();
} catch (Exception e) {
log.error("❌ 迁移任务执行异常", e);
} finally {
isRunning = false;
}
return ResponseEntity.ok("✅ 迁移任务已提交,正在后台执行...");
}
@GetMapping("/startsd")
public ResponseEntity<String> startsdMigration() {
try {
log.info("🚀 开始执行涉赌数据迁移任务...");
dataMigrationService.startSdMigration();
} catch (Exception e) {
log.error("❌ 迁移任务执行异常", e);
} finally {
isRunning = false;
}
return ResponseEntity.ok("✅ 迁移任务已提交,正在后台执行...");
}
@GetMapping("/status")
public ResponseEntity<Map<String, Object>> getStatus() {
Map<String, Object> status = new HashMap<>();
@ -60,17 +99,6 @@ public class AddressController extends BaseController {
return ResponseEntity.ok(status);
}
@GetMapping("/stop")
public ResponseEntity<String> stopMigration() {
// 注意:当前线程池没有实现优雅关闭
// 这里只是一个提示,实际需要在 Service 中支持中断
if (!isRunning) {
return ResponseEntity.ok("任务未运行,无需停止");
}
// ⚠️ 提示用户:无法强制停止,建议手动关闭应用或增强 Service
return ResponseEntity.ok("⚠️ 停止功能暂不支持。请重启应用或增强线程控制逻辑。");
}

View File

@ -0,0 +1,354 @@
package org.dromara.extract.controller;
import lombok.extern.slf4j.Slf4j;
import org.dromara.extract.service.impl.LargeExcelAddressParseService;
import org.dromara.extract.util.ExcelProcessTask;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
/**
* Excel -
* 50
*/
@Slf4j
@RestController
@RequestMapping("/api/excel")
public class ExcelPreviewController {
@Autowired
private LargeExcelAddressParseService excelService;
/**
* 50 -
*/
@PostMapping("/upload-preview")
public ResponseEntity<?> uploadAndPreview(@RequestParam("file") MultipartFile file) {
try {
validateLargeExcelFile(file);
String requestId = UUID.randomUUID().toString();
log.info("收到大文件预览请求 [{}]: {},大小: {} MB",
requestId, file.getOriginalFilename(),
file.getSize() / (1024.0 * 1024.0));
// 异步处理预览50行
CompletableFuture.supplyAsync(() -> {
return excelService.processLargeExcelFile(file, requestId, true); // previewOnly=true
});
// 立即返回任务信息,用户可轮询状态
Map<String, Object> response = createPreviewResponse(requestId, file);
return ResponseEntity.accepted().body(response);
} catch (IllegalArgumentException e) {
return ResponseEntity.badRequest().body(createErrorResponse(e.getMessage()));
} catch (Exception e) {
log.error("大文件预览请求失败", e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body(createErrorResponse("预览处理失败: " + e.getMessage()));
}
}
/**
*
*/
@PostMapping("/upload-full")
public ResponseEntity<?> uploadAndFullProcess(@RequestParam("file") MultipartFile file) {
try {
validateLargeExcelFile(file);
String requestId = UUID.randomUUID().toString();
log.info("收到大文件完整处理请求 [{}]: {},大小: {} MB",
requestId, file.getOriginalFilename(),
file.getSize() / (1024.0 * 1024.0));
// 异步完整处理
CompletableFuture.supplyAsync(() -> {
return excelService.processLargeExcelFile(file, requestId, false); // previewOnly=false
});
Map<String, Object> response = new HashMap<>();
response.put("requestId", requestId);
response.put("filename", file.getOriginalFilename());
response.put("fileSize", formatFileSize(file.getSize()));
response.put("status", "FULL_PROCESSING");
response.put("message", "正在完整处理文件...");
response.put("estimatedTime", estimateFullProcessTime(file.getSize()));
response.put("statusUrl", "/api/excel/status/" + requestId);
response.put("isPreview", false);
return ResponseEntity.accepted().body(response);
} catch (IllegalArgumentException e) {
return ResponseEntity.badRequest().body(createErrorResponse(e.getMessage()));
} catch (Exception e) {
log.error("大文件完整处理请求失败", e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body(createErrorResponse("完整处理失败: " + e.getMessage()));
}
}
/**
*
*/
@GetMapping("/download/{requestId}")
public ResponseEntity<Resource> downloadFile(@PathVariable String requestId) {
try {
// 获取任务信息
ExcelProcessTask task = excelService.getTaskStatus(requestId);
if (task == null) {
return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
}
// 从任务信息中获取文件路径
// 注意LargeExcelAddressParseService需要提供获取输出文件的方法
File outputFile = getOutputFileFromTask(task);
if (outputFile == null || !outputFile.exists()) {
log.warn("输出文件不存在 [{}]", requestId);
return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
}
byte[] content = Files.readAllBytes(outputFile.toPath());
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
headers.setContentDispositionFormData("attachment",
"processed_" + task.getFilename());
headers.setContentLength(content.length);
if (task.isPreviewMode()) {
headers.add("X-Processing-Type", "preview-and-continue");
headers.add("X-Preview-Completed", String.valueOf(task.getPreviewCompletedRows()));
} else {
headers.add("X-Processing-Type", "full-process");
}
return new ResponseEntity<>(new ByteArrayResource(content), headers, HttpStatus.OK);
} catch (Exception e) {
log.error("下载文件失败 [{}]", requestId, e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
}
}
/**
*
*/
@PostMapping("/cancel/{requestId}")
public ResponseEntity<?> cancelTask(@PathVariable String requestId) {
try {
// 这里需要服务层支持取消任务的功能
// excelService.cancelTask(requestId);
Map<String, Object> response = new HashMap<>();
response.put("requestId", requestId);
response.put("status", "CANCELLED");
response.put("message", "任务取消请求已提交");
response.put("timestamp", System.currentTimeMillis());
return ResponseEntity.ok(response);
} catch (Exception e) {
log.error("取消任务失败 [{}]", requestId, e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body(createErrorResponse("取消任务失败: " + e.getMessage()));
}
}
/**
*
*/
@PostMapping("/admin/cleanup")
public ResponseEntity<?> cleanupOldTasks(@RequestHeader(value = "X-Admin-Token", required = false) String adminToken) {
try {
// 简单的管理员验证
if (!"your-admin-token".equals(adminToken)) {
return ResponseEntity.status(HttpStatus.UNAUTHORIZED)
.body(createErrorResponse("未授权"));
}
// 获取清理前的任务数量
int beforeCount = excelService.getAllActiveTasks().size();
// 清理操作
// excelService.cleanupExpiredTasks();
Map<String, Object> response = new HashMap<>();
response.put("action", "cleanup");
response.put("beforeCount", beforeCount);
response.put("afterCount", excelService.getAllActiveTasks().size());
response.put("timestamp", System.currentTimeMillis());
log.info("管理员清理过期任务,清理前: {},清理后: {}", beforeCount, response.get("afterCount"));
return ResponseEntity.ok(response);
} catch (Exception e) {
log.error("清理任务失败", e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body(createErrorResponse("清理失败: " + e.getMessage()));
}
}
// ============ 辅助方法 ============
/**
*
*/
private Map<String, Object> createPreviewResponse(String requestId, MultipartFile file) {
Map<String, Object> response = new HashMap<>();
response.put("requestId", requestId);
response.put("filename", file.getOriginalFilename());
response.put("fileSize", formatFileSize(file.getSize()));
response.put("status", "PREVIEW_PROCESSING");
response.put("message", "正在处理前50行预览...");
response.put("previewRows", 50); // 明确显示预览行数
response.put("previewStatusUrl", "/api/excel/preview/status/" + requestId);
response.put("fullStatusUrl", "/api/excel/status/" + requestId);
response.put("estimatedTime", estimatePreviewTime(file.getSize()));
response.put("isPreview", true);
response.put("timestamp", System.currentTimeMillis());
return response;
}
/**
*
*/
private Map<String, Object> createErrorResponse(String error) {
Map<String, Object> response = new HashMap<>();
response.put("error", error);
response.put("timestamp", System.currentTimeMillis());
return response;
}
/**
*
*/
private void validateLargeExcelFile(MultipartFile file) {
if (file.isEmpty()) {
throw new IllegalArgumentException("文件不能为空");
}
String filename = file.getOriginalFilename();
if (filename == null) {
throw new IllegalArgumentException("文件名不能为空");
}
if (!filename.toLowerCase().endsWith(".xlsx") &&
!filename.toLowerCase().endsWith(".xls")) {
throw new IllegalArgumentException("只支持Excel文件(.xlsx, .xls)");
}
// 大文件大小限制100MB
if (file.getSize() > 100 * 1024 * 1024) {
throw new IllegalArgumentException("文件大小不能超过100MB");
}
}
/**
*
*/
private String formatFileSize(long size) {
if (size < 1024) {
return size + " B";
} else if (size < 1024 * 1024) {
return String.format("%.1f KB", size / 1024.0);
} else {
return String.format("%.1f MB", size / (1024.0 * 1024.0));
}
}
/**
*
*/
private String estimatePreviewTime(long fileSize) {
// 简单估算每MB约1秒最小5秒最大60秒
long estimatedSeconds = Math.min(60, Math.max(5, fileSize / (1024 * 1024)));
return estimatedSeconds + "秒";
}
/**
*
*/
private String estimateContinueTime(int remainingRows) {
// 简单估算每行约2秒最小30秒
long estimatedSeconds = Math.max(30, remainingRows * 2);
long minutes = estimatedSeconds / 60;
long seconds = estimatedSeconds % 60;
if (minutes > 0) {
return String.format("%d分%d秒", minutes, seconds);
} else {
return estimatedSeconds + "秒";
}
}
/**
*
*/
private String estimateFullProcessTime(long fileSize) {
// 简单估算每MB约3秒最小30秒
long estimatedSeconds = Math.max(30, (fileSize / (1024 * 1024)) * 3);
long minutes = estimatedSeconds / 60;
long seconds = estimatedSeconds % 60;
if (minutes > 60) {
long hours = minutes / 60;
minutes = minutes % 60;
return String.format("%d小时%d分", hours, minutes);
} else if (minutes > 0) {
return String.format("%d分%d秒", minutes, seconds);
} else {
return estimatedSeconds + "秒";
}
}
/**
*
*/
private int getTaskTotalRows(String requestId) {
try {
ExcelProcessTask task = excelService.getTaskStatus(requestId);
return task != null ? task.getTotalRows() : 0;
} catch (Exception e) {
return 0;
}
}
/**
*
*/
private File getOutputFileFromTask(ExcelProcessTask task) {
// 这里需要根据你的实际实现来获取文件
// 假设文件保存在固定目录下
String outputDir = "/home/rsoft/excel_output";
String filename = task.getRequestId() + "_processed.xlsx";
return new File(outputDir, filename);
}
}

View File

@ -0,0 +1,50 @@
package org.dromara.extract.domain;
import lombok.Data;
// 新增解析结果类
@Data
public class AddressParseResult {
private String aoi;
private String aoiType;
private String poi;
private String poiType;
private String street;
private String community;
private String address;
public AddressParseResult(String aoi, String aoiType, String poi, String poiType,
String street, String community, String address) {
this.aoi = aoi;
this.aoiType = aoiType;
this.poi = poi;
this.poiType = poiType;
this.street = street;
this.community = community;
this.address = address;
}
// getter和setter方法
public String getAoi() { return aoi; }
public void setAoi(String aoi) { this.aoi = aoi; }
public String getAoiType() { return aoiType; }
public void setAoiType(String aoiType) { this.aoiType = aoiType; }
public String getPoi() { return poi; }
public void setPoi(String poi) { this.poi = poi; }
public String getPoiType() { return poiType; }
public void setPoiType(String poiType) { this.poiType = poiType; }
public String getStreet() { return street; }
public void setStreet(String street) { this.street = street; }
public String getCommunity() { return community; }
public void setCommunity(String community) { this.community = community; }
public String getAddress() { return address; }
public void setAddress(String address) { this.address = address; }
}

View File

@ -0,0 +1,26 @@
package org.dromara.extract.domain;
import lombok.Data;
/**
*
*/
@Data
public class ColumnIndices {
public int addressColIndex;
public int poiColIndex;
public int poiTypeColIndex;
public int aoiColIndex;
public int aoiTypeColIndex;
public int streetColIndex;
public int communityColIndex;
public ColumnIndices() {}
@Override
public String toString() {
return String.format("地址列:%d, POI:%d, POI类型:%d, AOI:%d, AOI类型:%d, 街道:%d, 社区:%d",
addressColIndex, poiColIndex, poiTypeColIndex, aoiColIndex,
aoiTypeColIndex, streetColIndex, communityColIndex);
}
}

View File

@ -0,0 +1,70 @@
package org.dromara.extract.domain;
import lombok.Data;
@Data
public class ExcelProcessResult {
private String requestId;
private String filename;
private String outputPath;
private int successCount;
private int failCount;
private int totalRows;
private String status;
private long elapsedTime;
private boolean isPreview;
private int previewRows;
private int previewCompletedRows;
private String downloadUrl;
private String continueUrl;
public ExcelProcessResult(String requestId, String filename, String outputPath,
int successCount, int failCount, int totalRows,
String status, long elapsedTime,
boolean isPreview, int previewRows, int previewCompletedRows) {
this.requestId = requestId;
this.filename = filename;
this.outputPath = outputPath;
this.successCount = successCount;
this.failCount = failCount;
this.totalRows = totalRows;
this.status = status;
this.elapsedTime = elapsedTime;
this.isPreview = isPreview;
this.previewRows = previewRows;
this.previewCompletedRows = previewCompletedRows;
// 自动生成URL
if (outputPath != null) {
this.downloadUrl = "/api/excel/download/" + requestId;
}
if ("PREVIEW_COMPLETED".equals(status)) {
this.continueUrl = "/api/excel/continue/" + requestId;
}
}
// Getters
public String getRequestId() { return requestId; }
public String getFilename() { return filename; }
public String getOutputPath() { return outputPath; }
public int getSuccessCount() { return successCount; }
public int getFailCount() { return failCount; }
public int getTotalRows() { return totalRows; }
public String getStatus() { return status; }
public long getElapsedTime() { return elapsedTime; }
public boolean isPreview() { return isPreview; }
public int getPreviewRows() { return previewRows; }
public int getPreviewCompletedRows() { return previewCompletedRows; }
public String getDownloadUrl() { return downloadUrl; }
public String getContinueUrl() { return continueUrl; }
public double getSuccessRate() {
int total = successCount + failCount;
return total > 0 ? (successCount * 100.0 / total) : 0;
}
public int getRemainingRows() {
return totalRows - previewCompletedRows;
}
}

View File

@ -0,0 +1,45 @@
package org.dromara.extract.domain;
/**
*
*/
public enum ProcessingStrategy {
FAST("快速模式", 1000), // 适合小文件
BALANCED("平衡模式", 5000), // 适合中等文件
STREAMING("流式模式", -1); // 适合大文件
private final String name;
private final int batchSize; // 批处理大小
ProcessingStrategy(String name, int batchSize) {
this.name = name;
this.batchSize = batchSize;
}
public String getName() { return name; }
public int getBatchSize() { return batchSize; }
/**
* API
*/
public int getApiConcurrency() {
switch (this) {
case FAST: return 20;
case BALANCED: return 15;
case STREAMING: return 10;
default: return 10;
}
}
/**
*
*/
public int getMemoryRowLimit() {
switch (this) {
case FAST: return 100000; // 内存保持10万行
case BALANCED: return 50000; // 内存保持5万行
case STREAMING: return 1000; // 内存只保持1000行
default: return 10000;
}
}
}

View File

@ -0,0 +1,28 @@
package org.dromara.extract.domain;
/**
*
*/
public class RowProcessResult {
private final int rowIndex;
private final boolean success;
private final String errorMessage;
private RowProcessResult(int rowIndex, boolean success, String errorMessage) {
this.rowIndex = rowIndex;
this.success = success;
this.errorMessage = errorMessage;
}
public static RowProcessResult success(int rowIndex) {
return new RowProcessResult(rowIndex, true, null);
}
public static RowProcessResult failed(int rowIndex, String errorMessage) {
return new RowProcessResult(rowIndex, false, errorMessage);
}
public boolean isSuccess() { return success; }
public String getErrorMessage() { return errorMessage; }
public int getRowIndex() { return rowIndex; }
}

View File

@ -0,0 +1,17 @@
package org.dromara.extract.domain;
import lombok.Data;
import org.apache.poi.ss.usermodel.Row;
/**
*
*/
public class RowProcessingTask {
public final Row row;
public final int rowIndex;
public RowProcessingTask(Row row, int rowIndex) {
this.row = row;
this.rowIndex = rowIndex;
}
}

View File

@ -0,0 +1,37 @@
package org.dromara.extract.domain;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import lombok.Data;
import org.springframework.data.annotation.Id;
import java.io.Serial;
import java.io.Serializable;
@Data
public class SdJqd implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
@TableId(type = IdType.AUTO)
private String jqd;
private String sfdz;
private String scbjsj;
private String poi;
private String aoi;
private String street;
private String aoiType;
private String poiType;
private String community;
}

View File

@ -0,0 +1,37 @@
package org.dromara.extract.domain;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import lombok.Data;
import org.springframework.data.annotation.Id;
import java.io.Serial;
import java.io.Serializable;
@Data
public class ShJqd implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
@TableId(type = IdType.AUTO)
private String jqd;
private String sfdz;
private String scbjsj;
private String poi;
private String aoi;
private String street;
private String aoiType;
private String poiType;
private String community;
}

View File

@ -0,0 +1,11 @@
package org.dromara.extract.mapper;
import org.dromara.common.mybatis.core.mapper.BaseMapperPlus;
import org.dromara.extract.domain.SdJqd;
import org.dromara.extract.domain.ShJqd;
public interface SdJqdMapper extends BaseMapperPlus<SdJqd,SdJqd> {
}

View File

@ -0,0 +1,17 @@
package org.dromara.extract.mapper;
import com.baomidou.dynamic.datasource.annotation.DS;
import com.baomidou.mybatisplus.core.conditions.Wrapper;
import com.baomidou.mybatisplus.core.toolkit.Constants;
import org.apache.ibatis.annotations.Param;
import org.dromara.common.mybatis.core.mapper.BaseMapperPlus;
import org.dromara.extract.domain.Jqd;
import org.dromara.extract.domain.ShJqd;
import java.util.List;
public interface ShJqdMapper extends BaseMapperPlus<ShJqd,ShJqd> {
}

View File

@ -10,22 +10,22 @@ import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import jakarta.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.dromara.common.mybatis.core.page.PageQuery;
import org.dromara.extract.domain.Jqd;
import org.dromara.extract.domain.PoiAddress;
import org.dromara.extract.mapper.AoiAddressMapper;
import org.dromara.extract.mapper.JqdMapper;
import org.dromara.extract.mapper.PoiAddressMapper;
import org.dromara.extract.domain.*;
import org.dromara.extract.mapper.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.dao.DuplicateKeyException;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -45,6 +45,12 @@ public class DataMigrationService {
@Autowired
private PoiAddressMapper poiAddressMapper;
@Autowired
private ShJqdMapper shJqdMapper;
@Autowired
private SdJqdMapper sdJqdMapper;
@Autowired
private AoiAddressMapper aoiAddressMapper;
@ -58,7 +64,7 @@ public class DataMigrationService {
private static final int BATCH_SIZE = 1000;
// 分页大小
private static final int PAGE_SIZE = 500;
private static final int PAGE_SIZE = 200;
private static final String SYSTEM_PROMPT = """
JSON
@ -114,6 +120,74 @@ public class DataMigrationService {
JSONmarkdown
""";
private static final String SYSTEM_PROMPT_TYPE = """
JSON
1. AOI广
广广
"期数""分区""二期""B区""南区""东苑"
``````````````````
"西区""南楼""B座"使"区""期"AOI
"社区""片区"
2. POI
"庐江百大超市"
"蜜雪冰城""雅韵皖酒厂"
"中国人寿保险(江淮路营业部)""工商银行XX支行"
"邮政大楼""科技馆""中医院康复楼"
"杨楼中队""XX派出所""XX卫生院"
"宋庄""葛庄""大王庄"
"孙湾火车站""合肥客运中心""汽车站"
"观堂镇政府""张村老镇政府""车管所""政务服务中心"
"[道路A]与[道路B]交口""[道路A]与[道路B]交叉口"
绿"`[地标]红绿灯`""县政府红绿灯""`[交口]红绿灯`""南一环与长江路交口红绿灯"POI
````````"亚珠加油站""中石化加油站"
POI `广```````"南加油站""加油站"
/"门口""旁边"POI"大摩广场对面"POI="";
"三阳路小学""观堂镇政府""客运中心""孙湾火车站""车管所"使"门口""院里""出站口""院内""后面""东北角"POI
"出站口""进站口""候车厅""院内""门口"addressPOI
3. "AOI + 方位词""大摩广场对面"AOI = POI = "";
"道路交口"POI =
"XX街道红绿灯"POI = ""
4. address
"高新银泰""亚珠加油站"
"期数""分区"
"宋庄""葛庄"
````````````````````````````````
"祁门路与庐州大道路口大摩广场对面" address: "祁门路与庐州大道路口大摩广场""三阳路小学门口(西区)" address: "三阳路小学(西区)"
"红星美凯龙东红绿灯" address: "红星美凯龙""城西大市场南加油站" address: "城西大市场""亚珠加油站桥南" address: "亚珠加油站"
"张寨汽贸城南门雅韵皖酒厂后面" aoi: "张寨汽贸城", poi: "雅韵皖酒厂", address: "张寨汽贸城雅韵皖酒厂"
"高铁南站南边交控集团" poi: "交控集团", address: "交控集团""店集镇蜜雪冰城门口" poi: "蜜雪冰城", address: "店集镇蜜雪冰城"
"二桥北头加油站" aoi: "", poi: "", address: """中医院康复楼(庄周西区)" aoi: "", poi: "中医院康复楼", address: "中医院康复楼(庄周西区)"
"吉峰农机大市场西门(庄周西区)" aoi: "吉峰农机大市场", poi: "", address: "吉峰农机大市场(庄周西区)""陈大镇杨楼中队后面" poi: "杨楼中队", address: "陈大镇杨楼中队"
"芦庙镇宋庄" poi: "宋庄", address: "芦庙镇宋庄""涡南镇葛庄东北角" poi: "葛庄", address: "涡南镇葛庄"
"板桥双陆大王庄" poi: "大王庄", address: "板桥双陆大王庄""观堂镇政府门口" poi: "观堂镇政府", address: "观堂镇政府"
"张村老镇政府院里" poi: "张村老镇政府", address: "张村老镇政府""客运中心门口" poi: "客运中心", address: "客运中心"
"孙湾火车站出站口" poi: "孙湾火车站", address: "孙湾火车站""车管所院内" poi: "车管所", address: "车管所"
"火车站西路口往北" poi: "火车站西路口", address: "火车站西路口"
address"桥南""东红绿灯""北头"poi"南加油站""加油站"aoi"庄周西区""西区"
5. (street)"街道""蜀山街道""三里庵街道"
(community)"社区""XX社区""社区""小区"AOI
POIPOI"超市""加油站""银行""学校""医院""政府""火车站""小区""民宿"POI
AOIAOI"小区""商场""园区""市场""广场""大厦"AOI
6. JSON{
"aoi": "匹配的AOI名称无则为空字符串",
"aoi_type": "AOI类型无则为空字符串",
"poi": "最可能的POI名称无有效POI则为空字符串",
"poi_type": "POI类型无则为空字符串",
"street": "街道名称(行政区划),无则为空字符串",
"community": "社区名称,无则为空字符串",
"address": "清洗后的核心地址,若无有效定位信息则为空字符串"
} JSONmarkdown
""";
// 启动迁移
public void startMigration(String xzqh, String startTime) {
log.info("参数xzqh={},startTime={}",xzqh,startTime);
@ -163,6 +237,99 @@ public class DataMigrationService {
log.info("✅ 数据迁移任务全部提交完成");
}
//涉黄警情
public void startShMigration() {
log.info("涉黄警情开始迁移数据...");
// 2. 分页读取源数据
LambdaQueryWrapper<ShJqd> jqlqw = new LambdaQueryWrapper<>();
jqlqw.le(ShJqd::getScbjsj,"2025-01-03 15:30:10")
.isNotNull(ShJqd::getSfdz)
.ne(ShJqd::getSfdz,"地址不详")
.ne(ShJqd::getSfdz,"地址未知");
long totalCount = shJqdMapper.selectCount(jqlqw);
log.info("涉黄查询数据大小={}",totalCount);
long totalPages = (totalCount + PAGE_SIZE - 1) / PAGE_SIZE;
PageQuery pageQuery = new PageQuery();
jqlqw.orderByDesc(ShJqd::getScbjsj);
pageQuery.setPageSize(PAGE_SIZE);
for (int pageNum = 0; pageNum < totalPages; pageNum++) {
long offset = pageNum * PAGE_SIZE;
pageQuery.setPageNum(pageNum);
log.info("正在处理涉黄第 {}/{} 页, offset={}", pageNum + 1, totalPages, offset);
List<ShJqd> pageData;
try {
IPage jqPage = shJqdMapper.selectPage(pageQuery.build(), jqlqw);
pageData = jqPage.getRecords();
} catch (Exception e) {
log.error("读取涉黄第 {} 页失败", pageNum, e);
continue;
}
if (pageData.isEmpty()) continue;
// 并发处理当前页
processShPageAsync(pageData);
}
log.info("✅ 涉黄数据迁移任务全部提交完成");
}
//涉赌警情
public void startSdMigration() {
log.info("涉赌警情开始迁移数据...");
// 2. 分页读取源数据
LambdaQueryWrapper<SdJqd> jqlqw = new LambdaQueryWrapper<>();
jqlqw.le(SdJqd::getScbjsj,"2025-01-01 14:01:41")
.isNotNull(SdJqd::getSfdz)
.ne(SdJqd::getSfdz,"地址不详")
.ne(SdJqd::getSfdz,"地址未知");
long totalCount = sdJqdMapper.selectCount(jqlqw);
log.info("涉赌查询数据大小={}",totalCount);
long totalPages = (totalCount + PAGE_SIZE - 1) / PAGE_SIZE;
PageQuery pageQuery = new PageQuery();
jqlqw.orderByDesc(SdJqd::getScbjsj);
pageQuery.setPageSize(PAGE_SIZE);
for (int pageNum = 0; pageNum < totalPages; pageNum++) {
long offset = pageNum * PAGE_SIZE;
pageQuery.setPageNum(pageNum);
log.info("正在处理涉赌第 {}/{} 页, offset={}", pageNum + 1, totalPages, offset);
List<SdJqd> pageData;
try {
IPage jqPage = sdJqdMapper.selectPage(pageQuery.build(), jqlqw);
pageData = jqPage.getRecords();
} catch (Exception e) {
log.error("读取涉赌第 {} 页失败", pageNum, e);
continue;
}
if (pageData.isEmpty()) continue;
// 并发处理当前页
processSdPageAsync(pageData);
}
log.info("✅ 涉赌数据迁移任务全部提交完成");
}
// 加载目标库中已存在的 name 到内存(避免重复插入)
private void loadExistingNamesToMemory() {
Long count = poiAddressMapper.selectCount(null);
@ -321,6 +488,96 @@ public class DataMigrationService {
});
}
//涉黄
private void processShPageAsync(List<ShJqd> pageData) {
log.info("✅ 提交异步任务,页数据量: {}", pageData.size());
CompletableFuture.runAsync(() -> {
try {
List<ShJqd> batch = new ArrayList<>();
for (ShJqd src : pageData) {
try {
String query = src.getSfdz();
if (StrUtil.isBlank(query)) continue;
log.debug("📡 正在调用第三方 API 查询: {}", query); // ✅ 加日志
// 调用接口获取解析结果
AddressParseResult result = parseAddressWithLLM(query);
if (result == null ) {
continue;
}
src.setAoi(result.getAoi());
src.setPoi(result.getPoi());
src.setAoiType(result.getAoiType());
src.setPoiType(result.getPoiType());
src.setStreet(result.getStreet());
src.setCommunity(result.getCommunity());
batch.add(src);
} catch (Exception e) {
log.error("处理地址失败: {}", src.getSfdz(), e);
}
}
shJqdMapper.updateBatchById(batch);
log.info("涉黄调用LLM模型修改数据: {}", batch.size());
}catch (Throwable t) {
// 🚨 捕获所有错误(包括 Error
log.error("❌ processPageAsync 任务内部发生致命错误", t);
}
}, executorService).exceptionally(throwable -> {
// 🚨 捕获 CompletableFuture 本身的异常
log.error("❌ CompletableFuture 执行失败", throwable);
return null;
});
}
private void processSdPageAsync(List<SdJqd> pageData) {
log.info("✅ 提交异步任务,页数据量: {}", pageData.size());
CompletableFuture.runAsync(() -> {
try {
List<SdJqd> batch = new ArrayList<>();
for (SdJqd src : pageData) {
try {
String query = src.getSfdz();
if (StrUtil.isBlank(query)) continue;
log.debug("📡 正在调用第三方 API 查询: {}", query); // ✅ 加日志
// 调用接口获取解析结果
AddressParseResult result = parseAddressWithLLM(query);
if (result == null ) {
continue;
}
src.setAoi(result.getAoi());
src.setPoi(result.getPoi());
src.setAoiType(result.getAoiType());
src.setPoiType(result.getPoiType());
src.setStreet(result.getStreet());
src.setCommunity(result.getCommunity());
batch.add(src);
} catch (Exception e) {
log.error("处理地址失败: {}", src.getSfdz(), e);
}
}
sdJqdMapper.updateBatchById(batch);
log.info("涉赌调用LLM模型修改数据: {}", batch.size());
}catch (Throwable t) {
// 🚨 捕获所有错误(包括 Error
log.error("❌ processPageAsync 任务内部发生致命错误", t);
}
}, executorService).exceptionally(throwable -> {
// 🚨 捕获 CompletableFuture 本身的异常
log.error("❌ CompletableFuture 执行失败", throwable);
return null;
});
}
// 调用第三方接口
private ParsedResult callThirdPartyApi(String query) {
try {
@ -521,11 +778,204 @@ public class DataMigrationService {
batch.clear();
}
@PreDestroy
public void shutdown() {
executorService.shutdown();
}
/**
*
*/
private void handleColumnAliases(Map<String, Integer> columnMap, String mainName, List<String> aliases) {
if (!columnMap.containsKey(mainName)) {
for (String alias : aliases) {
if (columnMap.containsKey(alias)) {
columnMap.put(mainName, columnMap.get(alias));
log.info("使用别名 '{}' 作为 '{}'", alias, mainName);
break;
}
}
}
}
/**
*
*/
private AddressParseResult parseAddressWithLLM(String address) {
try {
// 构造消息
JSONArray messages = new JSONArray();
messages.add(new JSONObject().set("role", "system").set("content", SYSTEM_PROMPT_TYPE));
messages.add(new JSONObject().set("role", "user").set("content", address));
// 构建请求体
JSONObject requestBody = new JSONObject()
.set("model", "Qwen3-32B")
.set("messages", messages)
.set("max_tokens", 3000)
.set("temperature", 0)
.set("seed",42)
.set("top_p", 0.9);
// 发送请求
String response = HttpUtil.createPost(apiUrl)
.header("Authorization", "Bearer " + bearerToken)
.header("Content-Type", "application/json")
.timeout(30000) // 增加超时时间
.body(requestBody.toString())
.execute()
.body();
log.debug("大模型响应: {}", response);
// 解析响应
JSONObject jsonResponse = JSONUtil.parseObj(response);
JSONArray choices = jsonResponse.getJSONArray("choices");
if (choices == null || choices.isEmpty()) {
log.warn("API返回无choices");
return null;
}
String content = choices.get(0, JSONObject.class)
.getJSONObject("message")
.getStr("content");
if (StrUtil.isBlank(content)) {
log.warn("content为空");
return null;
}
// 提取JSON
String cleanJson = extractPureJsonFromContentForExcel(content);
if (StrUtil.isBlank(cleanJson)) {
log.warn("无法提取有效JSON");
return null;
}
JSONObject resultJson = JSONUtil.parseObj(cleanJson);
return new AddressParseResult(
resultJson.getStr("aoi", ""),
resultJson.getStr("aoi_type", ""),
resultJson.getStr("poi", ""),
resultJson.getStr("poi_type", ""),
resultJson.getStr("street", ""),
resultJson.getStr("community", ""),
resultJson.getStr("address", "")
);
} catch (Exception e) {
log.error("调用大模型解析地址失败: {}", address, e);
return null;
}
}
/**
* ExcelJSON
*/
private String extractPureJsonFromContentForExcel(String content) {
if (StrUtil.isBlank(content)) return null;
// 1. 移除 <think>...</think>
content = content.replaceAll("(?s)<think>.*?</think>", "");
content = content.trim();
// 2. 尝试直接解析
if (JSONUtil.isJson(content)) {
return content;
}
// 3. 查找第一个 { 和最后一个 }
int start = content.indexOf('{');
int end = content.lastIndexOf('}');
if (start >= 0 && end > start) {
String candidate = content.substring(start, end + 1);
if (JSONUtil.isJson(candidate)) {
return candidate;
}
}
// 4. 尝试提取JSON对象
Pattern pattern = Pattern.compile("\\{(?:[^{}]|\\{(?:[^{}]|\\{[^{}]*\\})*\\})*\\}", Pattern.DOTALL);
Matcher matcher = pattern.matcher(content);
while (matcher.find()) {
String candidate = matcher.group();
if (JSONUtil.isJson(candidate) &&
(candidate.contains("\"aoi\"") || candidate.contains("\"poi\"") ||
candidate.contains("\"aoi_type\"") || candidate.contains("\"poi_type\""))) {
return candidate;
}
}
return null;
}
/**
*
*/
private int getOrCreateColumn(Sheet sheet, Map<String, Integer> columnMap,
String columnName, String defaultColumn) {
if (columnMap.containsKey(columnName)) {
return columnMap.get(columnName);
}
// 创建新列
Row headerRow = sheet.getRow(0);
int newColIndex = headerRow.getLastCellNum();
Cell newHeaderCell = headerRow.createCell(newColIndex);
newHeaderCell.setCellValue(columnName);
columnMap.put(columnName, newColIndex);
return newColIndex;
}
/**
* JSON
*/
private String extractPureJsonFromContentnew(String content) {
// 使用原有的 extractPureJsonFromContent 方法
// 或者可以稍作修改以适应新的JSON结构
if (StrUtil.isBlank(content)) return null;
// 移除 <think>...</think>
content = content.replaceAll("(?s)<think>.*?</think>", "");
content = content.trim();
// 尝试直接解析
if (JSONUtil.isJson(content)) {
return content;
}
// 尝试提取 {...} 模式
Pattern pattern = Pattern.compile("\\{.*?\\}", Pattern.DOTALL);
Matcher matcher = pattern.matcher(content);
while (matcher.find()) {
String candidate = matcher.group();
if (JSONUtil.isJson(candidate) &&
(candidate.contains("\"aoi\"") || candidate.contains("\"poi\""))) {
return candidate;
}
}
return null;
}
// 辅助类
private static class ParsedResult {
private final String name, aoi, poi, address;

View File

@ -0,0 +1,73 @@
package org.dromara.extract.util;
import lombok.Data;
import org.dromara.extract.domain.ExcelProcessResult;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
@Data
public class ExcelProcessTask {
private String requestId;
private String filename;
private volatile String status; // 使用 volatile 确保多线程可见性
private volatile int progress; // 使用 volatile
private AtomicInteger totalRows = new AtomicInteger(0);
private AtomicInteger processedRows = new AtomicInteger(0); // 实际处理的行数(不含表头)
private AtomicInteger successCount = new AtomicInteger(0);
private AtomicInteger failCount = new AtomicInteger(0);
private String errorMessage;
private long startTime;
private long endTime;
private CompletableFuture<ExcelProcessResult> processFuture;
private boolean previewMode;
private boolean previewCompleted = false; // 预览是否已完成
private int previewRows; // 预览的行数限制
private int previewCompletedRows = 0; // 实际完成预览的行数
public ExcelProcessTask(String requestId, String filename) {
this.requestId = requestId;
this.filename = filename;
this.startTime = System.currentTimeMillis();
}
public long getElapsedTime() {
if (endTime == 0) {
return System.currentTimeMillis() - startTime;
}
return endTime - startTime;
}
// 提供原子操作来更新计数器
public void incrementProcessedRows(int count) {
this.processedRows.getAndAdd(count);
}
public void incrementSuccessCount(int count) {
this.successCount.getAndAdd(count);
}
public void incrementFailCount(int count) {
this.failCount.getAndAdd(count);
}
public int getProcessedRows() {
return this.processedRows.get();
}
public int getSuccessCount() {
return this.successCount.get();
}
public int getFailCount() {
return this.failCount.get();
}
public int getTotalRows() {
return this.totalRows.get();
}
public void setTotalRows(int totalRows) {
this.totalRows.set(totalRows);
}
}

View File

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.dromara.extract.mapper.SdJqdMapper">
<resultMap id="sdjqdResult" type="org.dromara.extract.domain.SdJqd">
</resultMap>
</mapper>

View File

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.dromara.extract.mapper.ShJqdMapper">
<resultMap id="shjqdResult" type="org.dromara.extract.domain.ShJqd">
</resultMap>
</mapper>