组合查询5000w数据查票务组合
需求情况:现在要查询一张表的数据大概有5000w 数据 需要拿到这个5000w里面数据游客记录,然后算出里面的景区排列组合数量
需求1. 7天周期算一个组合
2. 根据景区算出所有排列组合人数
解决思路:1.拿到这张表的个人信息 这张表只有一个有用的信息那就是身份证号用来辨别身份证
2.拿到这个身份证游客的最低游玩时间和最晚游玩时间
-- pw_sxwl.report_cert_usage_summary definition
CREATE TABLE `report_cert_usage_summary` (
`cert_no` varchar(18) NOT NULL,
`min_use_time` datetime DEFAULT NULL,
`max_use_time` datetime DEFAULT NULL,
`status` int DEFAULT '0',
PRIMARY KEY (`cert_no`),
UNIQUE KEY `cert_usage_summary_un` (`cert_no`),
KEY `report_cert_usage_summary_min_use_time_IDX` (`min_use_time`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
表已经好了那就需要提取数据了
CREATE DEFINER=`admin_pw_sxwl`@`%` PROCEDURE `pw_sxwl`.`TransferCertUsageData`()
begin
-- 插入数据之前先清空目标表,或者根据需要调整这部分逻辑
TRUNCATE TABLE report_cert_usage_summary;
-- 查询身份证的最小和最大使用时间,并插入到目标表中
INSERT INTO report_cert_usage_summary (cert_no, min_use_time, max_use_time)
SELECT
CERT_NO,
MIN(USE_TIME) AS min_use_time,
MAX(USE_TIME) AS max_use_time
FROM
tkt_checked_dtl
WHERE
CERT_NO IS NOT NULL AND CERT_NO != '' AND LENGTH(CERT_NO) = 18 and use_time is not null and PARK_ID in('10001',
'10009','10008',
'10005')
GROUP BY
CERT_NO;
END
存储结构提取一下数据 只拿到一部分景点数据
拿到以后就简单了 可以一个一个去遍历这些人从最早时间到最晚时间 把数据插入到一个新表里面
-- pw_sxwl.report_combination definition
CREATE TABLE `report_combination` (
`id` varchar(100) NOT NULL,
`combination` varchar(100) DEFAULT NULL,
`start_time` datetime DEFAULT NULL COMMENT '当前最后一周开始时间',
`end_time` datetime DEFAULT NULL,
`id_card` char(18) DEFAULT NULL,
KEY `report_combination_id_card_IDX` (`id_card`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
这里的startTime是为了每次的新数据来了以后去同步新数据的 记录当前最后一个周期的时间2个目的
1.如果正好是最后一个周期的7天 那他的开始时间就要从这个周期开始时间➕一天
2.如果不是最后一个周期的7天 那就需要覆盖此数据 他的开始时间就是这个记录周期的开始时间 结束时间就是同步以后的结束时间
上代码
public void sync() throws ParseException {
int batchSize = 10000; // 一次查询5万条数据
int offset = 0;
// 切换到目标数据源(写入数据)
DynamicDataSourceContextHolder.setDataSourceType(DataSourceType.SLAVE.name());
// ticketMapper.downData();
while (true) {
RowBounds rowBounds = new RowBounds(offset, batchSize);
List<Base> batchData = ticketMapper.selectBaseData(rowBounds);
if (batchData.isEmpty()) {
break;
}
// SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
//
//
// Date maxUseTime = formatter.parse("2023-09-20 11:45:12");
// Date minUseTime = formatter.parse("2023-07-20 11:45:12");
// ArrayList<Base> batchData = new ArrayList<>();
// Base base1 = new Base();
// base1.setCert_no("005155196410146620");
// base1.setMin_use_time(minUseTime);
// base1.setMax_use_time(maxUseTime);
// batchData.add(base1);
batchData.forEach(base -> {
Combination combination1 = ticketMapper.selectByCard(base.getCert_no());
if (combination1 != null) {
if (base.getMax_use_time().equals(combination1.getEndTime())) {
System.out.println("==================" + base.getCert_no() + "此数据不需要更新");
return;
} else {
System.out.println("==================" + combination1 + "数据");
System.out.println("==================" + combination1.getEndTime() + "时间");
if (base.getMax_use_time().after(combination1.getEndTime())) {
int intervalDays = getIntervalDays(combination1.getEndTime(), combination1.getStartTime());
if (intervalDays < 7) {
LocalDateTime startDate = combination1.getStartTime().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
LocalDateTime endDate = base.getMax_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
int i = 0;
while (!startDate.isAfter(endDate)) {
LocalDateTime nextDate = startDate.plusDays(7);
// Ensure the query does not exceed the max time
// if (nextDate.isAfter(endDate)) {
// nextDate = endDate.plusDays(1); // to include the last day in the range
// }
i = i + 1;
Date start = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
Date end = Date.from(nextDate.atZone(ZoneId.systemDefault()).toInstant());
List<Ticket> tickets = ticketMapper.selectTicketListDay(base.getCert_no(), start, end);
if (tickets != null && !tickets.isEmpty()) {
StringBuffer stringBuffer = new StringBuffer();
// 使用流来收集和排序公园名称
String sortedString = tickets.stream()
.map(ticket -> ticketMapper.selectPark(ticket.getPARK_ID()).getPARK_NAME())
.collect(Collectors.toCollection(TreeSet::new)) // 收集到TreeSet中自动去重和排序
.stream()
.collect(Collectors.joining(",")); // 使用逗号将排序后的名称连接成字符串
Combination combination = new Combination();
combination.setId(IdUtils.simpleUUID());
combination.setIdCard(base.getCert_no());
combination.setCombination(sortedString);
System.out.println(startDate.plusDays(7));
if (i == 1) {
combination.setId(combination1.getId());
combination.setCombination(sortedString);
System.out.println("==================" + base.getCert_no() + "更新身份证id");
System.out.println("==================" + combination1.getId() + "更新id号");
exportTicketMapper.update(combination);
} else {
if (nextDate.isAfter(endDate) || startDate.plusDays(7).equals(endDate)) {
// System.out.println("最后一个周期"+nextDate);
System.out.println("最后一个周期" + startDate);
System.out.println("最后一个周期" + endDate);
Date startTime = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
Date endTime = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
combination.setStartTime(startTime);
combination.setEndTime(endTime);
exportTicketMapper.insert(combination);
} else {
exportTicketMapper.insert(combination);
}
}
}
startDate = nextDate; // Move to the next period
}
} else {
LocalDateTime startDate1 = combination1.getStartTime().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
LocalDateTime startDate = startDate1.plusDays(1);
LocalDateTime endDate = base.getMax_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
while (!startDate.isAfter(endDate)) {
LocalDateTime nextDate = startDate.plusDays(7);
// Ensure the query does not exceed the max time
// if (nextDate.isAfter(endDate)) {
// nextDate = endDate.plusDays(1); // to include the last day in the range
// }
Date start = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
Date end = Date.from(nextDate.atZone(ZoneId.systemDefault()).toInstant());
List<Ticket> tickets = new ArrayList<>();
if (nextDate.isAfter(endDate)) {
Date ends = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
tickets = ticketMapper.selectTicketListDay(base.getCert_no(), start, ends);
} else {
tickets = ticketMapper.selectTicketList(base.getCert_no(), start, end);
}
if (tickets != null && !tickets.isEmpty()) {
StringBuffer stringBuffer = new StringBuffer();
// 使用流来收集和排序公园名称
String sortedString = tickets.stream()
.map(ticket -> ticketMapper.selectPark(ticket.getPARK_ID()).getPARK_NAME())
.collect(Collectors.toCollection(TreeSet::new)) // 收集到TreeSet中自动去重和排序
.stream()
.collect(Collectors.joining(",")); // 使用逗号将排序后的名称连接成字符串
Combination combination = new Combination();
combination.setId(IdUtils.simpleUUID());
combination.setIdCard(base.getCert_no());
combination.setCombination(sortedString);
System.out.println(startDate.plusDays(7));
if (nextDate.isAfter(endDate) || startDate.plusDays(7).equals(endDate)) {
// System.out.println("最后一个周期"+nextDate);
System.out.println("最后一个周期" + startDate);
System.out.println("最后一个周期" + endDate);
Date startTime = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
Date endTime = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
combination.setStartTime(startTime);
combination.setEndTime(endTime);
exportTicketMapper.insert(combination);
} else {
exportTicketMapper.insert(combination);
}
}
startDate = nextDate; // Move to the next period
}
}
}
}
} else {
LocalDateTime startDate = base.getMin_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
LocalDateTime endDate = base.getMax_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
while (!startDate.isAfter(endDate)) {
LocalDateTime nextDate = startDate.plusDays(7);
// Ensure the query does not exceed the max time
// if (nextDate.isAfter(endDate)) {
// nextDate = endDate.plusDays(1); // to include the last day in the range
// }
Date start = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
Date end = Date.from(nextDate.atZone(ZoneId.systemDefault()).toInstant());
List<Ticket> tickets = new ArrayList<>();
if (nextDate.isAfter(endDate)) {
Date ends = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
tickets = ticketMapper.selectTicketListDay(base.getCert_no(), start, ends);
} else {
tickets = ticketMapper.selectTicketList(base.getCert_no(), start, end);
}
if (tickets != null && !tickets.isEmpty()) {
StringBuffer stringBuffer = new StringBuffer();
// 使用流来收集和排序公园名称
String sortedString = tickets.stream()
.map(ticket -> ticketMapper.selectPark(ticket.getPARK_ID()).getPARK_NAME())
.collect(Collectors.toCollection(TreeSet::new)) // 收集到TreeSet中自动去重和排序
.stream()
.collect(Collectors.joining(",")); // 使用逗号将排序后的名称连接成字符串
Combination combination = new Combination();
combination.setId(IdUtils.simpleUUID());
combination.setIdCard(base.getCert_no());
combination.setCombination(sortedString);
System.out.println(startDate.plusDays(7));
if (nextDate.isAfter(endDate) || startDate.plusDays(7).equals(endDate)) {
// System.out.println("最后一个周期"+nextDate);
System.out.println("最后一个周期" + startDate);
System.out.println("最后一个周期" + endDate);
Date startTime = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
Date endTime = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
combination.setStartTime(startTime);
combination.setEndTime(endTime);
exportTicketMapper.insert(combination);
} else {
exportTicketMapper.insert(combination);
}
}
startDate = nextDate; // Move to the next period
}
}
});
// 增加偏移量准备下一批数据
offset += batchSize;
// 清理数据源设置,回到默认数据源或为下一次循环准备
// DynamicDataSourceContextHolder.clearDataSourceType();
}
}
这里一次读取一w用户遍历 可以根据实际情况去调整
我们看最后结果
这里因为只是测试用户我只用了三个景点去测试组合
评论区