标签搜索

目 录CONTENT

文章目录

组合查询5000w数据查票务组合

WP&CZH
2024-04-30 / 0 评论 / 4 点赞 / 413 阅读 / 1,918 字 / 正在检测是否收录...

组合查询5000w数据查票务组合

需求情况:现在要查询一张表的数据大概有5000w 数据 需要拿到这个5000w里面数据游客记录,然后算出里面的景区排列组合数量

需求1. 7天周期算一个组合

​ 2. 根据景区算出所有排列组合人数

解决思路:1.拿到这张表的个人信息 这张表只有一个有用的信息那就是身份证号用来辨别身份证

image-20240430154026062

​ 2.拿到这个身份证游客的最低游玩时间和最晚游玩时间

image-20240430154345262

-- pw_sxwl.report_cert_usage_summary definition

CREATE TABLE `report_cert_usage_summary` (
  `cert_no` varchar(18) NOT NULL,
  `min_use_time` datetime DEFAULT NULL,
  `max_use_time` datetime DEFAULT NULL,
  `status` int DEFAULT '0',
  PRIMARY KEY (`cert_no`),
  UNIQUE KEY `cert_usage_summary_un` (`cert_no`),
  KEY `report_cert_usage_summary_min_use_time_IDX` (`min_use_time`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

表已经好了那就需要提取数据了

CREATE DEFINER=`admin_pw_sxwl`@`%` PROCEDURE `pw_sxwl`.`TransferCertUsageData`()
begin
	
	    -- 插入数据之前先清空目标表,或者根据需要调整这部分逻辑
    TRUNCATE TABLE report_cert_usage_summary;

    -- 查询身份证的最小和最大使用时间,并插入到目标表中
    INSERT INTO report_cert_usage_summary (cert_no, min_use_time, max_use_time)
      SELECT 
        CERT_NO, 
        MIN(USE_TIME) AS min_use_time, 
        MAX(USE_TIME) AS max_use_time
    FROM 
        tkt_checked_dtl 
    WHERE 
 
        CERT_NO IS NOT NULL AND CERT_NO != '' AND LENGTH(CERT_NO) = 18  and use_time is not null and PARK_ID in('10001',
         '10009','10008',
                                    '10005')
    GROUP BY 
    CERT_NO;
    
   
      
END   

存储结构提取一下数据 只拿到一部分景点数据

image-20240430154546874

拿到以后就简单了 可以一个一个去遍历这些人从最早时间到最晚时间 把数据插入到一个新表里面

image-20240430154745525

-- pw_sxwl.report_combination definition

CREATE TABLE `report_combination` (
  `id` varchar(100) NOT NULL,
  `combination` varchar(100) DEFAULT NULL,
  `start_time` datetime DEFAULT NULL COMMENT '当前最后一周开始时间',
  `end_time` datetime DEFAULT NULL,
  `id_card` char(18) DEFAULT NULL,
  KEY `report_combination_id_card_IDX` (`id_card`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

这里的startTime是为了每次的新数据来了以后去同步新数据的 记录当前最后一个周期的时间2个目的

1.如果正好是最后一个周期的7天 那他的开始时间就要从这个周期开始时间➕一天

2.如果不是最后一个周期的7天 那就需要覆盖此数据 他的开始时间就是这个记录周期的开始时间 结束时间就是同步以后的结束时间

上代码

  public void sync() throws ParseException {
        int batchSize = 10000;  // 一次查询5万条数据
        int offset = 0;
        // 切换到目标数据源(写入数据)
        DynamicDataSourceContextHolder.setDataSourceType(DataSourceType.SLAVE.name());
//    ticketMapper.downData();
        while (true) {
            RowBounds rowBounds = new RowBounds(offset, batchSize);
            List<Base> batchData = ticketMapper.selectBaseData(rowBounds);
            if (batchData.isEmpty()) {
                break;
            }
//        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
//
//
//            Date maxUseTime = formatter.parse("2023-09-20 11:45:12");
//            Date minUseTime = formatter.parse("2023-07-20 11:45:12");
//        ArrayList<Base> batchData = new ArrayList<>();
//        Base base1 = new Base();
//        base1.setCert_no("005155196410146620");
//        base1.setMin_use_time(minUseTime);
//        base1.setMax_use_time(maxUseTime);
//        batchData.add(base1);


            batchData.forEach(base -> {
                Combination combination1 = ticketMapper.selectByCard(base.getCert_no());
                if (combination1 != null) {
                    if (base.getMax_use_time().equals(combination1.getEndTime())) {
                        System.out.println("==================" + base.getCert_no() + "此数据不需要更新");
                        return;
                    } else {
                        System.out.println("==================" + combination1 + "数据");
                        System.out.println("==================" + combination1.getEndTime() + "时间");
                        if (base.getMax_use_time().after(combination1.getEndTime())) {
                            int intervalDays = getIntervalDays(combination1.getEndTime(), combination1.getStartTime());
                            if (intervalDays < 7) {
                                LocalDateTime startDate = combination1.getStartTime().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
                                LocalDateTime endDate = base.getMax_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
                                int i = 0;
                                while (!startDate.isAfter(endDate)) {
                                    LocalDateTime nextDate = startDate.plusDays(7);
                                    // Ensure the query does not exceed the max time
//                    if (nextDate.isAfter(endDate)) {
//                        nextDate = endDate.plusDays(1); // to include the last day in the range
//                    }
                                    i = i + 1;
                                    Date start = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
                                    Date end = Date.from(nextDate.atZone(ZoneId.systemDefault()).toInstant());
                                    List<Ticket> tickets = ticketMapper.selectTicketListDay(base.getCert_no(), start, end);

                                    if (tickets != null && !tickets.isEmpty()) {
                                        StringBuffer stringBuffer = new StringBuffer();

//                                  使用流来收集和排序公园名称
                                        String sortedString = tickets.stream()
                                                .map(ticket -> ticketMapper.selectPark(ticket.getPARK_ID()).getPARK_NAME())
                                                .collect(Collectors.toCollection(TreeSet::new)) // 收集到TreeSet中自动去重和排序
                                                .stream()
                                                .collect(Collectors.joining(",")); // 使用逗号将排序后的名称连接成字符串

                                        Combination combination = new Combination();
                                        combination.setId(IdUtils.simpleUUID());
                                        combination.setIdCard(base.getCert_no());
                                        combination.setCombination(sortedString);
                                        System.out.println(startDate.plusDays(7));
                                        if (i == 1) {
                                            combination.setId(combination1.getId());
                                            combination.setCombination(sortedString);
                                            System.out.println("==================" + base.getCert_no() + "更新身份证id");
                                            System.out.println("==================" + combination1.getId() + "更新id号");
                                            exportTicketMapper.update(combination);
                                        } else {
                                            if (nextDate.isAfter(endDate) || startDate.plusDays(7).equals(endDate)) {
//                            System.out.println("最后一个周期"+nextDate);
                                                System.out.println("最后一个周期" + startDate);
                                                System.out.println("最后一个周期" + endDate);
                                                Date startTime = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
                                                Date endTime = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
                                                combination.setStartTime(startTime);
                                                combination.setEndTime(endTime);
                                                exportTicketMapper.insert(combination);
                                            } else {
                                                exportTicketMapper.insert(combination);

                                            }
                                        }


                                    }
                                    startDate = nextDate; // Move to the next period
                                }
                            } else {
                                LocalDateTime startDate1 = combination1.getStartTime().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
                                LocalDateTime startDate = startDate1.plusDays(1);
                                LocalDateTime endDate = base.getMax_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();

                                while (!startDate.isAfter(endDate)) {
                                    LocalDateTime nextDate = startDate.plusDays(7);
                                    // Ensure the query does not exceed the max time
//                    if (nextDate.isAfter(endDate)) {
//                        nextDate = endDate.plusDays(1); // to include the last day in the range
//                    }

                                    Date start = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
                                    Date end = Date.from(nextDate.atZone(ZoneId.systemDefault()).toInstant());
                                    List<Ticket> tickets = new ArrayList<>();

                                    if (nextDate.isAfter(endDate)) {
                                        Date ends = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
                                        tickets = ticketMapper.selectTicketListDay(base.getCert_no(), start, ends);
                                    } else {
                                        tickets = ticketMapper.selectTicketList(base.getCert_no(), start, end);
                                    }


                                    if (tickets != null && !tickets.isEmpty()) {
                                        StringBuffer stringBuffer = new StringBuffer();

//                     使用流来收集和排序公园名称
                                        String sortedString = tickets.stream()
                                                .map(ticket -> ticketMapper.selectPark(ticket.getPARK_ID()).getPARK_NAME())
                                                .collect(Collectors.toCollection(TreeSet::new)) // 收集到TreeSet中自动去重和排序
                                                .stream()
                                                .collect(Collectors.joining(",")); // 使用逗号将排序后的名称连接成字符串

                                        Combination combination = new Combination();
                                        combination.setId(IdUtils.simpleUUID());
                                        combination.setIdCard(base.getCert_no());
                                        combination.setCombination(sortedString);
                                        System.out.println(startDate.plusDays(7));
                                        if (nextDate.isAfter(endDate) || startDate.plusDays(7).equals(endDate)) {
//                            System.out.println("最后一个周期"+nextDate);
                                            System.out.println("最后一个周期" + startDate);
                                            System.out.println("最后一个周期" + endDate);
                                            Date startTime = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
                                            Date endTime = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
                                            combination.setStartTime(startTime);
                                            combination.setEndTime(endTime);
                                            exportTicketMapper.insert(combination);
                                        } else {
                                            exportTicketMapper.insert(combination);

                                        }


                                    }
                                    startDate = nextDate; // Move to the next period
                                }
                            }
                        }
                    }
                } else {
                    LocalDateTime startDate = base.getMin_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
                    LocalDateTime endDate = base.getMax_use_time().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();

                    while (!startDate.isAfter(endDate)) {
                        LocalDateTime nextDate = startDate.plusDays(7);
                        // Ensure the query does not exceed the max time
//                    if (nextDate.isAfter(endDate)) {
//                        nextDate = endDate.plusDays(1); // to include the last day in the range
//                    }

                        Date start = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
                        Date end = Date.from(nextDate.atZone(ZoneId.systemDefault()).toInstant());
                        List<Ticket> tickets = new ArrayList<>();

                        if (nextDate.isAfter(endDate)) {
                            Date ends = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
                            tickets = ticketMapper.selectTicketListDay(base.getCert_no(), start, ends);
                        } else {
                            tickets = ticketMapper.selectTicketList(base.getCert_no(), start, end);
                        }


                        if (tickets != null && !tickets.isEmpty()) {
                            StringBuffer stringBuffer = new StringBuffer();

//                     使用流来收集和排序公园名称
                            String sortedString = tickets.stream()
                                    .map(ticket -> ticketMapper.selectPark(ticket.getPARK_ID()).getPARK_NAME())
                                    .collect(Collectors.toCollection(TreeSet::new)) // 收集到TreeSet中自动去重和排序
                                    .stream()
                                    .collect(Collectors.joining(",")); // 使用逗号将排序后的名称连接成字符串

                            Combination combination = new Combination();
                            combination.setId(IdUtils.simpleUUID());
                            combination.setIdCard(base.getCert_no());
                            combination.setCombination(sortedString);
                            System.out.println(startDate.plusDays(7));
                            if (nextDate.isAfter(endDate) || startDate.plusDays(7).equals(endDate)) {
//                            System.out.println("最后一个周期"+nextDate);
                                System.out.println("最后一个周期" + startDate);
                                System.out.println("最后一个周期" + endDate);
                                Date startTime = Date.from(startDate.atZone(ZoneId.systemDefault()).toInstant());
                                Date endTime = Date.from(endDate.atZone(ZoneId.systemDefault()).toInstant());
                                combination.setStartTime(startTime);
                                combination.setEndTime(endTime);
                                exportTicketMapper.insert(combination);
                            } else {
                                exportTicketMapper.insert(combination);

                            }


                        }
                        startDate = nextDate; // Move to the next period
                    }
                }


            });

            // 增加偏移量准备下一批数据
            offset += batchSize;

            // 清理数据源设置,回到默认数据源或为下一次循环准备
//            DynamicDataSourceContextHolder.clearDataSourceType();
        }
    }

这里一次读取一w用户遍历 可以根据实际情况去调整

我们看最后结果

这里因为只是测试用户我只用了三个景点去测试组合

4

评论区