Browse Source

下载图片数据防止重复下载

祝雷 2 years ago
parent
commit
7b254aa90b

+ 244 - 1
tongfei_river_data_collection/src/main/java/com/ublinkage/datacollection/service/SatelliteRadarImageCollectionTask.java

@@ -1,6 +1,7 @@
 package com.ublinkage.datacollection.service;
 
 import lombok.extern.slf4j.Slf4j;
+import org.apache.tomcat.util.buf.StringUtils;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.select.Elements;
@@ -10,6 +11,12 @@ import org.springframework.stereotype.Service;
 
 import java.io.*;
 import java.net.URL;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.List;
 
 /**
  * @author Yang Huang
@@ -26,7 +33,11 @@ public class SatelliteRadarImageCollectionTask {
      */
     @Value("${download.file.path}")
     private String downloadFilePath;
-
+    /**
+     * Path of the de-duplication record file, injected from the
+     * {@code duplicateFile.path} property. The download tasks open this file
+     * in append mode and write one image file name per line to it.
+     */
+    @Value("${duplicateFile.path}")
+    private String duplicateFilePath;
     /**
      * 50分钟一次
      */
@@ -53,6 +64,14 @@ public class SatelliteRadarImageCollectionTask {
             Elements HYS = doc.getElementsByClass("col-xs-12 time ");
             Elements e = doc.getElementsByClass("col-xs-12 time actived");
             HYS.add(e.get(0));
+            File file = new File(duplicateFilePath);
+            if (!file.exists()) {
+                file.createNewFile(); // 创建新文件,有同名的文件的话直接覆盖
+            }
+            FileOutputStream fos = new FileOutputStream(file, true);
+            OutputStreamWriter osw = new OutputStreamWriter(fos);
+            BufferedWriter bw = new BufferedWriter(osw);
+            BufferedReader reader = null;
             for (int i = 0; i < HYS.size(); i++) {
                 Elements a = HYS.get(i).getElementsByAttribute("data-img");
                 String imgUrl = a.attr("data-img");
@@ -67,9 +86,31 @@ public class SatelliteRadarImageCollectionTask {
                 String minutePlist = dtr.substring(10, 12);
                 String fileName = "IMAGE,fyhw," + yearPlist + "_" + monthPlist + "_" + dayPlist + "_" + timePlist + "_" + minutePlist + "_00.JPG";
                 log.info("fileName:" + fileName);
+                reader = new BufferedReader(new FileReader(
+                        "/Users/zhulei/test/test.txt"));
+                String line = reader.readLine();
+                int temp = 0;
+                while (line != null) {
+                    if(line.equals(fileName)){
+                        temp = temp+1;
+                        line = null;
+                    }else {
+                        line = reader.readLine();
+                    }
+                }
+                if(temp>0){
+                    continue;
+                }
+                bw.write(fileName);
+                bw.newLine();
+                bw.flush();
                 String saveFile = downloadFilePath + fileName;
                 downloadPicture(imgUrl, saveFile);
             }
+            bw.close();
+            osw.close();
+            fos.close();
+            reader.close();
         } catch (IOException e) {
             e.printStackTrace();
         }
@@ -86,6 +127,14 @@ public class SatelliteRadarImageCollectionTask {
             Elements HYS = doc.getElementsByClass("col-xs-12 time ");
             Elements e = doc.getElementsByClass("col-xs-12 time actived");
             HYS.add(e.get(0));
+            File file = new File(duplicateFilePath);
+            if (!file.exists()) {
+                file.createNewFile(); // 创建新文件,有同名的文件的话直接覆盖
+            }
+            FileOutputStream fos = new FileOutputStream(file, true);
+            OutputStreamWriter osw = new OutputStreamWriter(fos);
+            BufferedWriter bw = new BufferedWriter(osw);
+            BufferedReader reader = null;
             for (int i = 0; i < HYS.size(); i++) {
                 Elements a = HYS.get(i).getElementsByAttribute("data-img");
                 String imgUrl = a.attr("data-img");
@@ -100,9 +149,31 @@ public class SatelliteRadarImageCollectionTask {
                 String minutePlist = dtr.substring(10, 12);
                 String fileName = "IMAGE,fysq," + yearPlist + "_" + monthPlist + "_" + dayPlist + "_" + timePlist + "_" + minutePlist + "_00.JPG";
                 log.info("fileName:" + fileName);
+                reader = new BufferedReader(new FileReader(
+                        "/Users/zhulei/test/test.txt"));
+                String line = reader.readLine();
+                int temp = 0;
+                while (line != null) {
+                    if(line.equals(fileName)){
+                        temp = temp+1;
+                        line = null;
+                    }else {
+                        line = reader.readLine();
+                    }
+                }
+                if(temp>0){
+                    continue;
+                }
+                bw.write(fileName);
+                bw.newLine();
+                bw.flush();
                 String saveFile = downloadFilePath + fileName;
                 downloadPicture(imgUrl, saveFile);
             }
+            bw.close();
+            osw.close();
+            fos.close();
+            reader.close();
         } catch (IOException e) {
             e.printStackTrace();
         }
@@ -119,6 +190,14 @@ public class SatelliteRadarImageCollectionTask {
             Elements HYS = doc.getElementsByClass("col-xs-12 time ");
             Elements e = doc.getElementsByClass("col-xs-12 time actived");
             HYS.add(e.get(0));
+            File file = new File(duplicateFilePath);
+            if (!file.exists()) {
+                file.createNewFile(); // 创建新文件,有同名的文件的话直接覆盖
+            }
+            FileOutputStream fos = new FileOutputStream(file, true);
+            OutputStreamWriter osw = new OutputStreamWriter(fos);
+            BufferedWriter bw = new BufferedWriter(osw);
+            BufferedReader reader = null;
             for (int i = 0; i < HYS.size(); i++) {
                 Elements a = HYS.get(i).getElementsByAttribute("data-img");
                 String imgUrl = a.attr("data-img");
@@ -133,9 +212,31 @@ public class SatelliteRadarImageCollectionTask {
                 String minutePlist = dtr.substring(10, 12);
                 String fileName = "IMAGE,fy2g," + yearPlist + "_" + monthPlist + "_" + dayPlist + "_" + timePlist + "_" + minutePlist + "_00.JPG";
                 log.info("fileName:" + fileName);
+                reader = new BufferedReader(new FileReader(
+                        "/Users/zhulei/test/test.txt"));
+                String line = reader.readLine();
+                int temp = 0;
+                while (line != null) {
+                    if(line.equals(fileName)){
+                        temp = temp+1;
+                        line = null;
+                    }else {
+                        line = reader.readLine();
+                    }
+                }
+                if(temp>0){
+                    continue;
+                }
+                bw.write(fileName);
+                bw.newLine();
+                bw.flush();
                 String saveFile = downloadFilePath + fileName;
                 downloadPicture(imgUrl, saveFile);
             }
+            bw.close();
+            osw.close();
+            fos.close();
+            reader.close();
         } catch (IOException e) {
             e.printStackTrace();
         }
@@ -152,6 +253,14 @@ public class SatelliteRadarImageCollectionTask {
             Elements HYS = doc.getElementsByClass("col-xs-12 time ");
             Elements e = doc.getElementsByClass("col-xs-12 time actived");
             HYS.add(e.get(0));
+            File file = new File(duplicateFilePath);
+            if (!file.exists()) {
+                file.createNewFile(); // 创建新文件,有同名的文件的话直接覆盖
+            }
+            FileOutputStream fos = new FileOutputStream(file, true);
+            OutputStreamWriter osw = new OutputStreamWriter(fos);
+            BufferedWriter bw = new BufferedWriter(osw);
+            BufferedReader reader = null;
             for (int i = 0; i < HYS.size(); i++) {
                 Elements a = HYS.get(i).getElementsByAttribute("data-img");
                 String imgUrl = a.attr("data-img");
@@ -166,9 +275,31 @@ public class SatelliteRadarImageCollectionTask {
                 String minutePlist = dtr.substring(10, 12);
                 String fileName = "IMAGE,ldpt," + yearPlist + "_" + monthPlist + "_" + dayPlist + "_" + timePlist + "_" + minutePlist + "_00.JPG";
                 log.info("fileName:" + fileName);
+                reader = new BufferedReader(new FileReader(
+                        "/Users/zhulei/test/test.txt"));
+                String line = reader.readLine();
+                int temp = 0;
+                while (line != null) {
+                    if(line.equals(fileName)){
+                        temp = temp+1;
+                        line = null;
+                    }else {
+                        line = reader.readLine();
+                    }
+                }
+                if(temp>0){
+                    continue;
+                }
+                bw.write(fileName);
+                bw.newLine();
+                bw.flush();
                 String saveFile = downloadFilePath + fileName;
                 downloadPicture(imgUrl, saveFile);
             }
+            bw.close();
+            osw.close();
+            fos.close();
+            reader.close();
         } catch (IOException e) {
             e.printStackTrace();
         }
@@ -202,5 +333,117 @@ public class SatelliteRadarImageCollectionTask {
         }
     }
 
+//    public static void main(String[] args) {
+//        log.info("下载全国雷达拼图图片");
+//        String url = "http://www.nmc.cn/publish/radar/chinaall.html";
+//        //String url = "http://www.nmc.cn/publish/satellite/FY4A-infrared.htm";
+//       // String url = "http://www.nmc.cn/publish/satellite/FY4A-water-vapour.htm";
+//       // String url = "http://www.nmc.cn/publish/satellite/fy2e/water_vapor.html";
+//        try {
+//            Document doc = Jsoup.connect(url).timeout(10000).get();
+//            Elements HYS = doc.getElementsByClass("col-xs-12 time ");
+//            Elements e = doc.getElementsByClass("col-xs-12 time actived");
+//            HYS.add(e.get(0));
+//            File file = new File(duplicateFilePath);
+//            if (!file.exists()) {
+//                file.createNewFile(); // 创建新文件,有同名的文件的话直接覆盖
+//                }
+//            FileOutputStream fos = new FileOutputStream(file, true);
+//            OutputStreamWriter osw = new OutputStreamWriter(fos);
+//            BufferedWriter bw = new BufferedWriter(osw);
+//            BufferedReader reader = null;
+//            for (int i = 0; i < HYS.size(); i++) {
+//                Elements a = HYS.get(i).getElementsByAttribute("data-img");
+//                String imgUrl = a.attr("data-img");
+//                log.info("imgUrl:" + imgUrl);
+//                String[] timeStrList = imgUrl.split("_");
+//                String timeStr = timeStrList[timeStrList.length - 1];
+//                String dtr = timeStr.substring(0, 12);
+//                String yearPlist = dtr.substring(0, 4);
+//                String monthPlist = dtr.substring(4, 6);
+//                String dayPlist = dtr.substring(6, 8);
+//                String timePlist = dtr.substring(8, 10);
+//                String minutePlist = dtr.substring(10, 12);
+//                String fileName = "IMAGE,ldpt," + yearPlist + "_" + monthPlist + "_" + dayPlist + "_" + timePlist + "_" + minutePlist + "_00.JPG";
+//                log.info("fileName:" + fileName);
+//                reader = new BufferedReader(new FileReader(
+//                        "/Users/zhulei/test/test.txt"));
+//                String line = reader.readLine();
+//                int temp = 0;
+//                while (line != null) {
+//                        if(line.equals(fileName)){
+//                            temp = temp+1;
+//                            line = null;
+//                        }else {
+//                            line = reader.readLine();
+//                        }
+//                }
+//                if(temp>0){
+//                    continue;
+//                }
+//                bw.write(fileName);
+//                bw.newLine();
+//                bw.flush();
+//            }
+//            bw.close();
+//            osw.close();
+//            fos.close();
+//            reader.close();
+//        } catch (FileNotFoundException e) {
+//            e.printStackTrace();
+//        } catch (IOException e) {
+//            e.printStackTrace();
+//        }
+//    }
+    /**
+     * Prunes the de-duplication record file; scheduled for 00:06:00 every day.
+     *
+     * <p>Returns immediately when the file at {@code duplicateFilePath} does not
+     * exist. Otherwise every recorded line is read, the timestamp between the
+     * second comma and the last '.' is parsed as {@code yyyy_MM_dd_HH_mm_ss},
+     * and the line is kept when today's day-of-year minus the record's
+     * day-of-year is less than 2. The file at {@code duplicateFilePath} is then
+     * deleted, recreated and filled with the kept lines only.
+     *
+     * <p>Checked exceptions are caught and their stack traces printed. Parsing
+     * happens before the file is deleted, so a {@code ParseException} on any
+     * line leaves the existing file untouched.
+     *
+     * <p>NOTE(review): the reader opens the literal path
+     * "/Users/zhulei/test/test.txt" rather than {@code duplicateFilePath}; the
+     * two only coincide when the property is configured to that same value.
+     *
+     * <p>NOTE(review): the age check compares DAY_OF_YEAR values and ignores the
+     * year, so for records from late December the difference turns negative
+     * after the year rolls over and they are still kept.
+     *
+     * <p>NOTE(review): the streams are closed only at the end of the try block,
+     * so they stay open when an exception is thrown earlier.
+     */
+    @Scheduled(cron = "0 6 0 * * ? ")
+    public  void duplicate() {
+        try {
+            File file = new File(duplicateFilePath);
+            if (!file.exists()) {
+               return;// no record file yet, so there is nothing to prune
+            }
+            BufferedReader reader = new BufferedReader(new FileReader(
+                    "/Users/zhulei/test/test.txt")); // NOTE(review): hard-coded path, not duplicateFilePath
+            List<String> list = new ArrayList<>(); // lines that survive the pruning
+            String line = reader.readLine();
+            while (line != null) {
+                String temp = line.substring(line.indexOf(",",line.indexOf(",")+1)+1, line.lastIndexOf(".")); // text after the 2nd comma, up to the last '.'
+                SimpleDateFormat sdf1 = new SimpleDateFormat("yyyy_MM_dd_HH_mm_ss");
+                Date oldDate = sdf1.parse(temp);
+                Calendar aCalendar = Calendar.getInstance();
+                aCalendar.setTime(oldDate);
+                int day1 = aCalendar.get(Calendar.DAY_OF_YEAR); // day-of-year of the record
+                aCalendar.setTime(new Date());
+                int day2 = aCalendar.get(Calendar.DAY_OF_YEAR); // day-of-year of today
+                if (day2 - day1 < 2) { // keep records whose day-of-year is less than 2 behind today's
+                    list.add(line);
+                }
+                line = reader.readLine();
+            }
+            file.delete(); // drop the old file, then rebuild it from the kept lines
+            file.createNewFile();
+            FileOutputStream fos = new FileOutputStream(file, true);
+            OutputStreamWriter osw = new OutputStreamWriter(fos);
+            BufferedWriter bw = new BufferedWriter(osw);
+            for (int i = 0; i < list.size(); i++) {
+                bw.write(list.get(i));
+                bw.newLine();
+                bw.flush();
+            }
+            bw.close();
+            osw.close();
+            fos.close();
+            reader.close();
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+        } catch (IOException e) {
+            e.printStackTrace();
+        } catch (ParseException e) {
+            e.printStackTrace();
+        }
+    }
 
 }

+ 2 - 0
tongfei_river_data_collection/src/main/resources/application-sky.properties

@@ -7,6 +7,8 @@ snow.qzk.file=/media/sky/73b9d852-878a-4b11-a82f-249a237fa9e7/sky/LCJ/SnowCoverP
 download.file.path=/usr/local/LCJ/data/receivedata/
 
 
+duplicateFile.path=/Users/zhulei/test/test.txt
+
 yb.startTime=2022_12_29_00_00_00
 yb.endTime=2022_12_30_00_00_00