Sunday, 2 June 2019

Parse data from an unstructured text file in Java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
package main.java;
 
import java.io.File;
import java.io.FileNotFoundException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
 
/**
 * Extracts e-mail addresses, URLs, dates and times from a plain-text file
 * using regular expressions and prints every match to standard output.
 *
 * <p>The input file is expected at
 * {@code <user.dir>/src/main/resources/Test-RegexText.txt}.
 */
public class ParseFile {

  /**
   * E-mail address: local part, {@code @}, one domain label, a literal dot and an
   * alphabetic suffix. The dot is escaped so that it no longer matches any character.
   */
  public static final Pattern VALID_EMAIL_ADDRESS_REGEX = Pattern.compile("[a-z.0-9-]{1,30}@[a-z0-9-]{1,65}\\.[a-z]{1,}", Pattern.CASE_INSENSITIVE);

  /** URL with an {@code http}, {@code https}, {@code ftp} or {@code file} scheme. */
  public static final Pattern VALID_URL_REGEX = Pattern.compile("(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]", Pattern.CASE_INSENSITIVE);

  /** Numeric date with two- or four-digit year, separated by {@code /} or {@code -}. */
  public static final Pattern VALID_DATE_REGEX = Pattern.compile("[0-3]?[0-9]/[0-3]?[0-9]/(?:[0-9]{2})?[0-9]{2}|[0-3]?[0-9]-[0-3]?[0-9]-(?:[0-9]{2})?[0-9]{2}", Pattern.CASE_INSENSITIVE);

  /**
   * 24-hour time written as {@code H.mm.ss} / {@code HH.mm.ss}. The separators are
   * literal dots so that every match can be parsed with the {@code HH.mm.ss} format
   * used in {@link #main(String[])}.
   */
  public static final Pattern VALID_TIME_REGEX = Pattern.compile("([2][0-3]|[0-1][0-9]|[1-9])\\.[0-5][0-9]\\.([0-5][0-9]|[6][0])", Pattern.CASE_INSENSITIVE);

  /**
   * Reads the sample file, collects all e-mail, URL, date and time matches and prints
   * them grouped by category (e-mails first, then URLs, dates and times).
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args)
  {
    String dir = System.getProperty("user.dir");
    // Build the path with the platform separator instead of hard-coded backslashes.
    String relativePath = String.join(File.separator, "src", "main", "resources", "Test-RegexText.txt");
    File file = new File(dir, relativePath);

    List<String> lines;
    try
    {
      lines = readLines(file);
    }
    catch (FileNotFoundException e)
    {
      e.printStackTrace();
      return;
    }

    List<String> emailList = findAll(VALID_EMAIL_ADDRESS_REGEX, lines);
    List<String> urlList = findAll(VALID_URL_REGEX, lines);
    List<String> dateList = findAll(VALID_DATE_REGEX, lines);
    List<String> timeList = toTwelveHourClock(findAll(VALID_TIME_REGEX, lines));

    emailList.forEach((value)->System.out.println("Email : " + value));
    urlList.forEach((value)->System.out.println("Url : " + value));
    dateList.forEach((value)->System.out.println("Date : " + value));
    timeList.forEach((value)->System.out.println("Time : " + value));
  }

  /** Reads the whole file once, line by line, and always closes the scanner. */
  private static List<String> readLines(File file) throws FileNotFoundException
  {
    List<String> lines = new ArrayList<String>();
    try (Scanner sc = new Scanner(file))
    {
      while (sc.hasNextLine())
      {
        lines.add(sc.nextLine());
      }
    }
    return lines;
  }

  /** Returns every match of {@code pattern} in {@code lines}, in order of appearance. */
  private static List<String> findAll(Pattern pattern, List<String> lines)
  {
    List<String> matches = new ArrayList<String>();
    for (String line : lines)
    {
      Matcher matcher = pattern.matcher(line);
      // Loop rather than a single find() so several matches on one line are all kept.
      while (matcher.find())
      {
        matches.add(matcher.group());
      }
    }
    return matches;
  }

  /** Converts {@code HH.mm.ss} strings to the {@code K:mm a} display form. */
  private static List<String> toTwelveHourClock(List<String> rawTimes)
  {
    // Formatters are created once per call; SimpleDateFormat is not thread-safe,
    // so they are kept local instead of being shared as static fields.
    final SimpleDateFormat inputFormat = new SimpleDateFormat("HH.mm.ss");
    // NOTE(review): "K" is the 0-11 hour-in-am/pm field, so noon is rendered with
    // hour 0; switch to "h" if a 1-12 clock is wanted.
    final SimpleDateFormat outputFormat = new SimpleDateFormat("K:mm a");

    List<String> formatted = new ArrayList<String>();
    for (String raw : rawTimes)
    {
      try
      {
        formatted.add(outputFormat.format(inputFormat.parse(raw)));
      }
      catch (ParseException e)
      {
        // Report the value instead of dropping it silently; keep processing the rest.
        System.err.println("Skipping unparseable time '" + raw + "': " + e.getMessage());
      }
    }
    return formatted;
  }
}
<!-- Code End -->
// download full source code from here.
Download Now

No comments:

Post a Comment