Sunday, 2 June 2019

Parse data from an unstructured text file in Java

package main.java;

import java.io.File;
import java.io.FileNotFoundException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts e-mail addresses, URLs, dates and times from a plain-text file and prints them.
 *
 * <p>The input file is {@code src/main/resources/Test-RegexText.txt}, resolved against the
 * {@code user.dir} system property. The file is read once, line by line, and every match of
 * each pattern on every line is reported (not just the first match per line).
 */
public class ParseFile {

    /**
     * E-mail address: a local part of letters, digits, dots and hyphens, an {@code @}, one or
     * more dot-separated domain labels and a final alphabetic label. The dots in the domain are
     * escaped so that they only match a literal dot.
     */
    public static final Pattern VALID_EMAIL_ADDRESS_REGEX = Pattern.compile(
            "[a-z.0-9-]{1,30}@[a-z0-9-]{1,65}(?:\\.[a-z0-9-]{1,65})*\\.[a-z]{1,}",
            Pattern.CASE_INSENSITIVE);

    /** URL with an {@code http}, {@code https}, {@code ftp} or {@code file} scheme. */
    public static final Pattern VALID_URL_REGEX = Pattern.compile(
            "(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]",
            Pattern.CASE_INSENSITIVE);

    /** Numeric date with {@code /} or {@code -} separators and a two- or four-digit year. */
    public static final Pattern VALID_DATE_REGEX = Pattern.compile(
            "[0-3]?[0-9]/[0-3]?[0-9]/(?:[0-9]{2})?[0-9]{2}|[0-3]?[0-9]-[0-3]?[0-9]-(?:[0-9]{2})?[0-9]{2}",
            Pattern.CASE_INSENSITIVE);

    /**
     * 24-hour time written as {@code HH.mm.ss}. The separators are escaped so that only a
     * literal dot matches; with a bare {@code .} the pattern also matched date fragments such
     * as {@code 2019-06}, which could never be parsed and hid the real time on the same line.
     */
    public static final Pattern VALID_TIME_REGEX = Pattern.compile(
            "([2][0-3]|[0-1][0-9]|[1-9])\\.[0-5][0-9]\\.([0-5][0-9]|[6][0])",
            Pattern.CASE_INSENSITIVE);

    /**
     * Reads the input file and prints every e-mail address, URL, date and time found in it,
     * grouped by kind in that order. Times are printed in 12-hour form.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Build the path with the platform separator instead of hard-coded backslashes.
        File file = new File(
                System.getProperty("user.dir"),
                String.join(File.separator, "src", "main", "resources", "Test-RegexText.txt"));

        List<String> lines;
        try {
            lines = readLines(file);
        } catch (FileNotFoundException e) {
            System.err.println(
                    "Cannot read input file " + file.getAbsolutePath() + ": " + e.getMessage());
            return;
        }

        List<String> emailList = extractMatches(VALID_EMAIL_ADDRESS_REGEX, lines);
        List<String> urlList = extractMatches(VALID_URL_REGEX, lines);
        List<String> dateList = extractMatches(VALID_DATE_REGEX, lines);

        List<String> timeList = new ArrayList<>();
        for (String rawTime : extractMatches(VALID_TIME_REGEX, lines)) {
            try {
                timeList.add(toTwelveHourClock(rawTime));
            } catch (ParseException e) {
                // Report instead of silently dropping the value.
                System.err.println(
                        "Skipping unparseable time '" + rawTime + "': " + e.getMessage());
            }
        }

        emailList.forEach(value -> System.out.println("Email : " + value));
        urlList.forEach(value -> System.out.println("Url : " + value));
        dateList.forEach(value -> System.out.println("Date : " + value));
        timeList.forEach(value -> System.out.println("Time : " + value));
    }

    /**
     * Reads all lines of {@code file} into memory, closing the underlying scanner afterwards.
     *
     * @param file the file to read
     * @return the lines of the file, in order
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static List<String> readLines(File file) throws FileNotFoundException {
        List<String> lines = new ArrayList<>();
        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNextLine()) {
                lines.add(scanner.nextLine());
            }
        }
        return lines;
    }

    /**
     * Collects every match of {@code pattern} in {@code lines}, in order of appearance.
     *
     * @param pattern the pattern to search for
     * @param lines the lines to search; each line is searched independently
     * @return all matched substrings; empty if nothing matched
     */
    static List<String> extractMatches(Pattern pattern, Iterable<String> lines) {
        List<String> matches = new ArrayList<>();
        for (String line : lines) {
            Matcher matcher = pattern.matcher(line);
            // Loop rather than a single find() so later matches on the same line are kept.
            while (matcher.find()) {
                matches.add(matcher.group());
            }
        }
        return matches;
    }

    /**
     * Converts a 24-hour {@code HH.mm.ss} time into 12-hour {@code h:mm a} form.
     *
     * <p>The hour field is {@code h} (1-12) rather than {@code K} (0-11) so that noon and
     * midnight are rendered as 12 instead of 0.
     *
     * @param dottedTime a time such as {@code 13.05.09}
     * @return the time in 12-hour form, e.g. 1:05 followed by the locale's PM marker
     * @throws ParseException if {@code dottedTime} is not in {@code HH.mm.ss} form
     */
    static String toTwelveHourClock(String dottedTime) throws ParseException {
        // SimpleDateFormat is not thread-safe, so the formatters are kept local to the call.
        SimpleDateFormat input = new SimpleDateFormat("HH.mm.ss");
        SimpleDateFormat output = new SimpleDateFormat("h:mm a");
        return output.format(input.parse(dottedTime));
    }
}
 
// download full source code from here.
Download Now

Eid Mubarak

Asalam -u- Alikum,
To all of my blog readers.
Eid Mubarak to you and your family.

Monday, 18 June 2018

Ramadan Kareem Mubarak

Asalam -u- Alikum,
To all of my blog readers.
Ramadan Mubarak to you and your family.
May the great Allah shower your path with light and knowledge. 
May this month be an enlightening celebration to all of us.
Enjoy the long-lasting blessings of Allah.
Ramadan Mubarak.