package combio.cassandra; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; public class DataParsing { private CassandraCreate cc = new CassandraCreate(); public void ParsingTest(String input) { System.out.println("Inserting....."); URL url, urltable; // int count = 0; // int countNotM = 0; // int countEr = 0; // int countEmp = 0; Calendar cal = Calendar.getInstance(); String date = cal.get(Calendar.YEAR) + "/" + cal.get(Calendar.MONTH) + "/" + cal.get(Calendar.DAY_OF_MONTH); try { url = new URL(input); URLConnection conn = url.openConnection(); BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream())); String line; while ((line = br.readLine()) != null) { if (line.matches(date + "(.*)jp_[^\\s]+")) { String[] table; table = line.split("\\s+"); // count++; if (!cc.CheckIP(table[table.length - 1])) { // countNotM++; urltable = new URL("http://www.compbio.dundee.ac.uk/www-jpred/results/" +table[table.length - 1]+"/"+table[table.length - 1]+".concise.fasta"); HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); if (httpConnection.getResponseCode() > 200) { // countEr++; continue; } try { BufferedReader br1 = new BufferedReader(new InputStreamReader(httpConnection.getInputStream())); String lineNext; String newProt = ""; String jnetpred = ""; while ((lineNext = br1.readLine()) != null) { if (lineNext.equals(">QUERY")) { while (!(lineNext = br1.readLine()).matches(">[^\\s]+")) newProt += lineNext; } else if (lineNext.equals(">jnetpred")) { while (!(lineNext = br1.readLine()).matches(">[^\\s]+")) jnetpred += lineNext; } } br1.close(); if (newProt.length() <= 1) { // countEmp++; continue; } SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); String dateInString1 = table[0].substring(0, table[0].indexOf(":")); long dateWork1 = 0; try { Date dat1 = formatter.parse(dateInString1); dateWork1 = dat1.getTime(); } catch (ParseException e) { e.printStackTrace(); } cc.InsertData(dateWork1, table[0], table[1], table[2], table[table.length - 1], "OK", "OK", newProt, jnetpred); } catch (IOException e) { // e.printStackTrace(); } // } } } } br.close(); // System.out.println("Match " + count); // System.out.println("Not Match " + countNotM); // System.out.println("Error " + countEr); // System.out.println("No protein " + countEmp); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }