-
Notifications
You must be signed in to change notification settings - Fork 0
/
Model2DataScraper.java
92 lines (85 loc) · 3.44 KB
/
Model2DataScraper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads the results page of a league from soccerstats.com, splits its rows into
 * played matches ({@code Result}) and not-yet-played matches ({@code Fixture}), and
 * (via {@link #main}) writes both sets to CSV files.
 */
public class Model2DataScraper
{
    /** Separator between the two team names, and between the two goal counts, in a cell's text. */
    private static final String SEPARATOR = " - ";

    /** Score-cell text for which the row is ignored (presumably a postponed match - confirm). */
    private static final String SKIPPED_SCORE_TEXT = "pp.";

    /** Positions of the cells read from each table row. */
    private static final int DATE_CELL = 0;
    private static final int TIME_CELL = 1;
    private static final int TEAMS_CELL = 2;
    private static final int SCORE_CELL = 3;

    /**
     * Fetches {@code results.asp?league=<league>} and parses every row of every
     * {@code table#btable} element.
     *
     * <p>A row is only considered when it has at least four {@code td} cells and its teams
     * cell has the form {@code "<home> - <away>"}; other rows are skipped. A row whose score
     * cell contains {@code " - "} and starts with a digit becomes a {@code Result}; a row
     * whose score cell is {@code "pp."} is ignored; every other row becomes a {@code Fixture}.
     *
     * @param league value appended to the {@code league=} query parameter
     * @return a two-element array: index 0 holds the {@code Result} objects, index 1 the
     *         {@code Fixture} objects, both in page order
     * @throws IOException if the page cannot be fetched
     * @throws NumberFormatException if a score that looks like a result has a non-numeric part
     */
    ArrayList<Object>[] scrape (String league) throws IOException
    {
        ArrayList<Object> teamData = new ArrayList<Object>();
        ArrayList<Object> fixtureData = new ArrayList<Object>();
        String url = "http://www.soccerstats.com/results.asp?league=" + league;
        // timeout(0) makes jsoup wait indefinitely for the response.
        Document doc = Jsoup.connect( url ).ignoreContentType( true ).timeout(0).get();

        for ( Element table : doc.select( "table#btable" ) )
        {
            for ( Element row : table.select( "tr" ) )
            {
                Elements tds = row.select( "td" );
                // The score lives in cell 3, so the row needs at least four cells;
                // the previous "size() > 1" check let 2- and 3-cell rows crash on get(3).
                if (tds.size() <= SCORE_CELL)
                {
                    continue;
                }

                String score = tds.get(SCORE_CELL).text();
                if (score.equals(SKIPPED_SCORE_TEXT))
                {
                    continue;
                }

                // Rows whose teams cell is not "<home> - <away>" are not match rows;
                // skip them instead of failing on teamArr[1] / substring(1).
                String[] teamArr = tds.get(TEAMS_CELL).text().split(SEPARATOR);
                if (teamArr.length < 2 || teamArr[0].isEmpty())
                {
                    continue;
                }

                String date = tds.get(DATE_CELL).text();
                // The first character of the home-team text is dropped; presumably a leading
                // marker/space in the page markup - confirm against the live page.
                String homeTeam = teamArr[0].substring(1);
                String awayTeam = teamArr[1];

                if (score.contains(SEPARATOR) && Character.isDigit(score.charAt(0)))
                {
                    String time = tds.get(TIME_CELL).text();
                    String[] teamScores = score.split(SEPARATOR);
                    int homeGoals = Integer.parseInt(teamScores[0]);
                    int awayGoals = Integer.parseInt(teamScores[1]);
                    teamData.add(new Result(date, time, homeTeam, awayTeam, homeGoals, awayGoals));
                }
                else
                {
                    fixtureData.add(new Fixture(date, homeTeam, awayTeam));
                }
            }
        }

        // Generic array creation is not expressible without a raw type; the array only
        // ever holds the two ArrayList<Object> instances created above.
        @SuppressWarnings("unchecked")
        ArrayList<Object>[] result = new ArrayList[2];
        result[0] = teamData;
        result[1] = fixtureData;
        return result;
    }

    /**
     * Scrapes the {@code "england"} league and writes {@code Model2Input.csv} (one line per
     * {@code Result}, using its string form) and {@code Model2Fixtures.csv} (two lines per
     * {@code Fixture}: one from the home side flagged {@code 1}, one from the away side
     * flagged {@code 0}).
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched or an output file cannot be written
     */
    public static void main(String[] args) throws IOException
    {
        String outputFile = "Model2Input.csv";
        String outputFile2 = "Model2Fixtures.csv";

        // Scrape before opening the files so a failed download does not truncate
        // output left over from a previous successful run.
        Model2DataScraper r = new Model2DataScraper();
        ArrayList<Object>[] result = r.scrape("england");

        PrintWriter out = new PrintWriter(new FileWriter(outputFile));
        try
        {
            PrintWriter out2 = new PrintWriter(new FileWriter(outputFile2));
            try
            {
                out.println("Date,Team,Opponent,Goals,Home");
                for (Object o : result[0])
                {
                    out.println(o);
                }

                out2.println("Date,Team,Opponent,Home");
                for (Object o : result[1])
                {
                    Fixture f = (Fixture) o;
                    out2.println(f.date + "," + f.homeTeam + "," + f.awayTeam + ",1");
                    out2.println(f.date + "," + f.awayTeam + "," + f.homeTeam + ",0");
                }

                // PrintWriter never throws on write; checkError() flushes and reports failures.
                if (out.checkError() || out2.checkError())
                {
                    throw new IOException("Failed to write " + outputFile + " or " + outputFile2);
                }
            }
            finally
            {
                out2.close();
            }
        }
        finally
        {
            out.close();
        }
    }
}