-
Notifications
You must be signed in to change notification settings - Fork 0
/
Model1FixtureDataScraper.java
72 lines (66 loc) · 2.19 KB
/
Model1FixtureDataScraper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Scrapes fixtures from soccerstats.com: reads the league table page to find
 * each team's page, then collects fixture rows from every team page into a
 * single de-duplicated list.
 */
public class Model1FixtureDataScraper
{
    /** Site root; the relative team links found in the league table are appended to it. */
    private static final String BASE_URL = "http://www.soccerstats.com/";

    /** Text of the 4th row that marks a table on a team page as the fixture table. */
    private static final String FIXTURE_TABLE_MARKER = "Opponent PPG";

    /**
     * Collects fixtures for every team listed in the league table.
     *
     * @param league     league identifier, inserted verbatim into the table URL
     * @param gamesAhead number of fixture rows to read per team page; because the
     *                   limit is checked with {@code ==} after counting a row, a
     *                   value of zero or less never stops early and all rows are read
     * @return the fixtures found, without duplicates (as decided by
     *         {@code Fixture.equals}); empty if the league table is not on the page
     * @throws IOException if any page cannot be fetched
     */
    ArrayList<Fixture> scrape (String league, int gamesAhead) throws IOException
    {
        ArrayList<Fixture> result = new ArrayList<Fixture>();
        String url = BASE_URL + "table.asp?league=" + league + "&tid=10";
        Document doc = fetch( url );

        Element table = doc.select( "table#btable" ).first();
        if (table == null)
        {
            // first() yields null when nothing matched (unknown league, layout change);
            // report "no fixtures" instead of failing with a NullPointerException.
            return result;
        }

        for ( Element row : table.select( "tr" ) )
        {
            Elements tds = row.select( "td" );
            if (tds.size() <= 1)
            {
                continue;
            }
            String href = tds.get(1).select( "a[href]" ).attr( "href" );
            if (href.isEmpty())
            {
                // No team link in this row; without this guard the bare site root
                // would be fetched and scanned as if it were a team page.
                continue;
            }
            scrapeTeamFixture( BASE_URL + href, gamesAhead, result );
        }
        return result;
    }

    /**
     * Reads fixture rows from one team page and appends the ones not already
     * present to {@code result}.
     *
     * @param url        absolute URL of the team page
     * @param gamesAhead stop once this many fixture rows have been read from this
     *                   page (rows already present in {@code result} still count)
     * @param result     shared accumulator, modified in place
     * @throws IOException if the page cannot be fetched
     */
    private void scrapeTeamFixture(String url, int gamesAhead, ArrayList<Fixture> result) throws IOException
    {
        Document doc = fetch( url );
        int gameCnt = 0;
        for ( Element table : doc.select( "table" ) )
        {
            // Select the rows once; they are needed for the size check, the marker
            // check and the iteration below.
            Elements rows = table.select( "tr" );
            if (rows.size() < 4 || !rows.get(3).text().equals( FIXTURE_TABLE_MARKER ))
            {
                continue;
            }
            for ( Element row : rows )
            {
                Elements tds = row.select( "td" );
                if (tds.size() <= 3)
                {
                    continue;
                }
                gameCnt++;
                // Cell 0 is the date, cell 1 the home team, cell 3 the away team.
                Fixture f = new Fixture( tds.get(0).text(), tds.get(1).text(), tds.get(3).text() );
                // The accumulator is shared across team pages, so skip fixtures that
                // an earlier page already contributed.
                if (!result.contains( f ))
                {
                    result.add( f );
                }
                if (gameCnt == gamesAhead)
                {
                    return;
                }
            }
        }
    }

    /**
     * Downloads and parses a page.
     *
     * @param url absolute URL to fetch
     * @return the parsed document
     * @throws IOException if the request fails
     */
    private Document fetch(String url) throws IOException
    {
        // NOTE(review): timeout(0) is kept from the original; jsoup treats zero as
        // "no timeout", so a stalled server blocks the caller indefinitely.
        return Jsoup.connect( url ).ignoreContentType( true ).timeout(0).get();
    }
}