-
Notifications
You must be signed in to change notification settings - Fork 0
/
MLSDirect.java
151 lines (138 loc) · 5.24 KB
/
MLSDirect.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Writes out MLS teams' rosters for each season, including players' minutes and
 * goals scored among other things.
 * <p>
 * The data is scraped from the season statistics pages of www.mlssoccer.com,
 * one request per team, season and result page, and written as CSV rows to
 * {@code MLSSoccerSiteData.csv} in the working directory.
 *
 * @author Gautam Sarkar
 * @version Sep 7, 2016
 */
public class MLSDirect
{
    /** Name of the CSV file produced by {@link #main(String[])}. */
    private static final String OUTPUT_FILE = "MLSSoccerSiteData.csv";

    /** Header line of the CSV file. */
    private static final String CSV_HEADER = "Team,Year,Player,POS,GP,GS,MINS,G,A,SHTS,SOG,GWG,PKG/A,HmG,RdG,G/90min,SC%";

    /** First season scraped by {@link #main(String[])} (inclusive). */
    private static final int FIRST_SEASON = 2007;

    /** Last season scraped by {@link #main(String[])} (inclusive). */
    private static final int LAST_SEASON = 2016;

    /** Selector of the statistics table inside a result page. */
    private static final String STATS_TABLE_SELECTOR = "table.responsive.no-more-tables.season_stats";

    /** Selector of the pager link that is present when another result page exists. */
    private static final String NEXT_PAGE_SELECTOR = "a[title=Go to next page]";

    /**
     * Teams to scrape: each entry is { team name, franchise id }, where the id
     * is the value sent as the {@code franchise} query parameter.
     */
    private static final Object[][] TEAMS = {
        { "Chicago Fire", 1207 },
        { "Chivas USA", 2079 },
        { "Colorado Rapids", 436 },
        { "Columbus Crew SC", 454 },
        { "D.C. United", 1326 },
        { "FC Dallas", 1903 },
        { "Houston Dynamo", 1897 },
        { "LA Galaxy", 1230 },
        { "Miami Fusion", 2 },
        { "Montreal Impact", 1616 },
        { "New England Revolution", 928 },
        { "New York City", 9668 },
        { "New York Red Bulls", 399 },
        { "Orlando City SC", 6900 },
        { "Philadelphia Union", 5513 },
        { "Portland Timbers", 1581 },
        { "Real Salt Lake", 1899 },
        { "San Jose Earthquakes", 1131 },
        { "Seattle Sounders FC", 3500 },
        { "Sporting Kansas City", 421 },
        { "Tampa Bay Mutiny", 1 },
        { "Toronto FC", 2077 },
        { "Vancouver Whitecaps FC", 1708 },
    };

    /**
     * Writes out the info to a csv file. Opened and closed by
     * {@link #main(String[])}; it must be non-null before {@link #getData(int)}
     * or {@link #getNumPlayers(Object[], int, int)} is called.
     */
    static FileWriter writer = null;

    /**
     * Main method. Creates the CSV file, writes the header line and scrapes
     * every season from 2007 through 2016.
     *
     * @param args
     *            not used
     * @throws Exception
     *             if the output file cannot be written or a page cannot be
     *             downloaded
     */
    public static void main( String args[] ) throws Exception
    {
        writer = new FileWriter( OUTPUT_FILE );
        try
        {
            writer.write( CSV_HEADER );
            writer.append( '\n' );
            for ( int season = FIRST_SEASON; season <= LAST_SEASON; season++ )
            {
                getData( season );
            }
        }
        finally
        {
            // Closing also flushes; without it buffered rows may never reach
            // the file, in particular when a download fails half-way.
            writer.close();
        }
    }

    /**
     * Scrapes data for the given season by requesting the statistics of every
     * known team, starting at the first result page.
     *
     * @param season
     *            the year to scrape
     * @throws IOException
     *             if a page cannot be downloaded or a row cannot be written
     */
    public static void getData( int season ) throws IOException
    {
        for ( Object[] team : TEAMS )
        {
            getNumPlayers( team, season, 0 );
        }
    }

    /**
     * Scrapes the tables for a given team id, season, and page number, then
     * keeps following the result pages for as long as the page offers a
     * "next page" link. Every player row found is appended to {@link #writer}.
     *
     * @param team
     *            the team's name (index 0) and id (index 1)
     * @param season
     *            the year
     * @param time
     *            the zero-based result page to start from (the number of
     *            pages already processed)
     * @throws IOException
     *             if a page cannot be downloaded or a row cannot be written
     */
    public static void getNumPlayers( Object[] team, int season, int time ) throws IOException
    {
        int page = time;
        boolean hasNextPage;
        do
        {
            Document doc = fetchPage( team[1], season, page );
            writeRows( team[0], season, doc );
            hasNextPage = !doc.select( NEXT_PAGE_SELECTOR ).isEmpty();
            page++;
        }
        while ( hasNextPage );
    }

    /**
     * Downloads and parses one result page of a team's season statistics.
     */
    private static Document fetchPage( Object teamId, int season, int page ) throws IOException
    {
        StringBuilder urlString = new StringBuilder( "http://www.mlssoccer.com/stats/season?franchise=" );
        urlString.append( teamId ).append( "&year=" ).append( season );
        urlString.append( "&season_type=REG&group=goals&op=Search&form_id=mp7_stats_hub_build_filter_form&sort=desc&order=MINS" );
        if ( page > 0 )
        {
            // The first page is requested without a page parameter.
            urlString.append( "&page=" ).append( page );
        }

        URLConnection con = new URL( urlString.toString() ).openConnection();
        // NOTE(review): decodes the response as UTF-8 instead of the platform
        // default so accented names survive on every OS - confirm the site's
        // encoding.
        BufferedReader in = new BufferedReader( new InputStreamReader( con.getInputStream(), "UTF-8" ) );
        try
        {
            StringBuilder html = new StringBuilder();
            String inputLine;
            while ( ( inputLine = in.readLine() ) != null )
            {
                // Keep the line break so text split across lines is not fused.
                html.append( inputLine ).append( '\n' );
            }
            return Jsoup.parse( html.toString() );
        }
        finally
        {
            in.close();
        }
    }

    /**
     * Writes one CSV line per player row of the statistics table in the given
     * page; does nothing when the page contains no statistics table.
     */
    private static void writeRows( Object teamName, int season, Document doc ) throws IOException
    {
        Element table = doc.select( STATS_TABLE_SELECTOR ).first();
        if ( table == null )
        {
            // No statistics table on this page: nothing to write.
            return;
        }

        boolean skippedFirstRow = false;
        for ( Element row : table.select( "tr" ) )
        {
            Elements tds = row.select( "td" );
            if ( tds.isEmpty() )
            {
                continue;
            }
            if ( !skippedFirstRow )
            {
                // NOTE(review): the first row that contains <td> cells is
                // skipped on every page, exactly as before; presumably it is
                // a header row - confirm against the page markup, otherwise
                // the top player of each page is dropped.
                skippedFirstRow = true;
                continue;
            }

            StringBuilder line = new StringBuilder();
            line.append( csvField( String.valueOf( teamName ) ) ).append( ',' ).append( season );
            for ( Element td : tds )
            {
                // Separator before each cell, so no trailing comma is
                // produced.
                line.append( ',' ).append( csvField( td.text() ) );
            }
            line.append( '\n' );
            writer.write( line.toString() );
        }
    }

    /**
     * Returns the value unchanged unless it contains a comma, a double quote or
     * a line break, in which case it is wrapped in double quotes with embedded
     * quotes doubled.
     */
    private static String csvField( String value )
    {
        boolean plain = value.indexOf( ',' ) < 0 && value.indexOf( '"' ) < 0
            && value.indexOf( '\n' ) < 0 && value.indexOf( '\r' ) < 0;
        if ( plain )
        {
            return value;
        }
        return "\"" + value.replace( "\"", "\"\"" ) + "\"";
    }
}