-
Notifications
You must be signed in to change notification settings - Fork 0
/
regresion_SGD_Singlenton
51 lines (40 loc) · 1.96 KB
/
regresion_SGD_Singlenton
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.regression.LinearRegressionModel
import org.apache.spark.mllib.regression.LinearRegressionWithSGD
import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithLBFGS}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.evaluation.MulticlassMetrics
// Read the raw text lines of hour.csv through the shell-provided SparkContext.
val data = sc.textFile("hour.csv")
// Pair every line with its position and keep all lines except the one at
// index 0 (presumably the CSV header row -- confirm against the file).
val filterRDD = data
  .zipWithIndex()
  .filter { case (_, position) => position != 0 }
  .map { case (row, _) => row }
// Turn each remaining line into a LabeledPoint: every comma-separated field is
// parsed as a Double (a non-numeric field throws NumberFormatException), the
// last field becomes the label and all preceding fields become the features.
val labeledPoints = filterRDD.map { row =>
  val columns = row.split(',').map(_.toDouble)
  val predictors = Vectors.dense(columns.init)
  LabeledPoint(columns.last, predictors)
}
// Mark the parsed points for caching; they are reused by training and evaluation.
labeledPoints.cache()
// Hyper-parameters handed to the SGD optimizer.
val numIterations: Int = 100
val stepSize: Double = 0.0001
// Fit the linear model through the LinearRegressionWithSGD companion
// (singleton) object rather than by instantiating the algorithm class.
val modelSGD: LinearRegressionModel = LinearRegressionWithSGD.train(
  input = labeledPoints,
  numIterations = numIterations,
  stepSize = stepSize
)
// Expose the fitted parameters for inspection.
// NOTE(review): the static train(...) helpers appear not to enable intercept
// fitting, so the intercept is expected to be 0.0 here -- confirm against the
// Spark version in use.
val intercept = modelSGD.intercept
val weights = modelSGD.weights
// Score every training observation and pair its true label with the model's
// prediction, as (observed, predicted).
val observedAndPredictedLabels = labeledPoints.map { point =>
  (point.label, modelSGD.predict(point.features))
}
// Squared residual (observed - predicted)^2 for each observation.
val squaredErrors = observedAndPredictedLabels.map { case (observed, fitted) =>
  val residual = observed - fitted
  math.pow(residual, 2)
}
// Average of the squared residuals: the training-set mean squared error.
val meanSquaredError = squaredErrors.mean()
// Using the RegressionMetrics class: create an instance.
import org.apache.spark.mllib.evaluation.RegressionMetrics
// RegressionMetrics expects (prediction, observation) pairs, whereas
// observedAndPredictedLabels holds (observation, prediction). MSE is symmetric
// in the two values, but R2 is measured against the variance of the
// observations, so each pair is swapped to avoid computing R2 against the
// variance of the predictions.
val regressionMetrics2 = new RegressionMetrics(observedAndPredictedLabels.map(_.swap))
// Mean squared error as reported by RegressionMetrics.
val MSE = regressionMetrics2.meanSquaredError
// Coefficient of determination of the predictions with respect to the observed labels.
val R2 = regressionMetrics2.r2