-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReza_Marzban_Program_2.scala
33 lines (26 loc) · 1.34 KB
/
Reza_Marzban_Program_2.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Reza Marzban
// Spark Batch processing
// Write a Spark job using the cube function to get the average annual petroleum consumption in barrels for fuelType1 for each make of vehicle.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.types.DoubleType
import org.apache.spark.sql.functions.desc
import org.apache.spark.sql.SparkSession
/** Spark batch job that reports the average `barrels08` value per vehicle `make`.
  *
  * The job reads a CSV file with a header row, keeps only the `barrels08` and
  * `make` columns, aggregates with `cube("make")` and prints the result sorted
  * by the average in descending order.
  */
object Reza_Marzban_Program_2 {

  /** Entry point: runs the aggregation and prints up to 150 result rows.
    *
    * @param args command-line arguments (not used by this job)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Program Number 2")
    // A single SparkSession owns the underlying SparkContext; building a
    // separate SparkContext by hand is unnecessary.
    val spark = SparkSession.builder().config(conf).getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")

    try {
      // Input CSV file address on the HDFS server (placeholder; set before running).
      val inputPath = "hdfs://hdfs folder address"

      // Load the CSV into a DataFrame. The option key must be spelled
      // "inferSchema": unrecognised keys are ignored by the reader, so a
      // misspelling silently disables schema inference.
      val vehicles = spark.read
        .format("csv")
        .option("inferSchema", "true")
        .option("header", "true")
        .option("mode", "dropMalformed")
        .load(inputPath)

      // Keep only barrels08 and make; the explicit cast guarantees a numeric
      // column for the average regardless of how the column was read.
      val barrelsByMake = vehicles.select(
        vehicles("barrels08").cast(DoubleType).as("barrels08"),
        vehicles("make")
      )

      // cube("make") produces one row per make plus a grand-total row whose
      // make is null. "avg(barrels08)" is the column name Spark generates
      // for the aggregate.
      val result = barrelsByMake
        .cube("make")
        .avg("barrels08")
        .sort(desc("avg(barrels08)"))

      // Show the top 150 rows without truncating cell contents.
      result.show(150, false)
    } finally {
      // Release cluster resources even when the job fails.
      spark.stop()
    }
  }
}