scala> val as_df = not_as_df.map( v => v.split(";")).map( v => my_class_case_defined_outside_main(v(0), v(1).toLong, v(2).toLong, v(3).toLong, v.lift(4).getOrElse(null))).toDF
as_df: org.apache.spark.sql.DataFrame = [field_type: string, uid: bigint ... 3 more fields]
scala> as_df.collect()
res4: Array[org.apache.spark.sql.Row] = Array([field_type,916480005,3475350150,3614970576,….
The same code as a compiled application, with the case class declared at object level, outside main:

import org.apache.spark.{SparkConf, SparkContext}

object MyApp {
  val conf = new SparkConf()
    .setIfMissing("spark.app.name", "MyApp")
    .setIfMissing("spark.master", "yarn-client")
  val sc = new SparkContext(conf)
  val sqlContext = new org.apache.spark.sql.SQLContext(sc)

  import sqlContext.implicits._

  // Declared outside main so that toDF can derive the schema.
  case class my_class_case_defined_outside_main(field_type: String, uid: Long, souid: Long, suid: Long, duration: String)

  def main(args: Array[String]) {
    // ... job logic (see the shell session above) ...
  }
}
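If the case class is instead declared inside main, the implicits in scope can no longer derive the schema and compilation fails, which is presumably why the class carries the name it does. A minimal sketch of the failing variant, with a hypothetical Record class:

def main(args: Array[String]) {
  // Declared inside main: the compiler cannot supply the implicit
  // TypeTag that toDF needs, and this fails with an error along the
  // lines of "No TypeTag available for Record".
  case class Record(field_type: String, uid: Long)
  sc.parallelize(Seq("a;1")).map(_.split(";"))
    .map(v => Record(v(0), v(1).toLong)).toDF()   // does not compile
}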
The shell session above was launched with:

./bin/spark-shell --master yarn --deploy-mode client
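The compiled MyApp would instead be launched with spark-submit using the same YARN client-mode flags; the jar name here is hypothetical:

./bin/spark-submit --class MyApp --master yarn --deploy-mode client myapp.jar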