bveliqi
10/29/2017 - 9:47 PM

relationsship.scala

// Edge list derived from `df`: one row per entry of the `friends` string,
// with columns (id: Long, screenName, friend_id: Long).
val relationships = {
  // Remove every "[ " and " ]" occurrence from the raw `friends` string,
  // then cut what is left on the "; " separator.
  val friendTokens = split(regexp_replace(df("friends"), "\\[ | ]", ""), "; ")

  // explode is kept as its own top-level select expression: Spark does not
  // accept a generator nested inside another expression.
  val edges = df.select(df("id"), df("screenName"), explode(friendTokens).as("friend_id"))

  // Strip double quotes from each token before casting it; both ids end up as Long.
  edges.select(
    edges("id").cast(LongType).as("id"),
    edges("screenName"),
    regexp_replace(edges("friend_id"), "\"", "").cast(LongType).as("friend_id")
  )
}