collect_list (the opposite of explode)

# Demonstrates collect_list, the aggregate that reverses explode: it gathers
# the per-row values of each group back into a single array column.
from pyspark.sql import SparkSession
from pyspark.sql.functions import collect_list

# One (id, fruit) pair per row; id 1 appears three times so the grouped
# result contains a multi-element array.
data1 = [
    (1, "apple"),
    (1, "banana"),
    (1, "cherry"),
    (2, "orange"),
    (3, "cherry"),
]

# NOTE(review): the app name says "Streaming" but this is a plain batch job;
# the name is kept as-is because it is a runtime-visible string.
spark = SparkSession.builder.appName("StreamingExample").getOrCreate()

df_exploded = spark.createDataFrame(data1, ["id", "fruit"])
df_exploded.show()

# Group by id and collect each group's fruits into one array column named
# "fruits". collect_list keeps duplicates, and the order of elements inside
# each array is not guaranteed because it depends on row order after the
# shuffle.
df_exploded.groupBy("id").agg(collect_list("fruit").alias("fruits")).show()

Comments

Popular posts from this blog

SQL question set 1

SCD Type 2 in MySQL / Coforge

3-day running average / Capco