collect_list (the opposite of explode)
from pyspark.sql.functions import collect_list
from pyspark.sql import SparkSession
# Sample (id, fruit) pairs, one row per fruit -- i.e. the "exploded" layout.
data1 = [
    (1, "apple"),
    (1, "banana"),
    (1, "cherry"),
    (2, "orange"),
    (3, "cherry"),
]

# Reuse an active session if one exists, otherwise start a new one.
session_builder = SparkSession.builder.appName("StreamingExample")
spark = session_builder.getOrCreate()

# Build the one-row-per-fruit DataFrame and print it.
df_exploded = spark.createDataFrame(data1, schema=["id", "fruit"])
df_exploded.show()

# Fold the rows back together: one row per id, with every fruit for that id
# gathered into a single array column named "fruits".
# NOTE: per the PySpark docs, collect_list does not guarantee element order
# after a shuffle, so the order inside each list may vary between runs.
fruits_per_id = collect_list("fruit").alias("fruits")
df_exploded.groupBy("id").agg(fruits_per_id).show()
Comments
Post a Comment