Create sample data:
from pyspark.sql import Row
# One sample row: three string columns plus an array column (col4).
x = [Row(col1="xx", col2="yy", col3="zz", col4=[123, 234])]
# Distribute the rows defined above rather than repeating the Row literal.
rdd = sc.parallelize(x)
# Build the DataFrame from the RDD of Rows and print it.
df = spark.createDataFrame(rdd)
df.show()
#+----+----+----+----------+
#|col1|col2|col3|      col4|
#+----+----+----+----------+
#|  xx|  yy|  zz|[123, 234]|
#+----+----+----+----------+
Use `getItem` to extract elements from the array column as shown below; in your actual case, replace `col4` with `collect_set(TIMESTAMP)`:
# Reference the array column once; both extractions read from this original
# column, so overwriting "col4" in the second step does not affect "col5".
array_col = df["col4"]
df = (
    df
    .withColumn("col5", array_col.getItem(1))  # second element -> new column
    .withColumn("col4", array_col.getItem(0))  # first element replaces the array
)
df.show()
#+----+----+----+----+----+
#|col1|col2|col3|col4|col5|
#+----+----+----+----+----+
#|  xx|  yy|  zz| 123| 234|
#+----+----+----+----+----+