Untitled

予早 2026-04-30 23:53:27
Categories: Tags:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Script entry point: build a tiny two-column DataFrame and filter it by
# comparing the columns against each other.
if __name__ == '__main__':
    # Initialise the Spark session.
    spark = SparkSession.builder.appName("FilterDataFrame").getOrCreate()

    try:
        # Create sample data; the first row has a null in the second column.
        data = [(4000, None), (4000, 5000)]
        df = spark.createDataFrame(data, ["num1", "num2"])

        # Print the original data.
        df.show()

        # Keep rows whose first column is greater than or equal to the second.
        # NOTE: under Spark SQL null semantics a comparison against a null
        # yields null rather than true, so the (4000, None) row is not kept.
        filtered_df = df.filter(col("num1") >= col("num2"))

        # Show the filtered data.
        filtered_df.show()
    finally:
        # Release the session's resources even if one of the steps above fails.
        spark.stop()