GP
Size: a a a
GP
NN
С
С
С
l
from pyspark.sql.functions import pandas_udf, PandasUDFType
@pandas_udf("id long, v double", PandasUDFType.GROUPED_MAP)
def subtract_mean(pdf):
    """Center the ``v`` column of the given frame on its own mean.

    Registered as a grouped-map pandas UDF, so ``pdf`` is presumably the
    rows of a single group (confirm against the call site).

    Args:
        pdf: A pandas.DataFrame that has a ``v`` column.

    Returns:
        A new pandas.DataFrame equal to ``pdf`` except that ``v`` is
        replaced by ``v - v.mean()``. The decorator declares the output
        schema as ``id long, v double``.
    """
    v = pdf.v
    # assign() builds a new frame, so the input frame is not modified.
    return pdf.assign(v=v - v.mean())
NN
GP
NN
NN
GP
NN
GP
GP
GP
GP
NN
GP