【机器学习小案例篇】关于RFM模型的小案例
import pandas as pd
1. 导入数据
In [75]:
# Load the raw transaction flow table.
# NOTE(review): the quotes and path separators were lost in the published text;
# the path below is a best-guess reconstruction - point it at your local copy
# of RFM_TRAD_FLOW.csv.
# The encoding may need to be adjusted to match the file.
trad_flow = pd.read_csv(r"D:\pythonScript\RFM_TRAD_FLOW.csv", encoding="gbk")
trad_flow.head()  # shows the first five rows by default
Out[75]:
2.计算 RFM
In [21]:
# M (monetary): total amount per customer id and transaction type.
M = trad_flow.groupby(["cumid", "type"])[["amount"]].sum()
In [48]:
# Reshape M to one row per customer with one column per transaction type.
trains_M = pd.pivot_table(M, index="cumid", columns="type", values="amount")
trains_M.head()
Out[48]:
In [47]:
# F (frequency): number of transactions per customer id and transaction type.
F = trad_flow.groupby(["cumid", "type"])[["transID"]].count()
F.head()
Out[47]:
In [46]:
# R (recency): latest `time` value per customer id and transaction type.
R = trad_flow.groupby(["cumid", "type"])[["time"]].max()
R.head()
Out[46]:
3.衡量客户对打折商品的偏好
In [53]:
# Customers with no Special_offer transactions have NaN after the pivot;
# treat that as a spend of 0.
trains_M["Special_offer"] = trains_M["Special_offer"].fillna(0)
trains_M["Special_offer"].head()
Out[53]:
cumid
10001    420.0
10002      0.0
10003    156.0
10004    373.0
10005      0.0
Name: Special_offer, dtype: float64
In [67]:
# Share of each customer's (Special_offer + Normal) spend that went to
# special-offer purchases.
trains_M["spe_ratio"] = trains_M["Special_offer"] / (
    trains_M["Special_offer"] + trains_M["Normal"]
)
trains_M["spe_ratio"].head()
Out[67]:
cumid
10001    0.104270
10002    0.000000
10003    0.042635
10004    0.111277
10005    0.000000
Name: spe_ratio, dtype: float64
In [68]:
# Rank customers by special-offer share, highest first.
# NOTE(review): the trailing .head() means M_rank keeps only the top five
# customers, so anything computed from M_rank afterwards sees five rows only;
# drop it if the full ranking is wanted.
M_rank = trains_M.sort_values("spe_ratio", ascending=False).head()
M_rank.head()
Out[68]:
In [74]:
# Bin the ranked spe_ratio values into four quantile-based intervals.
pd.qcut(M_rank["spe_ratio"], 4)
Out[74]:
cumid
10151    (0.387, 0.532]
40033    (0.374, 0.387]
40236    (0.333, 0.374]
30225    (0.308, 0.333]
20068    (0.308, 0.333]
Name: spe_ratio, dtype: category
Categories (4, interval[float64]): [(0.308, 0.333] < (0.333, 0.374] < (0.374, 0.387] < (0.387, 0.532]]
相关:
数据链接: