经常一起被购买的商品
如果买了A商品的顾客通常也会买B商品,那么向买了A商品的人推荐B商品,应该会是一个好选择。下面来验证一下这个逻辑是否合理。
代码
导入包
import cudf, gc
import cv2, matplotlib.pyplot as plt
from os.path import exists
# Print the version string of the imported cuDF (RAPIDS) package.
print('RAPIDS version',cudf.__version__)
读取交易数据集
# Read the full transactions table with cuDF and preview it.
df = cudf.read_csv(
    '../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv'
)
print('Transactions shape', df.shape)
display(df.head())

# Shrink the frame: keep only the two columns used below and store them as
# fixed-width integers instead of strings / wider ints.
df = df[['customer_id', 'article_id']]
# Only the last 16 hex characters of the customer id are kept and parsed
# into an int64 key.
df['customer_id'] = (
    df['customer_id'].str[-16:].str.hex_to_int().astype('int64')
)
df['article_id'] = df['article_id'].astype('int32')
_ = gc.collect()

# Number of transaction rows per article (value_counts sorts by descending
# count by default), used below to pick which articles to inspect.
vc = df['article_id'].value_counts()
选取购买次数排名第1001到1032位的32个商品,来看一下效果
# For each sampled article, record the three other articles most frequently
# bought by the customers who bought it.
# Result: pairs[article_id] -> [1st, 2nd, 3rd most co-purchased article id].
pairs = {}
for i in vc.index.values[1000:1032]:
    # Convert the array element to a plain Python int once per iteration.
    article = i.item()
    # Customers who bought this article at least once.
    USERS = df.loc[df.article_id == article, 'customer_id'].unique()
    # Everything else those customers bought (the article itself is
    # excluded), counted per article and sorted by descending count.
    vc2 = df.loc[
        (df.customer_id.isin(USERS)) & (df.article_id != article),
        'article_id',
    ].value_counts()
    # Skip articles with fewer than three co-purchased articles instead of
    # raising IndexError; every stored list therefore has exactly 3 entries.
    if len(vc2) < 3:
        continue
    pairs[article] = [vc2.index[0], vc2.index[1], vc2.index[2]]
plot
# Article metadata table; it is loaded here but not referenced by the
# plotting loop below.
items = cudf.read_csv('../input/h-and-m-personalized-fashion-recommendations/articles.csv')
BASE = '../input/h-and-m-personalized-fashion-recommendations/images/'

# One figure per source article: the article itself followed by its three
# co-purchased articles, side by side.
for i, (k, v) in enumerate(pairs.items()):
    shown = (k, v[0], v[1], v[2])
    # Image path: <BASE>0<first two characters of the id>/0<id>.jpg
    paths = [BASE + '0' + str(a)[:2] + '/0' + str(a) + '.jpg' for a in shown]
    print(k)
    # Only draw the row when all four image files are present.
    if all(exists(p) for p in paths):
        plt.figure(figsize=(20, 5))
        # cv2.imread returns BGR channel order; reverse the last axis so
        # matplotlib shows the colours as RGB.
        images = [cv2.imread(p)[:, :, ::-1] for p in paths]
        for slot, img in enumerate(images, start=1):
            plt.subplot(1, 4, slot)
            caption = 'When customers buy this' if slot == 1 else 'They buy this'
            plt.title(caption, size=18)
            plt.imshow(img)
        plt.show()
    # An early stop after 64 entries (`if i==63: break`) was left disabled here.