经常一起被购买的商品

如果大家买A商品时通常也会买B商品,那么给买了A商品的人推荐B商品,会是一个好选择。下面来看一下这个逻辑是否合理。

代码

导入包

# Standard library
import gc
from os.path import exists

# Third-party (GPU dataframe, image loading, plotting)
import cudf
import cv2
import matplotlib.pyplot as plt

# Report which RAPIDS/cuDF build is in use.
print(f'RAPIDS version {cudf.__version__}')

读取交易数据集

# Read the full transactions table onto the GPU and preview it.
TRANSACTIONS_CSV = '../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv'
df = cudf.read_csv(TRANSACTIONS_CSV)
print(f'Transactions shape {df.shape}')
display(df.head())

# Shrink the frame: keep only the two columns used below, replace the
# customer id string by the int64 value of its last 16 hex characters,
# and store article ids as int32.
df = df[['customer_id', 'article_id']]
customer_hex_tail = df['customer_id'].str[-16:]
df['customer_id'] = customer_hex_tail.str.hex_to_int().astype('int64')
df['article_id'] = df['article_id'].astype('int32')
_ = gc.collect()

# Number of transaction rows per article; the loop further down slices this
# by position to choose which articles to inspect.
vc = df['article_id'].value_counts()

选取销量排名第1001~1032位的32个商品,来看一下效果

# For each selected article, record the TOP_K other articles that appear most
# often in the transactions of customers who bought it.
# `pairs` maps article_id -> [1st, 2nd, 3rd most co-purchased article_id].
TOP_K = 3
pairs = {}
for article in vc.index.values[1000:1032]:
    # Convert the array element to a plain Python int once per iteration
    # instead of calling .item() in every expression below.
    article_id = article.item()

    # Customers who bought this article at least once.
    buyers = df.loc[df.article_id == article_id, 'customer_id'].unique()

    # Every *other* article bought by those same customers, counted per
    # transaction row (value_counts lists the most frequent first).
    bought_by_buyers = df.customer_id.isin(buyers)
    is_other_article = df.article_id != article_id
    co_counts = df.loc[bought_by_buyers & is_other_article, 'article_id'].value_counts()

    # Positional indexing below would raise IndexError with fewer than TOP_K
    # co-purchased articles; skip such an article so every stored entry
    # always has exactly TOP_K items.
    if len(co_counts) < TOP_K:
        continue

    pairs[article_id] = [co_counts.index[rank] for rank in range(TOP_K)]

画图展示结果

# Article metadata table. It is loaded here but not referenced by the plotting
# code below.
items = cudf.read_csv('../input/h-and-m-personalized-fashion-recommendations/articles.csv')
BASE = '../input/h-and-m-personalized-fashion-recommendations/images/'


def _image_path(article_id):
    """Return the image file path for an integer article id.

    The file name is the id with a '0' prepended, and it lives in a
    sub-folder named '0' plus the first two digits of the id.
    """
    digits = str(article_id)
    return BASE + '0' + digits[:2] + '/0' + digits + '.jpg'


# One figure per entry of `pairs`: the seed article followed by its three
# most co-purchased articles.
for k, v in pairs.items():
    paths = [_image_path(k)] + [_image_path(other) for other in v[:3]]
    print(k)

    # Draw the row only when every image file is present on disk.
    if not all(exists(path) for path in paths):
        continue

    plt.figure(figsize=(20, 5))
    # cv2.imread yields BGR channel order; reversing the last axis gives the
    # RGB order that matplotlib expects.
    images = [cv2.imread(path)[:, :, ::-1] for path in paths]
    for slot, image in enumerate(images, start=1):
        plt.subplot(1, 4, slot)
        title = 'When customers buy this' if slot == 1 else 'They buy this'
        plt.title(title, size=18)
        plt.imshow(image)
    plt.show()

最后修改:2022 年 03 月 31 日 07 : 37 PM
如果觉得我的文章对你有用,请随意赞赏