介绍

基于上个baseline,这次新加的逻辑是越近期的交易越重要:

  • 如果最近1周有交易记录,则推荐最近一周买过的商品
  • 否则,如果最近2周有交易记录,则推荐最近2周买过的商品
  • 否则,如果最近3周有交易记录,则推荐最近3周买过的商品
  • 否则,推荐最火的12个商品

代码

导入包

import numpy as np
import pandas as pd

from pathlib import Path

data_path = Path('/kaggle/input/h-and-m-personalized-fashion-recommendations/')

交易数据集

transactions = pd.read_csv(
    data_path / 'transactions_train.csv',
    # set dtype or pandas will drop the leading '0' and convert to int
    dtype={'article_id': str} 
)

submission = pd.read_csv(data_path / 'sample_submission.csv')

取出日期

transactions['t_dat'] = pd.to_datetime(transactions['t_dat'])

分别取出3周,2周,1周的交易数据和高销量数据

purchase_dict_3w = {}

for i,x in enumerate(zip(transactions_3w['customer_id'], transactions_3w['article_id'])):
    cust_id, art_id = x
    if cust_id not in purchase_dict_3w:
        purchase_dict_3w[cust_id] = {}
    
    if art_id not in purchase_dict_3w[cust_id]:
        purchase_dict_3w[cust_id][art_id] = 0
    
    purchase_dict_3w[cust_id][art_id] += 1
    
print(len(purchase_dict_3w))

dummy_list_3w = list((transactions_3w['article_id'].value_counts()).index)[:12]
purchase_dict_2w = {}

for i,x in enumerate(zip(transactions_2w['customer_id'], transactions_2w['article_id'])):
    cust_id, art_id = x
    if cust_id not in purchase_dict_2w:
        purchase_dict_2w[cust_id] = {}
    
    if art_id not in purchase_dict_2w[cust_id]:
        purchase_dict_2w[cust_id][art_id] = 0
    
    purchase_dict_2w[cust_id][art_id] += 1
    
print(len(purchase_dict_2w))

dummy_list_2w = list((transactions_2w['article_id'].value_counts()).index)[:12]
purchase_dict_1w = {}

for i,x in enumerate(zip(transactions_1w['customer_id'], transactions_1w['article_id'])):
    cust_id, art_id = x
    if cust_id not in purchase_dict_1w:
        purchase_dict_1w[cust_id] = {}
    
    if art_id not in purchase_dict_1w[cust_id]:
        purchase_dict_1w[cust_id][art_id] = 0
    
    purchase_dict_1w[cust_id][art_id] += 1
    
print(len(purchase_dict_1w))

dummy_list_1w = list((transactions_1w['article_id'].value_counts()).index)[:12]

遍历1,2,3周的数据

not_so_fancy_but_fast_benchmark = submission[['customer_id']]
prediction_list = []

dummy_list = list((transactions_1w['article_id'].value_counts()).index)[:12]
dummy_pred = ' '.join(dummy_list)

for i, cust_id in enumerate(submission['customer_id'].values.reshape((-1,))):
    if cust_id in purchase_dict_1w:
        l = sorted((purchase_dict_1w[cust_id]).items(), key=lambda x: x[1], reverse=True)
        l = [y[0] for y in l]
        if len(l)>12:
            s = ' '.join(l[:12])
        else:
            s = ' '.join(l+dummy_list_1w[:(12-len(l))])
    elif cust_id in purchase_dict_2w:
        l = sorted((purchase_dict_2w[cust_id]).items(), key=lambda x: x[1], reverse=True)
        l = [y[0] for y in l]
        if len(l)>12:
            s = ' '.join(l[:12])
        else:
            s = ' '.join(l+dummy_list_2w[:(12-len(l))])
    elif cust_id in purchase_dict_3w:
        l = sorted((purchase_dict_3w[cust_id]).items(), key=lambda x: x[1], reverse=True)
        l = [y[0] for y in l]
        if len(l)>12:
            s = ' '.join(l[:12])
        else:
            s = ' '.join(l+dummy_list_3w[:(12-len(l))])
    else:
        s = dummy_pred
    prediction_list.append(s)

not_so_fancy_but_fast_benchmark['prediction'] = prediction_list
print(not_so_fancy_but_fast_benchmark.shape)
not_so_fancy_but_fast_benchmark.head()

提交

not_so_fancy_but_fast_benchmark.to_csv('not_so_fancy_but_fast_benchmark.csv', index=False)
最后修改:2022 年 04 月 01 日 10 : 48 PM
如果觉得我的文章对你有用,请随意赞赏