#library(plyr)
transaction_list_costumer <- plyr::ddply(basket,c("Member_number","Date"),
function(df1)paste(df1$itemDescription,
collapse = ","))
colnames(transaction_list_costumer) <- c("Member_number","Date","Baskets")
transaction_list_costumer
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# How many member
base::table(basket$Member_number) %>% length()
## [1] 3898
# 3898
# How many items
base::table(basket$itemDescription) %>% length()
## [1] 167
# 167
library(dplyr)
# How many transactions by Member, the data will be arranged by descending
basket %>% group_by(Member_number) %>%
arrange(Member_number) %>%
dplyr::summarise(N_operation=n()) %>%
arrange(desc(N_operation))
Costumers_10 <- basket %>% group_by(Member_number) %>%
arrange(Member_number) %>%
dplyr::summarise(N_operation=n()) %>%
arrange(desc(N_operation)) %>%
head(10) %>%
tibble::as.tibble()
Costumers_10
target <- c("3180", "2015", "3050", "3737", "2271", "2433", "2625", "3915", "3872", "2394")
transaction_list_costumer_10 <- transaction_list_costumer %>%
filter( Member_number %in% target)
transaction_list_costumer_10
#set column Member_number of dataframe transaction_list
transaction_list_costumer_10$Member_number <- NULL
#set column Date of dataframe transaction_list
transaction_list_costumer_10$Date <- NULL
#Rename column to Baskets
colnames(transaction_list_costumer_10) <- c("Baskets")
#Show Dataframe transactionData
transaction_list_costumer_10
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Write the Baskets list to a file
write.csv(transaction_list_costumer_10,"transactions_list_costumer_10.csv", quote = FALSE, row.names = TRUE)
# load the Baskets list as a basket format using arules package
tr_list_costumer_10 <- read.transactions('transactions_list_costumer_10.csv', format = 'basket', sep=',')
## Warning in asMethod(object): removing duplicated items in transactions
tr_list_costumer_10
## transactions in sparse format with
## 91 transactions (rows) and
## 168 items (columns)
summary(tr_list_costumer_10)
## transactions as itemMatrix in sparse format with
## 91 rows (elements/itemsets/transactions) and
## 168 columns (items) and a density of 0.0242674
##
## most frequent items:
## whole milk other vegetables yogurt rolls/buns
## 25 14 14 11
## soda (Other)
## 11 296
##
## element (itemset/transaction) length distribution:
## sizes
## 1 3 4 5 6 7 8 9 10
## 1 50 15 12 3 4 1 4 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 3.000 4.077 5.000 10.000
##
## includes extended item information - examples:
## labels
## 1 1
## 2 10
## 3 11
The summary shows that the baskets with 3 items are the most frequent: 50 transactions.
library(RColorBrewer)
#par(mfrow=c(2,1))
arules::itemFrequencyPlot(tr_list_costumer_10,topN=10,type="absolute",col=brewer.pal(8,'Pastel2'), main="Absolute Product Frequency Plot for the 10 most important Costumers")
arules::itemFrequencyPlot(tr_list_costumer_10,topN=10,type="relative",col=brewer.pal(8,'Pastel2'), main="Relative Product Frequency Plot for the 10 most important Costumers")
The whole milk
and other vegetables
remain at the first rank. The yogurt
transactions progress from 5th position to the 3rd. Also we observe a progress for Sausage
(9->6th), but a considerable regress of the root vegetables
from 6th to 10th position.
Arules
algothimes to predict Rules that associate items#Min Support as 0.0001, confidence as 0.9 and maximum of 3 (where ther the most number of transaction (itemset/transaction)) products.
association.rules_Costumer_10 <- arules::apriori(tr_list_costumer_10, parameter = list(supp=0.0001, conf=0.9, maxlen=10))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 1e-04 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 0
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[168 item(s), 91 transaction(s)] done [0.00s].
## sorting and recoding items ... [168 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10 done [0.00s].
## writing ... [17690 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
With 0.0001 support, 0.9 of confidence and 3 maximums items per basket, we obtain 17690 rules.
# summary rules of min Support as 0.0001, confidence as 0.8 and maximum of 10 products.
summary(association.rules_Costumer_10)
## set of 17690 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6 7 8 9 10
## 393 2264 4059 4620 3630 1932 656 126 10
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 4.000 5.000 5.014 6.000 10.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01099 Min. :1 Min. : 3.640 Min. :1.000
## 1st Qu.:0.01099 1st Qu.:1 1st Qu.: 8.273 1st Qu.:1.000
## Median :0.01099 Median :1 Median :22.750 Median :1.000
## Mean :0.01100 Mean :1 Mean :33.060 Mean :1.001
## 3rd Qu.:0.01099 3rd Qu.:1 3rd Qu.:45.500 3rd Qu.:1.000
## Max. :0.02198 Max. :1 Max. :91.000 Max. :2.000
##
## mining info:
## data ntransactions support confidence
## tr_list_costumer_10 91 1e-04 0.9
inspect(association.rules_Costumer_10[1:15])
## lhs rhs support confidence lift count
## [1] {53} => {ham} 0.01098901 1 45.500000 1
## [2] {53} => {sugar} 0.01098901 1 30.333333 1
## [3] {52} => {zwieback} 0.01098901 1 45.500000 1
## [4] {52} => {tropical fruit} 0.01098901 1 18.200000 1
## [5] {50} => {other vegetables} 0.01098901 1 6.500000 1
## [6] {50} => {rolls/buns} 0.01098901 1 8.272727 1
## [7] {46} => {butter} 0.01098901 1 30.333333 1
## [8] {46} => {waffles} 0.01098901 1 30.333333 1
## [9] {43} => {yogurt} 0.01098901 1 6.500000 1
## [10] {43} => {whole milk} 0.01098901 1 3.640000 1
## [11] {41} => {dessert} 0.01098901 1 22.750000 1
## [12] {41} => {root vegetables} 0.01098901 1 11.375000 1
## [13] {40} => {pork} 0.01098901 1 11.375000 1
## [14] {40} => {whole milk} 0.01098901 1 3.640000 1
## [15] {38} => {bottled water} 0.01098901 1 11.375000 1
The inspection shows that the most 10 important costumers got transactions with small basket mainly with 3 items.
There is no a complexe association between the 10 most important items.
The costumers bougth directly, mainly 3 items whitout running through supermarket department.
library(arulesViz)
## Loading required package: grid
# Filter rules with confidence greater than 0.4 or 40%
subRules_costumer_10<-association.rules_Costumer_10[quality(association.rules_Costumer_10)$confidence>0.4]
#Plot SubRules
plot(subRules_costumer_10)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
plot(subRules_costumer_10,method="two-key plot")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
#top10subRules_costumer_10 <- head(subRules_costumer_10, n = 16, by = "confidence")
plot(subRules_costumer_10[1:20], method = "graph", engine = "htmlwidget")
Interpretation
The inspection shows that the most 10 important costumers got transactions with small basket mainly with 3 items.
There is no a complexe association between the 10 most important items.
The costumers bougth directly, mainly 3 items whitout running through supermarket department.
# Filter top 10 rules with highest lift
subRules2_costumer_10<-head(subRules_costumer_10, n=10, by="lift")
plot(subRules2_costumer_10, method="paracoord") #, control = list(reorder = TRUE))
## Warning in cbind(pl, pr): number of rows of result is not a multiple of
## vector length (arg 2)
** Interpretation$**
There is not particular significant rule for the 10 most important costumers.
The 10 most important costumers buy small basket (3 or 4 items) without running through supermarket department