我覺得像這樣的東西可能會爲你正在努力完成的工作。
library(dplyr)
# Create test dataframe
index <- c(0:19)
Order_id <- c(rep(001,8),rep(002,3),rep(003,4),rep(004,3),rep(005,2))
hours_delta <- c(720,552,rep(0,5),432,0,72,96,121,0,0,0,33,0,0,77,0)
df <- data.frame(index,Order_id,hours_delta)
# Start dplyr modifications
df <- df %>%
# Group data by Order_id
group_by(Order_id) %>%
# Get the number of repitions of 0 for in the hours_delta field for that Order_id
mutate(rle = ifelse(hours_delta == 0,rle(hours_delta)[[1]][rle(hours_delta)[[2]] == 0],NA),
# Set the row above a zero sequence to the number of repetitions
rle = ifelse(is.na(rle),lead(rle),rle)) %>%
# ungroup the data
ungroup() %>%
# Set the flags based on number of repetitions
mutate(flagger = case_when(is.na(.$rle)
~ "flag_4",
.$rle == 1
~ "flag_3",
(.$rle <= 3 & .$rle > 1)
~ "flag_2",
.$rle > 3
~ "flag_1"
)
) %>%
# Remove the temporary rle column
select(-rle)
如果在非零值之間有3個確切的零,該怎麼辦? – amonk
也可以請澄清*小於3和超過1 *的含義。就代數而言,它是[1,3],(1,3],[1,3]還是(1,3)? – amonk