tidybins



library(tidybins)
suppressPackageStartupMessages(library(dplyr))

Bin Value

Binning by value is the only original binning method implemented in this package. It is inspired by the case in marketing where accounts need to be binned by their sales. For example, one might create 10 bins, where each bin represents 10% of all market sales. The first bin contains the highest-sales accounts and thus has the smallest number of accounts, whereas the last bin contains the lowest-sales accounts and thus requires the largest number of accounts to reach 10% of the market sales.


# Simulate 1,000 integer sales figures drawn from a normal distribution.
sales_data <- tibble::tibble(
  SALES = as.integer(rnorm(1000L, mean = 10000L, sd = 3000))
)

# Bin the SALES column by value; the result keeps SALES and gains a bin column.
sales_data1 <- sales_data %>%
  bin_cols(SALES, bin_type = "value")
#> Warning: SALES contains negative values. Negative values are treated as 0.

sales_data1
#> # A tibble: 1,000 × 2
#>    SALES SALES_va10
#>    <int>      <int>
#>  1 10609          5
#>  2  8370          2
#>  3 10786          6
#>  4 12320          8
#>  5 11110          6
#>  6 10797          6
#>  7 10213          5
#>  8  7287          2
#>  9  9904          4
#> 10  6460          1
#> # ℹ 990 more rows

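Notice that the sum of sales (the .sum column) is approximately equal across bins 1 through 10. The single negative value, which is treated as 0, is placed in its own bin (rank 0).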

# Summarise the binned data and print every summary column (width = Inf
# disables column truncation when printing the tibble).
print(bin_summary(sales_data1), width = Inf)
#> # A tibble: 11 × 14
#>    column method      n_bins .rank  .min  .mean  .max .count .uniques
#>    <chr>  <chr>        <int> <int> <int>  <dbl> <int>  <int>    <int>
#>  1 SALES  equal value     10    10 14348 15719. 19905     63       61
#>  2 SALES  equal value     10     9 13038 13574. 14339     73       69
#>  3 SALES  equal value     10     8 12160 12527. 13006     79       75
#>  4 SALES  equal value     10     7 11254 11722. 12153     84       81
#>  5 SALES  equal value     10     6 10719 10969. 11251     90       83
#>  6 SALES  equal value     10     5 10142 10433. 10714     95       84
#>  7 SALES  equal value     10     4  9350  9698. 10137    102       94
#>  8 SALES  equal value     10     3  8420  8905.  9345    111      108
#>  9 SALES  equal value     10     2  7194  7832.  8419    126      117
#> 10 SALES  equal value     10     1   760  5603.  7176    176      170
#> 11 SALES  equal value     10     0  -180  -180   -180      1        1
#>    relative_value   .sum   .med   .sd width
#>             <dbl>  <int>  <dbl> <dbl> <int>
#>  1         100    990276 15492  1147.  5557
#>  2          86.4  990874 13552   360.  1301
#>  3          79.7  989631 12503   265.   846
#>  4          74.6  984631 11662.  274.   899
#>  5          69.8  987232 10938.  148.   532
#>  6          66.4  991174 10423   181.   572
#>  7          61.7  989162  9668.  231.   787
#>  8          56.7  988447  8902   273.   925
#>  9          49.8  986795  7798.  367.  1225
#> 10          35.6  986058  6161  1506.  6416
#> 11          -1.15   -180  -180    NA      0