G Appendix _ Dplyr to SQL translations

You may be interested in exactly how the DBI package translates R functions into their SQL quivalents – and in which functions are translated and which are not.
This Appendix answers those questions. It is based on the work of Dewey Dunnington (@paleolimbot) which he published here:

https://apps.fishandwhistle.net/archives/1503

https://rud.is/b/2019/04/10/lost-in-sql-translation-charting-dbplyr-mapped-sql-function-support-across-all-backends/

G.1 Overview

These packages are called below:

library(tidyverse)
library(dbplyr)
library(gt)
library(here)
library(sqlpetr)

list the DBI functions that are available:

names(sql_translate_env(simulate_dbi()))

##   [1] "-"               ":"               "!"               "!="             
##   [5] "("               "["               "[["              "{"              
##   [9] "*"               "/"               "&"               "&&"             
##  [13] "%%"              "%>%"             "%in%"            "^"              
##  [17] "+"               "<"               "<="              "=="             
##  [21] ">"               ">="              "|"               "||"             
##  [25] "$"               "abs"             "acos"            "as_date"        
##  [29] "as_datetime"     "as.character"    "as.Date"         "as.double"      
##  [33] "as.integer"      "as.integer64"    "as.logical"      "as.numeric"     
##  [37] "as.POSIXct"      "asin"            "atan"            "atan2"          
##  [41] "between"         "bitwAnd"         "bitwNot"         "bitwOr"         
##  [45] "bitwShiftL"      "bitwShiftR"      "bitwXor"         "c"              
##  [49] "case_when"       "ceil"            "ceiling"         "coalesce"       
##  [53] "cos"             "cosh"            "cot"             "coth"           
##  [57] "day"             "desc"            "exp"             "floor"          
##  [61] "hour"            "if"              "if_else"         "ifelse"         
##  [65] "is.na"           "is.null"         "log"             "log10"          
##  [69] "mday"            "minute"          "month"           "na_if"          
##  [73] "nchar"           "now"             "paste"           "paste0"         
##  [77] "pmax"            "pmin"            "qday"            "round"          
##  [81] "second"          "sign"            "sin"             "sinh"           
##  [85] "sql"             "sqrt"            "str_c"           "str_conv"       
##  [89] "str_count"       "str_detect"      "str_dup"         "str_extract"    
##  [93] "str_extract_all" "str_flatten"     "str_glue"        "str_glue_data"  
##  [97] "str_interp"      "str_length"      "str_locate"      "str_locate_all" 
## [101] "str_match"       "str_match_all"   "str_order"       "str_pad"        
## [105] "str_remove"      "str_remove_all"  "str_replace"     "str_replace_all"
## [109] "str_replace_na"  "str_sort"        "str_split"       "str_split_fixed"
## [113] "str_squish"      "str_sub"         "str_subset"      "str_to_lower"   
## [117] "str_to_title"    "str_to_upper"    "str_trim"        "str_trunc"      
## [121] "str_view"        "str_view_all"    "str_which"       "str_wrap"       
## [125] "substr"          "switch"          "tan"             "tanh"           
## [129] "today"           "tolower"         "toupper"         "trimws"         
## [133] "wday"            "xor"             "yday"            "year"           
## [137] "cume_dist"       "cummax"          "cummean"         "cummin"         
## [141] "cumsum"          "dense_rank"      "first"           "lag"            
## [145] "last"            "lead"            "max"             "mean"           
## [149] "median"          "min"             "min_rank"        "n"              
## [153] "n_distinct"      "nth"             "ntile"           "order_by"       
## [157] "percent_rank"    "quantile"        "rank"            "row_number"     
## [161] "sum"             "var"             "cume_dist"       "cummax"         
## [165] "cummean"         "cummin"          "cumsum"          "dense_rank"     
## [169] "first"           "lag"             "last"            "lead"           
## [173] "max"             "mean"            "median"          "min"            
## [177] "min_rank"        "n"               "n_distinct"      "nth"            
## [181] "ntile"           "order_by"        "percent_rank"    "quantile"       
## [185] "rank"            "row_number"      "sum"             "var"

sql_translate_env(simulate_dbi())

## <sql_variant>
## scalar:    -, :, !, !=, (, [, [[, {, *, /, &, &&, %%, %>%, %in%, ^, +,
## scalar:    <, <=, ==, >, >=, |, ||, $, abs, acos, as_date, as_datetime,
## scalar:    as.character, as.Date, as.double, as.integer, as.integer64,
## scalar:    as.logical, as.numeric, as.POSIXct, asin, atan, atan2,
## scalar:    between, bitwAnd, bitwNot, bitwOr, bitwShiftL, bitwShiftR,
## scalar:    bitwXor, c, case_when, ceil, ceiling, coalesce, cos, cosh,
## scalar:    cot, coth, day, desc, exp, floor, hour, if, if_else, ifelse,
## scalar:    is.na, is.null, log, log10, mday, minute, month, na_if,
## scalar:    nchar, now, paste, paste0, pmax, pmin, qday, round, second,
## scalar:    sign, sin, sinh, sql, sqrt, str_c, str_conv, str_count,
## scalar:    str_detect, str_dup, str_extract, str_extract_all,
## scalar:    str_flatten, str_glue, str_glue_data, str_interp,
## scalar:    str_length, str_locate, str_locate_all, str_match,
## scalar:    str_match_all, str_order, str_pad, str_remove,
## scalar:    str_remove_all, str_replace, str_replace_all,
## scalar:    str_replace_na, str_sort, str_split, str_split_fixed,
## scalar:    str_squish, str_sub, str_subset, str_to_lower, str_to_title,
## scalar:    str_to_upper, str_trim, str_trunc, str_view, str_view_all,
## scalar:    str_which, str_wrap, substr, switch, tan, tanh, today,
## scalar:    tolower, toupper, trimws, wday, xor, yday, year
## aggregate: cume_dist, cummax, cummean, cummin, cumsum, dense_rank,
## aggregate: first, lag, last, lead, max, mean, median, min, min_rank, n,
## aggregate: n_distinct, nth, ntile, order_by, percent_rank, quantile,
## aggregate: rank, row_number, sum, var
## window:    cume_dist, cummax, cummean, cummin, cumsum, dense_rank,
## window:    first, lag, last, lead, max, mean, median, min, min_rank, n,
## window:    n_distinct, nth, ntile, order_by, percent_rank, quantile,
## window:    rank, row_number, sum, var

source(here("book-src", "dbplyr-sql-function-translation.R"))

## Warning: The `.drop` argument of `unnest()` is deprecated as of tidyr 1.0.0.
## All list-columns are now preserved.
## This warning is displayed once per session.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Each of the following dbplyr back ends may have a slightly different translation:

translations %>%
  filter(!is.na(sql)) %>% count(variant)

## # A tibble: 11 x 2
##    variant      n
##    <chr>    <int>
##  1 access     193
##  2 dbi        183
##  3 hive       187
##  4 impala     190
##  5 mssql      196
##  6 mysql      194
##  7 odbc       186
##  8 oracle     184
##  9 postgres   204
## 10 sqlite     183
## 11 teradata   196

Only one postgres translation produces an output:

psql <- translations %>%
  filter(!is.na(sql), variant == "postgres") %>%
  select(r, n_args, sql) %>%
  arrange(r)

# sp_print_df(head(psql, n = 40))
sp_print_df(psql)