Query Generation

John Mount

2018-02-10

The primary purpose of rquery is SQL query generation. We demonstrate this below.

# Multiplier applied to assessmentTotal inside exp(); it shows up as the
# literal 0.237 in the generated SQL.
scale <- 0.237

# Build the query pipeline `dq` on top of table `d`. The pipeline is turned
# into SQL text by the to_sql() call further down.
dq <- d %.>%
  # Per subjectID: probability is exp(assessmentTotal * scale) divided by the
  # sum of that same quantity over the subject's rows; count is the number of
  # rows for the subject.
  extend_nse(.,
             probability :=
               exp(assessmentTotal * scale)/
               sum(exp(assessmentTotal * scale)),
             count := count(1),
             partitionby = 'subjectID') %.>%
  # Rank rows within each subjectID, ordered by probability and then by
  # surveyCategory.
  extend_nse(.,
             rank := rank(),
             partitionby = 'subjectID',
             orderby = c('probability', 'surveyCategory'))  %.>%
  # Expose surveyCategory under the name diagnosis.
  rename_columns(., 'diagnosis' := 'surveyCategory') %.>%
  # Keep the row whose rank equals the subject's row count, i.e. the row
  # ordered last (largest probability) within its subject.
  # NOTE(review): presumably (probability, surveyCategory) pairs are distinct
  # within a subject; with exact ties rank() may never reach count -- confirm.
  select_rows_nse(., rank == count) %.>%
  # Keep only the three result columns.
  select_columns(., c('subjectID', 
                      'diagnosis', 
                      'probability')) %.>%
  # Sort the final result by subjectID.
  orderby(., 'subjectID')

# Show the class of the database handle used for SQL generation: an
# SQLiteConnection from the RSQLite package.
class(my_db)
#> [1] "SQLiteConnection"
#> attr(,"package")
#> [1] "RSQLite"

# Render the `dq` pipeline as SQL for this connection. source_limit = 1000
# appears in the output as `LIMIT 1000` on the innermost read of table `d`.
sql <- to_sql(dq, db = my_db, source_limit = 1000)

The SQL generated by to_sql():

  SELECT * FROM (
   SELECT
    `subjectID`,
    `diagnosis`,
    `probability`
   FROM (
    SELECT * FROM (
     SELECT
      `count` AS `count`,
      `probability` AS `probability`,
      `rank` AS `rank`,
      `subjectID` AS `subjectID`,
      `surveyCategory` AS `diagnosis`
     FROM (
      SELECT
       `count`,
       `probability`,
       `subjectID`,
       `surveyCategory`,
       rank ( ) OVER (  PARTITION BY `subjectID` ORDER BY `probability`, `surveyCategory` ) AS `rank`
      FROM (
       SELECT
        `subjectID`,
        `surveyCategory`,
        `assessmentTotal`,
        exp ( `assessmentTotal` * 0.237 ) / sum ( exp ( `assessmentTotal` * 0.237 ) ) OVER (  PARTITION BY `subjectID` ) AS `probability`,
        count ( 1 ) OVER (  PARTITION BY `subjectID` ) AS `count`
       FROM (
        SELECT
         `d`.`subjectID`,
         `d`.`surveyCategory`,
         `d`.`assessmentTotal`
        FROM
         `d` LIMIT 1000
        ) tsql_33378296177050285131_0000000000
       ) tsql_33378296177050285131_0000000001
     ) tsql_33378296177050285131_0000000002
    ) tsql_33378296177050285131_0000000003
    WHERE `rank` = `count`
   ) tsql_33378296177050285131_0000000004
  ) tsql_33378296177050285131_0000000005 ORDER BY `subjectID`