分组语句

标准Group By

SmartESQL 支持如下标准的Group By子句：

    -- show items in P with average quantity > 320
    SELECT DISTINCT SP.pid FROM SP GROUP BY SP.pid, SP.jid HAVING avg(SP.qty) > 320;
     
    -- show items in S with links to P2
    SELECT DISTINCT S.sname FROM S,SP WHERE S.sid = SP.sid AND SP.pid = 'P2' GROUP BY S.sname;
     
    -- show Weight in Grams for red and blue items in P with quantity > 200 and sum > 350 with text in output
    SELECT P.pid, 'Grams = ' AS TEXT1, P.weight * 454 AS GMWET, P.color, 'Max Qty = ' AS TEXT2,
    max(qty) as MQY FROM P,SP WHERE P.pid = SP.pid AND (P.color = 'Red' OR P.color = 'Blue') AND SP.qty > 200
    GROUP BY P.pid, P.weight, P.color HAVING sum(SP.qty) > 350;

以及使用having条件的更复杂的从句，例如以下这种：

    -- show items in S grouped by city with count > 1
    SELECT count(*) AS c,sid FROM S GROUP BY city HAVING c>1;
     
    -- show items in S grouped by city with count > 1
    SELECT count(*),sid FROM S GROUP BY city HAVING count(*)>1;

聚合函数也可以与 Group By子句一起使用：

    -- show count of rows having the same 'city'
    SELECT city, COUNT(sid) "Count" FROM S GROUP BY city;
     
    -- show count of rows having the same 'status'
    SELECT status, COUNT(sid) "Count" FROM S GROUP BY status;
     
    -- use HAVING clause to show count of rows having the 'status' = 30
    SELECT status, COUNT(sid) "Count" FROM S GROUP BY status HAVING status = 30;

用法提示

请注意，Group By子句要求对未包含在 Group By列表中的目标值使用聚合函数，以便为每个分组值仅显示一行。或者，也可以将所有目标值都指定在 Group By列表中。

例如，假设我们定义并填充一个名为Cities的表，如下所示：

    create table Cities(city string, jid string);
    insert into Cities values('Paris','J1');
    insert into Cities values('Rome','J2');
    insert into Cities values('Athens','J3');
    insert into Cities values('Athens','J4');
    insert into Cities values('London','J5');
    insert into Cities values('Oslo','J6');
    insert into Cities values('London','J7');

现在，如果我们用 xSQL 查询该表，就会发现 Athens 和 London 有多个对象：

    XSQL>select * from Cities;
    city    jid
    ------------------------------------------------------------------------------
    Paris   J1
    Rome    J2
    Athens  J3
    Athens  J4
    London  J5
    Oslo    J6
    London  J7
     
    Selected records: 7

因此，如果我们尝试按 city 列进行分组，就会得到以下错误：

    XSQL>select * from Cities group by city;
    ERROR: Compiler error at position 25: No aggregates are specified for GROUP BY
    select * from Cities group by city

相反，我们需要使用聚合函数来减少可能具有多个值的列的结果数量。例如：

    XSQL>select city, max(jid) from Cities group by city;
    city    #2
    ------------------------------------------------------------------------------
    Athens  J4
    London  J7
    Paris   J1
    Rome    J2
    Oslo    J6
     
    Selected records: 5

或者，若要选择所有目标值，则必须在选择列表和分组列表中列出所有可能具有多个重复值的列。例如：

    XSQL>select city,jid from Cities group by city,jid;
    city    jid
    ------------------------------------------------------------------------------
    Athens  J3
    Athens  J4
    London  J5
    London  J7
    Oslo    J6
    Paris   J1
    Rome    J2
     
    Selected records: 7

基于用户自定义函数的分组操作

要在分组子句中使用用户定义函数（UDF），必须按照用户定义函数页面中所述进行定义和注册。以下示例代码演示了如何使用名为dateformat的 UDF 自定义格式化日期字段输出：

    const char * db_name = "groupbydb";
     
    // 定义全局 SQL 引擎
    using namespace McoSql;
    McoSqlEngine engine;
     
    // 定义与数据库表“Object”相对应的结构。
    struct _Contribution
    {
        char const * name;
        uint4        date_val;
        uint4        amount;
    };
     
    _Contribution contributions[] = 
    {
        { "Michael", 91224, 250 },
        { "Sharon", 91224, 50 },
        { "George", 100101, 100 },
        { "Sharon", 100101, 150 },
        { "Michael", 100321, 300 },
        { "Liz", 100321, 20 },
        { "Bob", 100322, 25 }
    };
     
    // 声明并实例化用户自定义的日期格式函数
    static String* dateformat( McoSql::Value* date)
    {
        char str[64];
        int date_val = (int)date->intValue();
        int yy = date_val / 10000;
        int mm = date_val / 100 % 100;
        int dd = date_val % 100;
         
        // 格式为 dd.mm.yyyy，假设所有日期都在 2000 年之后。
        sprintf( str, "%d.%d.20%02d", dd, mm, yy );
        return String::create( str );
    }
     
    static SqlFunctionDeclaration udf
    (
        tpString,          // tpInt is the return value of the UDF
        "dateformat",      // the name of the function as we’ll use it in a query
        (void*)dateformat, // the function pointer
        1                  // the number of arguments to the UDF
    );
     
    int main()
    {
        ...
        engine.open( db_name, groupbydb_get_dictionary(), DATABASE_SIZE, MEMORY_PAGE_SIZE);
     
        // 填充数据
        for ( int i=0; i < (int)(sizeof( contributions ) / sizeof( contributions[0] )); i++ ) 
        {
            addContribution( &contributions[i] );
        }
         
        // 显示所有对象
        queryContributions( "Select * from Contributions" );
         
        // 按贡献者显示小计
        showGroup1();
         
        // 使用用户自定义的日期格式按日期显示小计
        showGroup2();
        engine.close();
        sample_pause_end("\n\nPress any key to continue . . . ");
        return 0;
    }
     
    int addContribution( _Contribution* c)
    {
        int ret = engine.executeStatement("insert into Contributions %r", c);
        if ( 0 == ret )
        {
            printf("\t\tError inserting Contributions %s\n", c->name );
        }
        return ret;
    }
     
    int queryContributions( char * sql )
    {
        printf( "\n%s\n", sql);
        QueryResult result( engine.executeQuery( sql ) );
        int count = 0;
        Cursor* cursor = result->records();
        while ( cursor->hasNext() )
        {
            Record* rec = cursor->next();
            _Contribution c;
            result->extract( rec, &c, sizeof(c) );
            printContribution( count+1, c );
            count++;
        }
        return count;
    }
     
    void showGroup1()
    {
        sprintf( sql, "Select name, sum(amount) from Contributions group by name" );
        printf( "\n%s\n", sql );
        QueryResult result( engine.executeQuery( sql ) );
        Cursor* cursor = result->records();
        for (int i = 1; cursor->hasNext(); i++)
        {
            Record* rec = cursor->next();
            printf( "\n\t%d)\t%s\t%f", i, ((String*)rec->get(0))->body(), rec->get(1)->realValue() );
        }
    }
     
    void showGroup2()
    {
        sprintf( sql, "Select dateformat(date_val), sum(amount) from Contributions group by date_val" );
        printf( "\n\n%s\n", sql );
        QueryResult result( engine.executeQuery( sql ) );
        Cursor* cursor = result->records();
        for (int i = 1; cursor->hasNext(); i++)
        {
            Record* rec = cursor->next();
            printf( "\n\t%d)\t%s\t%f", i, ((String*)rec->get(0))->body(), rec->get(1)->realValue() );
        }
    }