SQL参考
# SQL参考
本章提供StreamSQL支持的完整SQL语法参考,包括所有支持的子句、函数和操作符。
# 📋 SQL语法概览
StreamSQL支持标准SQL语法的子集,专门针对流处理进行了优化。
SELECT [DISTINCT] select_list
FROM stream
[WHERE condition]
[GROUP BY grouping_element [, ...]]
[HAVING condition]
[LIMIT count]
[WITH (option = value [, ...])]
1
2
3
4
5
6
7
2
3
4
5
6
7
StreamSQL支持标准SQL语法的子集,专门针对流处理场景进行了优化。本章提供完整的SQL语法参考。
# SQL语法概览
# 基本查询结构
SELECT [DISTINCT] select_list
FROM stream_name
[WHERE condition]
[GROUP BY grouping_list]
[HAVING condition]
[ORDER BY ordering_list]
[LIMIT number]
[WITH (option_list)]
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 支持的子句
子句 | 必需 | 说明 |
---|---|---|
SELECT | 是 | 指定输出字段 |
FROM | 是 | 指定数据源 |
WHERE | 否 | 过滤条件 |
GROUP BY | 否 | 分组和窗口 |
HAVING | 否 | 聚合结果过滤 |
ORDER BY | 否 | 排序(有限支持) |
LIMIT | 否 | 限制结果数量 |
WITH | 否 | 配置选项 |
# SELECT 子句
SELECT子句定义查询的输出字段和计算表达式。
# 基本语法
SELECT column1, column2, expression AS alias
FROM stream
1
2
2
# 支持的选择类型
# 1. 字段选择
-- 选择所有字段
SELECT * FROM stream
-- 选择特定字段
SELECT deviceId, temperature FROM stream
-- 字段别名
SELECT deviceId AS device, temperature AS temp FROM stream
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 2. 嵌套字段访问
-- 点号语法访问嵌套字段
SELECT device.info.name, device.location.building FROM stream
-- 深层嵌套
SELECT sensor.data.temperature.value FROM stream
1
2
3
4
5
2
3
4
5
# 3. 表达式计算
-- 算术表达式
SELECT temperature * 1.8 + 32 AS fahrenheit FROM stream
-- 字符串连接
SELECT CONCAT(deviceId, '-', location) AS full_id FROM stream
-- 条件表达式
SELECT CASE
WHEN temperature > 30 THEN 'hot'
WHEN temperature < 10 THEN 'cold'
ELSE 'normal'
END AS temp_level FROM stream
1
2
3
4
5
6
7
8
9
10
11
12
2
3
4
5
6
7
8
9
10
11
12
# 4. 聚合函数
-- 基础聚合
SELECT COUNT(*), AVG(temperature), MAX(humidity) FROM stream
-- 带分组的聚合
SELECT deviceId, AVG(temperature) FROM stream GROUP BY deviceId
1
2
3
4
5
2
3
4
5
# DISTINCT 去重
-- 去重查询
SELECT DISTINCT deviceType FROM stream
-- 多字段去重
SELECT DISTINCT deviceId, location FROM stream
1
2
3
4
5
2
3
4
5
# 基本语法
SELECT column1, column2, ...
SELECT expression AS alias
SELECT *
SELECT DISTINCT column1
1
2
3
4
2
3
4
# 字段选择
# 1. 直接字段引用
-- 选择特定字段
SELECT deviceId, temperature, humidity FROM stream
-- 选择所有字段
SELECT * FROM stream
1
2
3
4
5
2
3
4
5
# 2. 表达式计算
-- 算术表达式
SELECT deviceId, temperature * 1.8 + 32 as fahrenheit FROM stream
-- 字符串连接
SELECT CONCAT(deviceId, '_', status) as device_status FROM stream
-- 条件表达式
SELECT deviceId,
CASE
WHEN temperature > 30 THEN 'HIGH'
WHEN temperature > 20 THEN 'NORMAL'
ELSE 'LOW'
END as temp_level
FROM stream
1
2
3
4
5
6
7
8
9
10
11
12
13
14
2
3
4
5
6
7
8
9
10
11
12
13
14
# 3. 函数调用
-- 内置函数
SELECT deviceId, UPPER(status), ABS(temperature) FROM stream
-- 聚合函数
SELECT deviceId, AVG(temperature), COUNT(*) FROM stream
GROUP BY deviceId, TumblingWindow('1m')
-- 自定义函数
SELECT deviceId, custom_function(temperature) FROM stream
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 别名 (AS)
-- 字段别名
SELECT temperature AS temp, humidity AS hum FROM stream
-- 表达式别名
SELECT temperature * 1.8 + 32 AS fahrenheit FROM stream
-- AS关键字可省略
SELECT temperature temp, humidity hum FROM stream
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# DISTINCT
-- 去重(在窗口聚合中使用)
SELECT DISTINCT deviceId, location
FROM stream
GROUP BY TumblingWindow('1m')
1
2
3
4
2
3
4
# FROM 子句
FROM子句指定数据源,在StreamSQL中通常为stream
。
# 基本语法
SELECT * FROM stream
1
# 数据源类型
- stream: 默认的流数据源
- 未来版本可能支持多个命名流
-- 当前支持
SELECT * FROM stream
-- 未来可能支持
SELECT * FROM sensor_stream
SELECT * FROM device_stream
1
2
3
4
5
6
2
3
4
5
6
# 数据源指定
-- 标准数据源名称
FROM stream
-- 自定义数据源名称
FROM sensor_data
FROM device_stream
1
2
3
4
5
6
2
3
4
5
6
提示
FROM子句中的名称是逻辑概念,实际数据通过 AddData()
方法输入。
# WHERE 子句
WHERE子句用于过滤数据,只处理满足条件的记录。
# 基本语法
SELECT * FROM stream WHERE condition
1
# 支持的条件类型
# 1. 比较操作符
-- 数值比较
SELECT * FROM stream WHERE temperature > 25
SELECT * FROM stream WHERE humidity <= 60
SELECT * FROM stream WHERE pressure = 1013.25
SELECT * FROM stream WHERE voltage != 0
-- 字符串比较
SELECT * FROM stream WHERE deviceId = 'sensor001'
SELECT * FROM stream WHERE location != 'offline'
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 2. 逻辑操作符
-- AND 操作
SELECT * FROM stream WHERE temperature > 20 AND humidity < 80
-- OR 操作
SELECT * FROM stream WHERE deviceType = 'temperature' OR deviceType = 'humidity'
-- NOT 操作
SELECT * FROM stream WHERE NOT (temperature < 0)
-- 复合条件
SELECT * FROM stream WHERE (temperature > 30 OR humidity > 90) AND deviceId LIKE 'sensor%'
1
2
3
4
5
6
7
8
9
10
11
2
3
4
5
6
7
8
9
10
11
# 3. 范围操作
-- BETWEEN 范围
SELECT * FROM stream WHERE temperature BETWEEN 20 AND 30
-- IN 列表
SELECT * FROM stream WHERE deviceType IN ('temperature', 'humidity', 'pressure')
-- NOT IN
SELECT * FROM stream WHERE deviceId NOT IN ('test001', 'test002')
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 4. 模式匹配
-- LIKE 模式匹配
SELECT * FROM stream WHERE deviceId LIKE 'sensor%' -- 以sensor开头
SELECT * FROM stream WHERE location LIKE '%room%' -- 包含room
SELECT * FROM stream WHERE deviceId LIKE 'dev___' -- dev后跟3个字符
-- 字符串函数
SELECT * FROM stream WHERE STARTSWITH(deviceId, 'sensor')
SELECT * FROM stream WHERE ENDSWITH(location, 'floor')
SELECT * FROM stream WHERE CONTAINS(description, 'temperature')
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 5. NULL 检查
-- NULL 检查
SELECT * FROM stream WHERE temperature IS NOT NULL
SELECT * FROM stream WHERE error_msg IS NULL
-- 空字符串检查
SELECT * FROM stream WHERE deviceId != ''
SELECT * FROM stream WHERE TRIM(location) != ''
1
2
3
4
5
6
7
2
3
4
5
6
7
# 6. 嵌套字段条件
-- 嵌套字段过滤
SELECT * FROM stream WHERE device.info.status = 'active'
SELECT * FROM stream WHERE sensor.data.temperature > 25
SELECT * FROM stream WHERE config.settings.enabled = true
1
2
3
4
2
3
4
# 过滤条件
# 1. 比较操作
-- 数值比较
WHERE temperature > 25
WHERE humidity BETWEEN 30 AND 70
WHERE pressure != 1013.25
-- 字符串比较
WHERE deviceId = 'sensor001'
WHERE status IN ('active', 'online')
WHERE location LIKE 'building_%'
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 2. 逻辑操作
-- 逻辑组合
WHERE temperature > 25 AND humidity < 60
WHERE status = 'active' OR status = 'standby'
WHERE NOT (temperature < 0)
-- 空值检查
WHERE temperature IS NOT NULL
WHERE deviceId IS NULL
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 3. 复杂条件
-- 嵌套条件
WHERE (temperature > 30 AND humidity > 80)
OR (temperature < 0 AND pressure > 1020)
-- 函数条件
WHERE ABS(temperature - 25) > 5
WHERE LENGTH(deviceId) > 10
1
2
3
4
5
6
7
2
3
4
5
6
7
# 支持的操作符
操作符 | 说明 | 示例 |
---|---|---|
= | 等于 | temperature = 25 |
!= , <> | 不等于 | status != 'offline' |
> , >= | 大于、大于等于 | humidity > 50 |
< , <= | 小于、小于等于 | pressure <= 1000 |
BETWEEN | 范围 | temperature BETWEEN 20 AND 30 |
IN | 包含 | status IN ('on', 'off') |
LIKE | 模式匹配 | name LIKE 'sensor%' |
IS NULL | 空值检查 | value IS NULL |
AND | 逻辑与 | temp > 20 AND humid < 60 |
OR | 逻辑或 | status = 'on' OR status = 'ready' |
NOT | 逻辑非 | NOT (temperature < 0) |
# GROUP BY 子句
GROUP BY子句用于数据分组和窗口定义,是流处理中的核心功能。
# 基本语法
SELECT aggregate_function(column)
FROM stream
GROUP BY grouping_columns [, window_function]
1
2
3
2
3
# 分组类型
# 1. 字段分组
-- 单字段分�?
SELECT deviceId, COUNT(*) FROM stream GROUP BY deviceId
-- 多字段分�?
SELECT deviceId, location, AVG(temperature)
FROM stream
GROUP BY deviceId, location
-- 表达式分�?
SELECT HOUR(timestamp) as hour, COUNT(*)
FROM stream
GROUP BY HOUR(timestamp)
1
2
3
4
5
6
7
8
9
10
11
12
2
3
4
5
6
7
8
9
10
11
12
# 2. 窗口分组
# 滚动窗口 (Tumbling Window)
-- 时间滚动窗口
SELECT COUNT(*) FROM stream GROUP BY TumblingWindow('5s')
SELECT AVG(temperature) FROM stream GROUP BY TumblingWindow('1m')
SELECT SUM(value) FROM stream GROUP BY TumblingWindow('1h')
-- 带字段分组的滚动窗口
SELECT deviceId, AVG(temperature)
FROM stream
GROUP BY deviceId, TumblingWindow('5s')
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 滑动窗口 (Sliding Window)
-- 滑动窗口:窗口大小,滑动间隔
SELECT AVG(temperature) FROM stream GROUP BY SlidingWindow('30s', '10s')
SELECT MAX(pressure) FROM stream GROUP BY SlidingWindow('1m', '30s')
-- 带分组的滑动窗口
SELECT deviceId, AVG(temperature)
FROM stream
GROUP BY deviceId, SlidingWindow('30s', '10s')
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 计数窗口 (Counting Window)
-- 每N条记录触发一�?
SELECT COUNT(*) FROM stream GROUP BY CountingWindow(100)
SELECT AVG(value) FROM stream GROUP BY CountingWindow(50)
-- 带分组的计数窗口
SELECT deviceId, COUNT(*)
FROM stream
GROUP BY deviceId, CountingWindow(10)
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 会话窗口 (Session Window)
-- 会话超时窗口
SELECT userId, COUNT(*) FROM stream GROUP BY userId, SessionWindow('5m')
SELECT deviceId, SUM(events) FROM stream GROUP BY deviceId, SessionWindow('30s')
1
2
3
2
3
# 窗口函数
在GROUP BY中可以使用窗口相关的函数�?
SELECT deviceId,
COUNT(*) as event_count,
window_start() as start_time,
window_end() as end_time,
window_duration() as duration
FROM stream
GROUP BY deviceId, TumblingWindow('5s')
1
2
3
4
5
6
7
2
3
4
5
6
7
# 分组字段
-- 单字段分�?
GROUP BY deviceId
-- 多字段分�?
GROUP BY deviceId, location, status
-- 表达式分�?
GROUP BY FLOOR(temperature / 10) * 10 -- 按温度区间分�?
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 窗口函数
# 1. 滚动窗口 (TumblingWindow)
-- 基本语法
GROUP BY deviceId, TumblingWindow('5m')
-- 支持的时间单位
GROUP BY TumblingWindow('30s') -- 30秒
GROUP BY TumblingWindow('5m') -- 5分钟
GROUP BY TumblingWindow('1h') -- 1小时
GROUP BY TumblingWindow('1d') -- 1天
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 2. 滑动窗口 (SlidingWindow)
-- 基本语法: SlidingWindow(窗口大小, 滑动间隔)
GROUP BY deviceId, SlidingWindow('10m', '2m')
-- 示例
GROUP BY SlidingWindow('1h', '15m') -- 1小时窗口,每15分钟滑动
GROUP BY SlidingWindow('30s', '5s') -- 30秒窗口,每5秒滑动
1
2
3
4
5
6
2
3
4
5
6
# 3. 计数窗口 (CountingWindow)
-- 基本语法
GROUP BY deviceId, CountingWindow(100) -- 每100条数据
-- 示例
GROUP BY CountingWindow(50) -- 每50条数据触发
GROUP BY CountingWindow(1000) -- 每1000条数据触发
1
2
3
4
5
6
2
3
4
5
6
# 4. 会话窗口 (SessionWindow)
-- 基本语法
GROUP BY user_id, SessionWindow('5m') -- 5分钟超时
-- 示例
GROUP BY device_id, SessionWindow('30s') -- 30秒无数据则关闭会话
GROUP BY session_key, SessionWindow('10m') -- 10分钟会话超时
1
2
3
4
5
6
2
3
4
5
6
# HAVING 子句
HAVING子句用于过滤聚合结果,类似于WHERE但作用于GROUP BY之后。
# 基本语法
SELECT aggregate_function(column)
FROM stream
GROUP BY grouping_columns
HAVING aggregate_condition
1
2
3
4
2
3
4
# 使用示例
# 1. 聚合函数过滤
-- 过滤平均温度
SELECT deviceId, AVG(temperature) as avg_temp
FROM stream
GROUP BY deviceId, TumblingWindow('5s')
HAVING AVG(temperature) > 25
-- 过滤计数
SELECT location, COUNT(*) as event_count
FROM stream
GROUP BY location, TumblingWindow('1m')
HAVING COUNT(*) >= 10
-- 多个聚合条件
SELECT deviceId, AVG(temperature), MAX(humidity)
FROM stream
GROUP BY deviceId, TumblingWindow('5s')
HAVING AVG(temperature) > 20 AND MAX(humidity) < 80
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 2. 复合条件
-- 复杂的HAVING条件
SELECT deviceType, COUNT(*) as count, AVG(value) as avg_value
FROM stream
GROUP BY deviceType, TumblingWindow('1m')
HAVING COUNT(*) > 5 AND (AVG(value) > 100 OR MAX(value) > 500)
1
2
3
4
5
2
3
4
5
# WHERE vs HAVING
-- 正确:WHERE过滤原始数据,HAVING过滤聚合结果
SELECT deviceId, AVG(temperature)
FROM stream
WHERE temperature > 0 -- 过滤原始数据
GROUP BY deviceId, TumblingWindow('5s')
HAVING AVG(temperature) > 25 -- 过滤聚合结果
-- 错误:在HAVING中过滤原始字段
SELECT deviceId, AVG(temperature)
FROM stream
GROUP BY deviceId, TumblingWindow('5s')
HAVING deviceId = 'sensor001' -- 应该在WHERE中
1
2
3
4
5
6
7
8
9
10
11
12
2
3
4
5
6
7
8
9
10
11
12
用于过滤聚合结果。
-- 基本用法
SELECT deviceId, AVG(temperature) as avg_temp
FROM stream
GROUP BY deviceId, TumblingWindow('5m')
HAVING avg_temp > 25
-- 复杂条件
SELECT deviceId, COUNT(*) as count, AVG(temperature) as avg_temp
FROM stream
GROUP BY deviceId, TumblingWindow('5m')
HAVING count > 10 AND avg_temp BETWEEN 20 AND 30
-- 使用聚合函数
SELECT location, AVG(temperature) as avg_temp
FROM stream
GROUP BY location, TumblingWindow('10m')
HAVING AVG(temperature) > 25 AND COUNT(*) >= 5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# ORDER BY 子句
注意
ORDER BY在流处理中支持有限,主要用于窗口结果排序。
-- 按聚合结果排序(在窗口内)
SELECT deviceId, AVG(temperature) as avg_temp
FROM stream
GROUP BY deviceId, TumblingWindow('5m')
ORDER BY avg_temp DESC
-- 多字段排序
ORDER BY avg_temp DESC, deviceId ASC
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# LIMIT 子句
LIMIT子句限制查询结果的数量,在流处理中通常用于限制窗口输出的记录数。
# 基本语法
SELECT columns FROM stream [WHERE condition] LIMIT count
1
# 使用示例
-- 限制结果数量
SELECT * FROM stream LIMIT 100
-- 与窗口结合使用
SELECT deviceId, temperature
FROM stream
GROUP BY TumblingWindow('5s')
LIMIT 10
-- 获取最新的N条记录
SELECT deviceId, temperature, timestamp
FROM stream
WHERE deviceId = 'sensor001'
ORDER BY timestamp DESC
LIMIT 5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 注意事项
- LIMIT在流处理中主要用于控制输出量
- 对于聚合查询,LIMIT限制的是聚合结果的数量
- 建议合理设置LIMIT以避免内存压力
限制输出结果数量。
-- 基本用法
SELECT deviceId, temperature FROM stream LIMIT 10
-- 与窗口结合
SELECT deviceId, AVG(temperature) as avg_temp
FROM stream
GROUP BY deviceId, TumblingWindow('5m')
LIMIT 5 -- 每个窗口最多5个结果
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# WITH 子句
WITH子句用于指定查询的配置选项,如时间戳字段、时间单位等。
# 基本语法
SELECT columns FROM stream
[WHERE condition]
[GROUP BY grouping]
WITH (option = value [, ...])
1
2
3
4
2
3
4
# 支持的选项
# 1. 时间戳配置
-- 指定时间戳字段
SELECT AVG(temperature) FROM stream
GROUP BY TumblingWindow('5s')
WITH (TIMESTAMP = 'event_time')
-- 指定时间单位
SELECT COUNT(*) FROM stream
GROUP BY TumblingWindow('5000')
WITH (TIMEUNIT = 'ms', TIMESTAMP = 'timestamp')
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 2. 时间单位选项
-- 支持的时间单位
WITH (TIMEUNIT = 'ms') -- 毫秒
WITH (TIMEUNIT = 'ss') -- 秒(默认)
WITH (TIMEUNIT = 'mi') -- 分钟
WITH (TIMEUNIT = 'hh') -- 小时
WITH (TIMEUNIT = 'dd') -- 天
1
2
3
4
5
6
2
3
4
5
6
# 3. 完整示例
-- 使用事件时间的窗口聚合
SELECT deviceId,
AVG(temperature) as avg_temp,
window_start() as start_time
FROM stream
WHERE temperature > 0
GROUP BY deviceId, TumblingWindow('300000') -- 5分钟 = 300000毫秒
HAVING AVG(temperature) > 20
WITH (TIMESTAMP = 'event_timestamp', TIMEUNIT = 'ms')
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
配置查询选项。
-- 时间戳字段配置
WITH (TIMESTAMP='event_time')
-- 时间单位配置
WITH (TIMEUNIT='ss') -- 秒
WITH (TIMEUNIT='ms') -- 毫秒
WITH (TIMEUNIT='mi') -- 分钟
WITH (TIMEUNIT='hh') -- 小时
-- 组合配置
WITH (TIMESTAMP='ts', TIMEUNIT='ms')
1
2
3
4
5
6
7
8
9
10
11
2
3
4
5
6
7
8
9
10
11
# 支持的配置项
配置项 | 类型 | 说明 | 默认值 |
---|---|---|---|
TIMESTAMP | string | 时间戳字段名 | 系统时间 |
TIMEUNIT | string | 时间单位 | 'ss' (秒) |
# 数据类型和常量
# 数值常量
SELECT 42, 3.14, -10.5, 1e6 FROM stream
1
# 字符串常量
SELECT 'hello', "world", 'it''s ok' FROM stream
1
# 布尔常量
SELECT true, false, temperature > 25 FROM stream
1
# NULL值
SELECT NULL, temperature IS NULL FROM stream
1
# 表达式和操作符
# 算术操作符
操作符 | 说明 | 示例 |
---|---|---|
+ | 加法 | temperature + 10 |
- | 减法 | temperature - 5 |
* | 乘法 | temperature * 1.8 |
/ | 除法 | total / count |
% | 取模 | id % 10 |
# 字符串操作
-- 字符串连接
SELECT deviceId + '_' + status FROM stream
SELECT CONCAT(deviceId, '_', status) FROM stream
-- 字符串函数
SELECT UPPER(status), LOWER(location), LENGTH(deviceId) FROM stream
1
2
3
4
5
6
2
3
4
5
6
# 条件表达式
# CASE表达式
-- 简单CASE
SELECT deviceId,
CASE status
WHEN 'active' THEN 1
WHEN 'inactive' THEN 0
ELSE -1
END as status_code
FROM stream
-- 搜索CASE
SELECT deviceId,
CASE
WHEN temperature > 30 THEN 'HOT'
WHEN temperature > 20 THEN 'WARM'
WHEN temperature > 10 THEN 'COOL'
ELSE 'COLD'
END as temp_category
FROM stream
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# 内置函数
# 聚合函数
函数 | 说明 | 示例 |
---|---|---|
COUNT(*) | 计数 | COUNT(*) |
SUM(expr) | 求和 | SUM(temperature) |
AVG(expr) | 平均值 | AVG(temperature) |
MIN(expr) | 最小值 | MIN(temperature) |
MAX(expr) | 最大值 | MAX(temperature) |
STDDEV(expr) | 标准差 | STDDEV(temperature) |
MEDIAN(expr) | 中位数 | MEDIAN(temperature) |
# 数学函数
函数 | 说明 | 示例 |
---|---|---|
ABS(x) | 绝对值 | ABS(temperature) |
ROUND(x, d) | 四舍五入 | ROUND(temperature, 2) |
FLOOR(x) | 向下取整 | FLOOR(temperature) |
CEIL(x) | 向上取整 | CEIL(temperature) |
SQRT(x) | 平方根 | SQRT(area) |
POWER(x, y) | 幂运算 | POWER(distance, 2) |
# 字符串函数
函数 | 说明 | 示例 |
---|---|---|
CONCAT(s1, s2, ...) | 字符串连接 | CONCAT(first, '_', last) |
UPPER(s) | 转大写 | UPPER(status) |
LOWER(s) | 转小写 | LOWER(deviceId) |
LENGTH(s) | 字符串长度 | LENGTH(message) |
SUBSTRING(s, start, len) | 子字符串 | SUBSTRING(deviceId, 1, 5) |
TRIM(s) | 去除空白 | TRIM(name) |
# 时间函数
函数 | 说明 | 示例 |
---|---|---|
window_start() | 窗口开始时间 | window_start() |
window_end() | 窗口结束时间 | window_end() |
NOW() | 当前时间 | NOW() |
# 类型转换函数
函数 | 说明 | 示例 |
---|---|---|
CAST(expr AS type) | 类型转换 | CAST(temperature AS STRING) |
# 完整示例
# 1. 基础查询
-- 简单过滤
SELECT deviceId, temperature, status
FROM stream
WHERE temperature > 25 AND status = 'active'
1
2
3
4
2
3
4
# 2. 聚合分析
-- 设备温度统计
SELECT deviceId,
COUNT(*) as sample_count,
AVG(temperature) as avg_temp,
MIN(temperature) as min_temp,
MAX(temperature) as max_temp,
STDDEV(temperature) as temp_stddev
FROM stream
WHERE temperature IS NOT NULL
GROUP BY deviceId, TumblingWindow('5m')
HAVING sample_count >= 10
1
2
3
4
5
6
7
8
9
10
11
2
3
4
5
6
7
8
9
10
11
# 3. 复杂表达式
-- 温度异常检测
SELECT deviceId,
temperature,
ABS(temperature - AVG(temperature)) as deviation,
CASE
WHEN ABS(temperature - AVG(temperature)) > 2 * STDDEV(temperature)
THEN 'ANOMALY'
ELSE 'NORMAL'
END as anomaly_status
FROM stream
GROUP BY deviceId, SlidingWindow('10m', '1m')
1
2
3
4
5
6
7
8
9
10
11
2
3
4
5
6
7
8
9
10
11
# 4. 多窗口分析
-- 多层级时间分析
SELECT deviceId,
'1m' as window_type,
AVG(temperature) as avg_temp,
window_start() as start_time
FROM stream
GROUP BY deviceId, TumblingWindow('1m')
UNION ALL
SELECT deviceId,
'5m' as window_type,
AVG(temperature) as avg_temp,
window_start() as start_time
FROM stream
GROUP BY deviceId, TumblingWindow('5m')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
注意
上述UNION ALL示例仅为语法展示,StreamSQL当前不支持UNION操作。
# 语法限制
# 不支持的特性
JOIN
操作(多表连接)UNION
操作(结果合并)子查询
(嵌套SELECT)INSERT/UPDATE/DELETE
(数据修改)CREATE TABLE
(表定义)视图
(VIEW)存储过程
(PROCEDURE)触发器
(TRIGGER)
# 限制说明
- 单数据源:只支持单个数据流处理
- 无持久化:不支持数据持久化存储
- 无事务:不支持事务操作
- 内存限制:受限于单机内存容量
# 性能优化建议
# 1. WHERE子句优化
-- 好的实践:早期过滤
SELECT deviceId, AVG(temperature)
FROM stream
WHERE temperature BETWEEN 0 AND 100 -- 先过滤异常数据
GROUP BY deviceId, TumblingWindow('5m')
-- 避免:复杂WHERE条件
WHERE UPPER(CONCAT(deviceId, status)) LIKE '%ACTIVE%'
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 2. 合理使用窗口
-- 好的实践:根据需求选择合适窗口
GROUP BY TumblingWindow('1m') -- 需要精确周期统计
GROUP BY SlidingWindow('5m', '1m') -- 需要平滑分析
-- 避免:过小的窗口间隔
GROUP BY SlidingWindow('1h', '1s') -- 计算开销巨大
1
2
3
4
5
6
2
3
4
5
6
# 3. 表达式优化
-- 好的实践:简单表达式
SELECT temperature * 1.8 + 32 as fahrenheit
-- 避免:复杂嵌套表达式
SELECT POWER(SQRT(ABS(temperature - AVG(temperature))), 2)
1
2
3
4
5
2
3
4
5
在 GitHub 上编辑此页 (opens new window)
上次更新: 2025/07/27, 15:17:27