oxide_sql_core/lexer/
token.rs

1//! Token types for the SQL lexer.
2
3use super::Span;
4
/// SQL keywords recognized by the lexer.
///
/// Variants are grouped by the part of the language they belong to. Every
/// variant has exactly one canonical upper-case spelling, which is returned by
/// [`Keyword::as_str`] and accepted (ASCII case-insensitively) by
/// [`Keyword::from_str`].
///
/// The type is a field-less `Copy` enum and derives `Hash`, so keywords can be
/// used directly as `HashSet` / `HashMap` keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Keyword {
    // Data Query Language (DQL)
    Select,
    From,
    Where,
    Order,
    By,
    Group,
    Having,
    Limit,
    Offset,
    Distinct,
    All,

    // Joins
    Join,
    Inner,
    Left,
    Right,
    Full,
    Outer,
    Cross,
    On,
    Using,

    // Set operations
    Union,
    Intersect,
    Except,

    // Data Manipulation Language (DML)
    Insert,
    Into,
    Values,
    Update,
    Set,
    Delete,

    // Data Definition Language (DDL)
    Create,
    Drop,
    Alter,
    Table,
    Index,
    View,
    Database,
    Schema,
    Trigger,

    // Constraints
    Primary,
    Key,
    Foreign,
    References,
    Unique,
    Check,
    Default,
    Constraint,
    Cascade,
    Restrict,

    // Logical operators
    And,
    Or,
    Not,
    In,
    Between,
    Like,
    Is,
    Null,
    True,
    False,
    Exists,

    // Ordering
    Asc,
    Desc,
    Nulls,
    First,
    Last,

    // Aggregates
    Count,
    Sum,
    Avg,
    Min,
    Max,

    // Data types
    Int,
    Integer,
    Smallint,
    Bigint,
    Real,
    Double,
    Float,
    Decimal,
    Numeric,
    Char,
    Varchar,
    Text,
    Blob,
    Boolean,
    Date,
    Time,
    Timestamp,
    Datetime,

    // SQLite specific
    Autoincrement,
    If,
    Temporary,
    Temp,
    Conflict,
    Replace,
    Abort,
    Rollback,
    Fail,
    Ignore,

    // Common clauses
    As,
    Case,
    When,
    Then,
    Else,
    End,
    Cast,
    Coalesce,
    Nullif,

    // Transaction
    Begin,
    Commit,
    Transaction,

    // Misc
    With,
    Recursive,
    Over,
    Partition,
    Window,
    Rows,
    Range,
    Unbounded,
    Preceding,
    Following,
    Current,
    Row,
}

impl Keyword {
    /// Attempts to parse a keyword from a string (ASCII case-insensitive).
    ///
    /// Returns `None` when `s` is not one of the spellings produced by
    /// [`Keyword::as_str`]. Only ASCII letters are case-folded, so input that
    /// contains non-ASCII characters never matches.
    ///
    /// The lookup does not allocate: the input is upper-cased into a small
    /// stack buffer, and anything longer than the longest keyword is rejected
    /// before any comparison takes place.
    #[must_use]
    // The inherent `from_str` is kept on purpose: it returns `Option`, whereas
    // `std::str::FromStr` would force a `Result` with an error type.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Option<Self> {
        // Byte length of the longest keyword ("AUTOINCREMENT"). Must be raised
        // if a longer keyword is ever added, otherwise its arm below can never
        // match.
        const MAX_KEYWORD_LEN: usize = 13;

        let bytes = s.as_bytes();
        if bytes.len() > MAX_KEYWORD_LEN {
            return None;
        }

        // Upper-case a stack copy so the caller's string is left untouched and
        // no heap allocation is needed.
        let mut buf = [0u8; MAX_KEYWORD_LEN];
        let folded = &mut buf[..bytes.len()];
        folded.copy_from_slice(bytes);
        folded.make_ascii_uppercase();

        match &*folded {
            // Data Query Language (DQL)
            b"SELECT" => Some(Self::Select),
            b"FROM" => Some(Self::From),
            b"WHERE" => Some(Self::Where),
            b"ORDER" => Some(Self::Order),
            b"BY" => Some(Self::By),
            b"GROUP" => Some(Self::Group),
            b"HAVING" => Some(Self::Having),
            b"LIMIT" => Some(Self::Limit),
            b"OFFSET" => Some(Self::Offset),
            b"DISTINCT" => Some(Self::Distinct),
            b"ALL" => Some(Self::All),

            // Joins
            b"JOIN" => Some(Self::Join),
            b"INNER" => Some(Self::Inner),
            b"LEFT" => Some(Self::Left),
            b"RIGHT" => Some(Self::Right),
            b"FULL" => Some(Self::Full),
            b"OUTER" => Some(Self::Outer),
            b"CROSS" => Some(Self::Cross),
            b"ON" => Some(Self::On),
            b"USING" => Some(Self::Using),

            // Set operations
            b"UNION" => Some(Self::Union),
            b"INTERSECT" => Some(Self::Intersect),
            b"EXCEPT" => Some(Self::Except),

            // Data Manipulation Language (DML)
            b"INSERT" => Some(Self::Insert),
            b"INTO" => Some(Self::Into),
            b"VALUES" => Some(Self::Values),
            b"UPDATE" => Some(Self::Update),
            b"SET" => Some(Self::Set),
            b"DELETE" => Some(Self::Delete),

            // Data Definition Language (DDL)
            b"CREATE" => Some(Self::Create),
            b"DROP" => Some(Self::Drop),
            b"ALTER" => Some(Self::Alter),
            b"TABLE" => Some(Self::Table),
            b"INDEX" => Some(Self::Index),
            b"VIEW" => Some(Self::View),
            b"DATABASE" => Some(Self::Database),
            b"SCHEMA" => Some(Self::Schema),
            b"TRIGGER" => Some(Self::Trigger),

            // Constraints
            b"PRIMARY" => Some(Self::Primary),
            b"KEY" => Some(Self::Key),
            b"FOREIGN" => Some(Self::Foreign),
            b"REFERENCES" => Some(Self::References),
            b"UNIQUE" => Some(Self::Unique),
            b"CHECK" => Some(Self::Check),
            b"DEFAULT" => Some(Self::Default),
            b"CONSTRAINT" => Some(Self::Constraint),
            b"CASCADE" => Some(Self::Cascade),
            b"RESTRICT" => Some(Self::Restrict),

            // Logical operators
            b"AND" => Some(Self::And),
            b"OR" => Some(Self::Or),
            b"NOT" => Some(Self::Not),
            b"IN" => Some(Self::In),
            b"BETWEEN" => Some(Self::Between),
            b"LIKE" => Some(Self::Like),
            b"IS" => Some(Self::Is),
            b"NULL" => Some(Self::Null),
            b"TRUE" => Some(Self::True),
            b"FALSE" => Some(Self::False),
            b"EXISTS" => Some(Self::Exists),

            // Ordering
            b"ASC" => Some(Self::Asc),
            b"DESC" => Some(Self::Desc),
            b"NULLS" => Some(Self::Nulls),
            b"FIRST" => Some(Self::First),
            b"LAST" => Some(Self::Last),

            // Aggregates
            b"COUNT" => Some(Self::Count),
            b"SUM" => Some(Self::Sum),
            b"AVG" => Some(Self::Avg),
            b"MIN" => Some(Self::Min),
            b"MAX" => Some(Self::Max),

            // Data types
            b"INT" => Some(Self::Int),
            b"INTEGER" => Some(Self::Integer),
            b"SMALLINT" => Some(Self::Smallint),
            b"BIGINT" => Some(Self::Bigint),
            b"REAL" => Some(Self::Real),
            b"DOUBLE" => Some(Self::Double),
            b"FLOAT" => Some(Self::Float),
            b"DECIMAL" => Some(Self::Decimal),
            b"NUMERIC" => Some(Self::Numeric),
            b"CHAR" => Some(Self::Char),
            b"VARCHAR" => Some(Self::Varchar),
            b"TEXT" => Some(Self::Text),
            b"BLOB" => Some(Self::Blob),
            b"BOOLEAN" => Some(Self::Boolean),
            b"DATE" => Some(Self::Date),
            b"TIME" => Some(Self::Time),
            b"TIMESTAMP" => Some(Self::Timestamp),
            b"DATETIME" => Some(Self::Datetime),

            // SQLite specific
            b"AUTOINCREMENT" => Some(Self::Autoincrement),
            b"IF" => Some(Self::If),
            b"TEMPORARY" => Some(Self::Temporary),
            b"TEMP" => Some(Self::Temp),
            b"CONFLICT" => Some(Self::Conflict),
            b"REPLACE" => Some(Self::Replace),
            b"ABORT" => Some(Self::Abort),
            b"ROLLBACK" => Some(Self::Rollback),
            b"FAIL" => Some(Self::Fail),
            b"IGNORE" => Some(Self::Ignore),

            // Common clauses
            b"AS" => Some(Self::As),
            b"CASE" => Some(Self::Case),
            b"WHEN" => Some(Self::When),
            b"THEN" => Some(Self::Then),
            b"ELSE" => Some(Self::Else),
            b"END" => Some(Self::End),
            b"CAST" => Some(Self::Cast),
            b"COALESCE" => Some(Self::Coalesce),
            b"NULLIF" => Some(Self::Nullif),

            // Transaction
            b"BEGIN" => Some(Self::Begin),
            b"COMMIT" => Some(Self::Commit),
            b"TRANSACTION" => Some(Self::Transaction),

            // Misc
            b"WITH" => Some(Self::With),
            b"RECURSIVE" => Some(Self::Recursive),
            b"OVER" => Some(Self::Over),
            b"PARTITION" => Some(Self::Partition),
            b"WINDOW" => Some(Self::Window),
            b"ROWS" => Some(Self::Rows),
            b"RANGE" => Some(Self::Range),
            b"UNBOUNDED" => Some(Self::Unbounded),
            b"PRECEDING" => Some(Self::Preceding),
            b"FOLLOWING" => Some(Self::Following),
            b"CURRENT" => Some(Self::Current),
            b"ROW" => Some(Self::Row),

            _ => None,
        }
    }

    /// Returns the canonical upper-case spelling of the keyword.
    ///
    /// The returned text is exactly what [`Keyword::from_str`] matches against
    /// after upper-casing its input, so `from_str(kw.as_str())` yields `kw`.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match *self {
            // Data Query Language (DQL)
            Self::Select => "SELECT",
            Self::From => "FROM",
            Self::Where => "WHERE",
            Self::Order => "ORDER",
            Self::By => "BY",
            Self::Group => "GROUP",
            Self::Having => "HAVING",
            Self::Limit => "LIMIT",
            Self::Offset => "OFFSET",
            Self::Distinct => "DISTINCT",
            Self::All => "ALL",

            // Joins
            Self::Join => "JOIN",
            Self::Inner => "INNER",
            Self::Left => "LEFT",
            Self::Right => "RIGHT",
            Self::Full => "FULL",
            Self::Outer => "OUTER",
            Self::Cross => "CROSS",
            Self::On => "ON",
            Self::Using => "USING",

            // Set operations
            Self::Union => "UNION",
            Self::Intersect => "INTERSECT",
            Self::Except => "EXCEPT",

            // Data Manipulation Language (DML)
            Self::Insert => "INSERT",
            Self::Into => "INTO",
            Self::Values => "VALUES",
            Self::Update => "UPDATE",
            Self::Set => "SET",
            Self::Delete => "DELETE",

            // Data Definition Language (DDL)
            Self::Create => "CREATE",
            Self::Drop => "DROP",
            Self::Alter => "ALTER",
            Self::Table => "TABLE",
            Self::Index => "INDEX",
            Self::View => "VIEW",
            Self::Database => "DATABASE",
            Self::Schema => "SCHEMA",
            Self::Trigger => "TRIGGER",

            // Constraints
            Self::Primary => "PRIMARY",
            Self::Key => "KEY",
            Self::Foreign => "FOREIGN",
            Self::References => "REFERENCES",
            Self::Unique => "UNIQUE",
            Self::Check => "CHECK",
            Self::Default => "DEFAULT",
            Self::Constraint => "CONSTRAINT",
            Self::Cascade => "CASCADE",
            Self::Restrict => "RESTRICT",

            // Logical operators
            Self::And => "AND",
            Self::Or => "OR",
            Self::Not => "NOT",
            Self::In => "IN",
            Self::Between => "BETWEEN",
            Self::Like => "LIKE",
            Self::Is => "IS",
            Self::Null => "NULL",
            Self::True => "TRUE",
            Self::False => "FALSE",
            Self::Exists => "EXISTS",

            // Ordering
            Self::Asc => "ASC",
            Self::Desc => "DESC",
            Self::Nulls => "NULLS",
            Self::First => "FIRST",
            Self::Last => "LAST",

            // Aggregates
            Self::Count => "COUNT",
            Self::Sum => "SUM",
            Self::Avg => "AVG",
            Self::Min => "MIN",
            Self::Max => "MAX",

            // Data types
            Self::Int => "INT",
            Self::Integer => "INTEGER",
            Self::Smallint => "SMALLINT",
            Self::Bigint => "BIGINT",
            Self::Real => "REAL",
            Self::Double => "DOUBLE",
            Self::Float => "FLOAT",
            Self::Decimal => "DECIMAL",
            Self::Numeric => "NUMERIC",
            Self::Char => "CHAR",
            Self::Varchar => "VARCHAR",
            Self::Text => "TEXT",
            Self::Blob => "BLOB",
            Self::Boolean => "BOOLEAN",
            Self::Date => "DATE",
            Self::Time => "TIME",
            Self::Timestamp => "TIMESTAMP",
            Self::Datetime => "DATETIME",

            // SQLite specific
            Self::Autoincrement => "AUTOINCREMENT",
            Self::If => "IF",
            Self::Temporary => "TEMPORARY",
            Self::Temp => "TEMP",
            Self::Conflict => "CONFLICT",
            Self::Replace => "REPLACE",
            Self::Abort => "ABORT",
            Self::Rollback => "ROLLBACK",
            Self::Fail => "FAIL",
            Self::Ignore => "IGNORE",

            // Common clauses
            Self::As => "AS",
            Self::Case => "CASE",
            Self::When => "WHEN",
            Self::Then => "THEN",
            Self::Else => "ELSE",
            Self::End => "END",
            Self::Cast => "CAST",
            Self::Coalesce => "COALESCE",
            Self::Nullif => "NULLIF",

            // Transaction
            Self::Begin => "BEGIN",
            Self::Commit => "COMMIT",
            Self::Transaction => "TRANSACTION",

            // Misc
            Self::With => "WITH",
            Self::Recursive => "RECURSIVE",
            Self::Over => "OVER",
            Self::Partition => "PARTITION",
            Self::Window => "WINDOW",
            Self::Rows => "ROWS",
            Self::Range => "RANGE",
            Self::Unbounded => "UNBOUNDED",
            Self::Preceding => "PRECEDING",
            Self::Following => "FOLLOWING",
            Self::Current => "CURRENT",
            Self::Row => "ROW",
        }
    }
}
418
419/// The kind of token.
420#[derive(Debug, Clone, PartialEq)]
421pub enum TokenKind {
422    // Literals
423    /// Integer literal (e.g., 42)
424    Integer(i64),
425    /// Float literal (e.g., 3.14)
426    Float(f64),
427    /// String literal (e.g., 'hello')
428    String(String),
429    /// Blob literal (e.g., X'1234')
430    Blob(Vec<u8>),
431
432    // Identifiers and keywords
433    /// Identifier (e.g., column_name)
434    Identifier(String),
435    /// SQL keyword
436    Keyword(Keyword),
437
438    // Operators
439    /// +
440    Plus,
441    /// -
442    Minus,
443    /// *
444    Star,
445    /// /
446    Slash,
447    /// %
448    Percent,
449    /// =
450    Eq,
451    /// != or <>
452    NotEq,
453    /// <
454    Lt,
455    /// <=
456    LtEq,
457    /// >
458    Gt,
459    /// >=
460    GtEq,
461    /// ||
462    Concat,
463    /// &
464    BitAnd,
465    /// |
466    BitOr,
467    /// ~
468    BitNot,
469    /// <<
470    LeftShift,
471    /// >>
472    RightShift,
473
474    // Delimiters
475    /// (
476    LeftParen,
477    /// )
478    RightParen,
479    /// [
480    LeftBracket,
481    /// ]
482    RightBracket,
483    /// ,
484    Comma,
485    /// ;
486    Semicolon,
487    /// .
488    Dot,
489    /// :
490    Colon,
491    /// ::
492    DoubleColon,
493    /// ?
494    Question,
495    /// @
496    At,
497
498    // Special
499    /// End of input
500    Eof,
501    /// Invalid/unknown token
502    Error(String),
503}
504
505/// A token with its span in the source code.
506#[derive(Debug, Clone, PartialEq)]
507pub struct Token {
508    /// The kind of token.
509    pub kind: TokenKind,
510    /// The location in the source code.
511    pub span: Span,
512}
513
514impl Token {
515    /// Creates a new token.
516    #[must_use]
517    pub const fn new(kind: TokenKind, span: Span) -> Self {
518        Self { kind, span }
519    }
520
521    /// Returns true if this is an EOF token.
522    #[must_use]
523    pub const fn is_eof(&self) -> bool {
524        matches!(self.kind, TokenKind::Eof)
525    }
526
527    /// Returns true if this is a keyword.
528    #[must_use]
529    pub const fn is_keyword(&self) -> bool {
530        matches!(self.kind, TokenKind::Keyword(_))
531    }
532
533    /// Returns the keyword if this is a keyword token.
534    #[must_use]
535    pub const fn as_keyword(&self) -> Option<Keyword> {
536        match &self.kind {
537            TokenKind::Keyword(kw) => Some(*kw),
538            _ => None,
539        }
540    }
541}
542
#[cfg(test)]
mod tests {
    use super::*;

    /// Keyword lookup ignores ASCII case and rejects unknown words.
    #[test]
    fn test_keyword_from_str() {
        assert_eq!(Keyword::from_str("SELECT"), Some(Keyword::Select));
        assert_eq!(Keyword::from_str("select"), Some(Keyword::Select));
        assert_eq!(Keyword::from_str("SeLeCt"), Some(Keyword::Select));
        assert_eq!(Keyword::from_str("not_a_keyword"), None);
    }

    /// Empty, padded, non-ASCII and over-long inputs are never keywords.
    #[test]
    fn test_keyword_from_str_edge_cases() {
        assert_eq!(Keyword::from_str(""), None);
        assert_eq!(Keyword::from_str(" select"), None);
        assert_eq!(Keyword::from_str("select "), None);
        assert_eq!(Keyword::from_str("s\u{e9}lect"), None);
        // One character longer than the longest keyword.
        assert_eq!(Keyword::from_str("autoincrements"), None);
        assert_eq!(Keyword::from_str(&"x".repeat(1000)), None);
    }

    #[test]
    fn test_keyword_as_str() {
        assert_eq!(Keyword::Select.as_str(), "SELECT");
        assert_eq!(Keyword::From.as_str(), "FROM");
        assert_eq!(Keyword::Where.as_str(), "WHERE");
    }

    /// `as_str` and `from_str` must stay inverse to each other; the sample
    /// covers the shortest and the longest spellings.
    #[test]
    fn test_keyword_round_trip() {
        let sample = [
            Keyword::By,
            Keyword::Select,
            Keyword::Nullif,
            Keyword::Transaction,
            Keyword::Autoincrement,
            Keyword::Row,
        ];
        for &kw in &sample {
            assert_eq!(Keyword::from_str(kw.as_str()), Some(kw));
            assert_eq!(
                Keyword::from_str(&kw.as_str().to_ascii_lowercase()),
                Some(kw)
            );
        }
    }

    #[test]
    fn test_token_is_eof() {
        let eof = Token::new(TokenKind::Eof, Span::new(0, 0));
        let select = Token::new(TokenKind::Keyword(Keyword::Select), Span::new(0, 6));
        assert!(eof.is_eof());
        assert!(!select.is_eof());
    }

    /// Only `TokenKind::Keyword` counts as a keyword, not an identifier or EOF.
    #[test]
    fn test_token_is_keyword() {
        let select = Token::new(TokenKind::Keyword(Keyword::Select), Span::new(0, 6));
        let ident = Token::new(
            TokenKind::Identifier("select_list".to_string()),
            Span::new(0, 11),
        );
        let eof = Token::new(TokenKind::Eof, Span::new(0, 0));
        assert!(select.is_keyword());
        assert!(!ident.is_keyword());
        assert!(!eof.is_keyword());
    }

    #[test]
    fn test_token_as_keyword() {
        let select = Token::new(TokenKind::Keyword(Keyword::Select), Span::new(0, 6));
        let plus = Token::new(TokenKind::Plus, Span::new(0, 1));
        assert_eq!(select.as_keyword(), Some(Keyword::Select));
        assert_eq!(plus.as_keyword(), None);
    }
}