Mercurial > hg > Members > anatofuz > monkey
changeset 0:72d22ea56795
Implement simple lexer
author | anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 15 May 2020 10:12:16 +0900 |
parents | |
children | a1166907ac2d |
files | .gitignore go.mod lexer/lexer.go lexer/lexer_test.go token/token.go |
diffstat | 5 files changed, 239 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gitignore Fri May 15 10:12:16 2020 +0900 @@ -0,0 +1,27 @@ +syntax:glob + +# Created by https://www.gitignore.io/api/go +# Edit at https://www.gitignore.io/?templates=go + +### Go ### +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +### Go Patch ### +/vendor/ +/Godeps/ + +# End of https://www.gitignore.io/api/go
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/go.mod Fri May 15 10:12:16 2020 +0900 @@ -0,0 +1,3 @@ +module firefly/hg/Members/anatofuz/monkey + +go 1.14
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/lexer.go Fri May 15 10:12:16 2020 +0900 @@ -0,0 +1,109 @@ +package lexer + +import "firefly/hg/Members/anatofuz/monkey/token" + +// Lexer model +type Lexer struct { + input string + position int // current position in input (points to current char) + readPosition int // current reading position in input(after current char) + ch byte +} + +// New create Lexer instance and start lex +func New(input string) *Lexer { + l := &Lexer{input: input} + l.readChar() + return l +} + +func (l *Lexer) readChar() { + if l.readPosition >= len(l.input) { + l.ch = 0 + } else { + l.ch = l.input[l.readPosition] + } + + l.position = l.readPosition + l.readPosition++ +} + +//NextToken is create token.Token after read from input +func (l *Lexer) NextToken() token.Token { + var tok token.Token + + l.skipWhitespace() + + switch l.ch { + case '=': + tok = newToken(token.ASSIGN, l.ch) + case ';': + tok = newToken(token.SEMICOLON, l.ch) + case '(': + tok = newToken(token.LPAREN, l.ch) + case ')': + tok = newToken(token.RPAREN, l.ch) + case ',': + tok = newToken(token.COMMA, l.ch) + case '+': + tok = newToken(token.PLUS, l.ch) + case '{': + tok = newToken(token.LBRACE, l.ch) + case '}': + tok = newToken(token.RBRACE, l.ch) + case 0: + tok.Literal = "" + tok.Type = token.EOF + + default: + if isLetter(l.ch) { + tok.Literal = l.readIdentifier() + tok.Type = token.LookupIdent(tok.Literal) + return tok + } else if isDigit(l.ch) { + tok.Type = token.INT + tok.Literal = l.readNumber() + return tok + } else { + tok = newToken(token.ILLEGAL, l.ch) + } + + } + + l.readChar() + return tok +} + +func newToken(tokenType token.TokenType, ch byte) token.Token { + return token.Token{Type: tokenType, Literal: string(ch)} +} + +func (l *Lexer) readIdentifier() string { + position := l.position + for isLetter(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} + +func isLetter(ch byte) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && 
ch <= 'Z' || ch == '_' +} + +func (l *Lexer) skipWhitespace() { + for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { + l.readChar() + } +} + +func (l *Lexer) readNumber() string { + positon := l.position + for isDigit(l.ch) { + l.readChar() + } + return l.input[positon:l.position] +} + +func isDigit(ch byte) bool { + return '0' <= ch && ch <= '9' +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/lexer_test.go Fri May 15 10:12:16 2020 +0900 @@ -0,0 +1,57 @@ +package lexer + +import ( + "firefly/hg/Members/anatofuz/monkey/token" + "testing" +) + +func TestNextToken(t *testing.T) { + input := `let five = 5; + let ten = 10; + + let add = fn(x, y) { + x + y; + }; + + let result = add(five, ten); + ` + + tests := []struct { + expectedType token.TokenType + expectedLiteral string + }{ + {token.LET, "let"}, + {token.IDENT, "five"}, + {token.ASSIGN, "="}, + {token.INT, "5"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "ten"}, + {token.ASSIGN, "="}, + {token.INT, "10"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "add"}, + {token.ASSIGN, "="}, + {token.FUNCTION, "fn"}, + {token.LPAREN, "("}, + {token.IDENT, "x"}, + } + + l := New(input) + + for i, tt := range tests { + tok := l.NextToken() + + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", + i, tt.expectedType, tok.Type) + } + + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q", + i, tt.expectedLiteral, tok.Literal) + + } + } +}
// token/token.go (package token) — new file in this changeset.

// Token type values. They are untyped string constants, so they can be used
// wherever a TokenType is expected.
const (
	// Special types.
	ILLEGAL = "ILLEGAL"
	EOF     = "EOF"

	// Identifiers and literals.
	IDENT = "IDENT"
	INT   = "INT"

	// Operators.
	ASSIGN = "="
	PLUS   = "+"

	// Delimiters.
	COMMA     = ","
	SEMICOLON = ";"

	LPAREN = "("
	RPAREN = ")" // was "(", which made RPAREN indistinguishable from LPAREN
	LBRACE = "{"
	RBRACE = "}"

	// Keywords.
	FUNCTION = "FUNCTION"
	LET      = "LET"
)

// TokenType identifies the kind of a Token.
type TokenType string

// Token is a single lexical unit: its kind plus the source text it was
// read from.
type Token struct {
	Type    TokenType
	Literal string
}

// keywords maps each reserved word to its token type.
var keywords = map[string]TokenType{
	"fn":  FUNCTION,
	"let": LET,
}

// LookupIdent returns the keyword token type for ident if it is a reserved
// word, and IDENT otherwise.
func LookupIdent(ident string) TokenType {
	if tok, ok := keywords[ident]; ok {
		return tok
	}
	return IDENT
}