Skip to content

How would I separate statements? #111

@ghost

Description

So I'm making this markup language that compiles to HTML, it's called bml.

But i'm having trouble separating statements.

This is its syntax:

Statements:
in HTML:
<!DOCTYPE HTML>
in BML:
{ "DOCTYPE" "HTML" };

Declarations:
in HTML:
<h1>Hello, World1</h1>
in BML:
"h1" { "Hello, World!" };

And in the lexer and parser classes:

Lexer:

from rply import LexerGenerator


class BMLLexer():
    def __init__(self):
        self.__lexer = LexerGenerator()

    def __add_tokens(self):
        # Statement definitions
        self.__lexer.add('OPEN_STATEMENT', r'\{')
        self.__lexer.add('CLOSE_STATEMENT', r'\}')
        self.__lexer.add('STATEMENT_END', r'\;')
        # Ignore spaces
        self.__lexer.ignore('\s+')
        # Anything
        self.__lexer.add('STRING', r'["][\w\s]+["]')

    def build(self):
        self.__add_tokens()
        return self.__lexer.build()

Parser:

import re
from bmls.language.parser.definitions import BMLDefinition, BMLStatement
from rply import ParserGenerator

class BMLParser():
    def __init__(self):
        self.pg = ParserGenerator(
            # A list of all token names accepted by the parser.
            [
                'OPEN_STATEMENT',
                'CLOSE_STATEMENT',
                'STATEMENT_END',
                'STRING'
            ]
        )

    def parse(self):
        @self.pg.production('expression : OPEN_STATEMENT STRING STRING CLOSE_STATEMENT STATEMENT_END')
        def statement(p):
            name = ""
            definition = ""

            if p[1].gettokentype() == "STRING" and p[2].gettokentype() == "STRING":
                name = self.__removeQuotes(p[1].getstr())
                definition = self.__removeQuotes(p[2].getstr())
                print("Statement: (" + name + " , " + definition + ")")

            return BMLStatement(name, definition)

        @self.pg.production('expression : STRING OPEN_STATEMENT STRING CLOSE_STATEMENT STATEMENT_END')
        def definition(p):
            name = ""
            definition = ""

            if p[0].gettokentype() == "STRING" and p[2].gettokentype() == "STRING":
                name = self.__removeQuotes(p[0].getstr())
                definition = self.__removeQuotes(p[2].getstr())

                print("Definition: (" + name + " , " + definition + ")")
            
            return BMLDefinition(name, definition)

        @self.pg.error
        def error_handle(token):
            raise SyntaxError("Error on ( Token, type \"" + token.gettokentype() + "\" , Value \"" + token.getstr() + "\")")


    def build(self):
        return self.pg.build()

    def __removeQuotes(self, tok):
        return re.sub(r'^"|"$', '', tok)

Okay, everything seems okay right? Well I'm running into this issue. Maybe I'm a stupid beginner that doesn't understand this yet, who cares...

So here's where I call things:
SimpleBML is the simple thing I use to call the lexer and parser.

from bmls.language.lexer import BMLLexer
from bmls.language.parser import BMLParser
from bmls.language.parser.definitions import BMLDefinition

class SimpleBML():
    def __init__(self):
        pass

    def parse(self, content):
        lexer = BMLLexer().build()
        tokens = lexer.lex(content)

        pg = BMLParser()
        pg.parse()
        parser = pg.build()
        tkk = parser.parse(tokens)

And this is where I call SimpleBML:

from bmls.language.simple import SimpleBML
import rply

class BMLInterpreter:
    def run(self):
        self.lexer = SimpleBML()

        while True:
            inf = input("Interpreter > ")
            tokens = self.lexer.parse(inf)
            if tokens != None:
                for token in tokens:
                    print(token)


BMLInterpreter().run()

Anyways, tldr; I can't write multiple statements in one line.

Statement stuff:

PS C:\Users\*****\OneDrive\Documents\langs\bml> & C:/Python39/python.exe 
    c:/Users/*****/OneDrive/Documents/langs/bml/interp.py
    Interpreter > { "DOCTYPE" "HTML" };
    Statement: (DOCTYPE , HTML)
    Interpreter >

Definition stuff:

PS C:\Users\*****\OneDrive\Documents\langs\bml> & C:/Python39/python.exe 
    c:/Users/*****/OneDrive/Documents/langs/bml/interp.py
    Interpreter > "h1" { "Hello" };
    Definition: (h1 , Hello)
    Interpreter >

But I can't combine these things:

Interpreter > "h1" { "Hello" }; { "h1" "Hello" };
Definition: (h1 , Hello)
Traceback (most recent call last):
  File "c:\Users\*****\OneDrive\Documents\langs\bml\interp.py", line 16, in <module>
    BMLInterpreter().run()
  File "c:\Users\*****\OneDrive\Documents\langs\bml\interp.py", line 10, in run
    tokens = self.lexer.parse(inf)
  File "c:\Users\*****\OneDrive\Documents\langs\bml\bmls\language\simple\simple.py", line 16, in parse
    tkk = parser.parse(tokens)
  File "C:\Python39\lib\site-packages\rply\parser.py", line 60, in parse
    self.error_handler(lookahead)
  File "c:\Users\*****\OneDrive\Documents\langs\bml\bmls\language\parser\parser.py", line 45, in error_handle       
    raise SyntaxError("Error on ( Token, type \"" + token.gettokentype() + "\" , Value \"" + token.getstr() + "\")")
SyntaxError: Error on ( Token, type "OPEN_STATEMENT" , Value "{")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions