From 30c036d9e6a327815f7327d103dba0847b7e8448 Mon Sep 17 00:00:00 2001 From: Boris Shingarov Date: Mon, 24 Jun 2024 11:36:17 -0400 Subject: [PATCH 1/5] [PreSmalltalks] Allow RBNodes to signal parserErrors In the RB parser, nodes are stateful objects which perform some of the parser's work. For one example, nodes are reparented during parsing; this happens down a chain of sends between nodes. If a parse error is discovered during execution of a node's method, the RBParser's explicit (non-exception) juggling of RBParseErrorNodes is out of reach (at a minimum, because nodes don't know the parser who is manipulating them). This commit allows RBNodes to signal `parserError: aString` as if it happened in the RBParser, by unwinding to the nearest RBParser context. This is an ad-hoc hack, but the solution I would like (cf. megaparsec) would involve having a language with first-class composition, which is what we are trying to build, and need this support in the substrate for. --- src/MathNotation-Pharo/RBNode.extension.st | 11 +++++++++++ src/PreSmalltalks/Behavior.extension.st | 6 ++++++ 2 files changed, 17 insertions(+) create mode 100644 src/MathNotation-Pharo/RBNode.extension.st create mode 100644 src/PreSmalltalks/Behavior.extension.st diff --git a/src/MathNotation-Pharo/RBNode.extension.st b/src/MathNotation-Pharo/RBNode.extension.st new file mode 100644 index 000000000..b13f68a76 --- /dev/null +++ b/src/MathNotation-Pharo/RBNode.extension.st @@ -0,0 +1,11 @@ +Extension { #name : #RBNode } + +{ #category : #'*MathNotation-Pharo' } +RBNode >> parserError: aString [ + self parserError: aString in: RBParser findContextReceivingMe +] + +{ #category : #'*MathNotation-Pharo' } +RBNode >> parserError: aString in: parsersContext [ + parsersContext return: (parsersContext receiver parserError: aString) +] diff --git a/src/PreSmalltalks/Behavior.extension.st b/src/PreSmalltalks/Behavior.extension.st new file mode 100644 index 000000000..1e8765820 --- /dev/null +++ 
b/src/PreSmalltalks/Behavior.extension.st @@ -0,0 +1,6 @@ +Extension { #name : #Behavior } + +{ #category : #'*PreSmalltalks' } +Behavior >> findContextReceivingMe [ + ^thisContext findContextSuchThat: [ :c | c receiver isKindOf: self ] +] From ceaa82a15d042719e5eed06c94b6b684d292399d Mon Sep 17 00:00:00 2001 From: Boris Shingarov Date: Sat, 22 Jun 2024 12:00:04 -0400 Subject: [PATCH 2/5] [PreSmalltalks] Abort first attempt at holes --- src/PreSmalltalks-Tests/HoleTest.class.st | 11 ----------- src/PreSmalltalks/Hole.class.st | 19 ------------------- src/PreSmalltalks/ProtoObject.extension.st | 6 ------ 3 files changed, 36 deletions(-) delete mode 100644 src/PreSmalltalks-Tests/HoleTest.class.st delete mode 100644 src/PreSmalltalks/Hole.class.st delete mode 100644 src/PreSmalltalks/ProtoObject.extension.st diff --git a/src/PreSmalltalks-Tests/HoleTest.class.st b/src/PreSmalltalks-Tests/HoleTest.class.st deleted file mode 100644 index 68af7f4a6..000000000 --- a/src/PreSmalltalks-Tests/HoleTest.class.st +++ /dev/null @@ -1,11 +0,0 @@ -Class { - #name : #HoleTest, - #superclass : #TestCase, - #category : #'PreSmalltalks-Tests' -} - -{ #category : #tests } -HoleTest >> testNotHole [ - self deny: nil isHole. - self deny: 123 isHole -] diff --git a/src/PreSmalltalks/Hole.class.st b/src/PreSmalltalks/Hole.class.st deleted file mode 100644 index 848b91916..000000000 --- a/src/PreSmalltalks/Hole.class.st +++ /dev/null @@ -1,19 +0,0 @@ -" -I represent a hole in a context, in a term-rewriting sense. -For the theoretical background, see e.g. ""Term Rewriting Systems"" by Terese (vol. 55 of ""Cambridge Tracts in Theoretical Computer Science""). 
-" -Class { - #name : #Hole, - #superclass : #Object, - #category : #PreSmalltalks -} - -{ #category : #testing } -Hole >> isHole [ - ^true -] - -{ #category : #printing } -Hole >> printOn: aStream [ - aStream nextPut: $_ -] diff --git a/src/PreSmalltalks/ProtoObject.extension.st b/src/PreSmalltalks/ProtoObject.extension.st deleted file mode 100644 index 4b93824f9..000000000 --- a/src/PreSmalltalks/ProtoObject.extension.st +++ /dev/null @@ -1,6 +0,0 @@ -Extension { #name : #ProtoObject } - -{ #category : #'*PreSmalltalks' } -ProtoObject >> isHole [ - ^false -] From ec98e8705b6c727fbd26c5aab7f782caaede860c Mon Sep 17 00:00:00 2001 From: Boris Shingarov Date: Sat, 22 Jun 2024 14:05:48 -0400 Subject: [PATCH 3/5] [MathNotation] Implement lexical scanning of holes --- src/MathNotation-Pharo/RBHoleToken.class.st | 25 +++++++++++++++++++ src/MathNotation-Pharo/RBScanner.extension.st | 24 ++++++++++++++++++ src/MathNotation-Pharo/RBToken.extension.st | 6 +++++ 3 files changed, 55 insertions(+) create mode 100644 src/MathNotation-Pharo/RBHoleToken.class.st create mode 100644 src/MathNotation-Pharo/RBToken.extension.st diff --git a/src/MathNotation-Pharo/RBHoleToken.class.st b/src/MathNotation-Pharo/RBHoleToken.class.st new file mode 100644 index 000000000..f4380d687 --- /dev/null +++ b/src/MathNotation-Pharo/RBHoleToken.class.st @@ -0,0 +1,25 @@ +Class { + #name : #RBHoleToken, + #superclass : #RBToken, + #category : #'MathNotation-Pharo' +} + +{ #category : #'concrete syntax' } +RBHoleToken class >> codePoint [ + "In the current implementation, we use $□ ('WHITE SQUARE', U+25A1) + to denote 'Hole' in concrete syntax, but any suitable character, + such as $– ('EN DASH', U+2013), $☐ ('BALLOT BOX', U+2610), etc. + should work, by changing ONLY this method." 
+ + ^16r25A1 +] + +{ #category : #testing } +RBHoleToken >> isHole [ + ^true +] + +{ #category : #accessing } +RBHoleToken >> length [ + ^1 +] diff --git a/src/MathNotation-Pharo/RBScanner.extension.st b/src/MathNotation-Pharo/RBScanner.extension.st index c552b026c..1dfc76fae 100644 --- a/src/MathNotation-Pharo/RBScanner.extension.st +++ b/src/MathNotation-Pharo/RBScanner.extension.st @@ -102,3 +102,27 @@ RBScanner >> isUnicodeMathOperator: aCharacter [ ) includes: aCharacter asInteger ] + +{ #category : #'*MathNotation-Pharo' } +RBScanner >> scanHole [ + self step. + ^RBHoleToken start: tokenStart +] + +{ #category : #'*MathNotation-Pharo' } +RBScanner >> scanToken [ + "fast-n-ugly. Don't write stuff like this. Has been found to cause cancer in laboratory rats. Basically a + case statement. Didn't use Dictionary because lookup is pretty slow." + + characterType = #alphabetic ifTrue: [^self scanIdentifierOrKeyword]. + (characterType = #digit + or: [currentCharacter = $- and: [(self classify: stream peek) = #digit]]) + ifTrue: [^self scanNumber]. + characterType = #binary ifTrue: [^self scanBinary: RBBinarySelectorToken]. + characterType = #special ifTrue: [^self scanSpecialCharacter]. + currentCharacter = $' ifTrue: [^self scanLiteralString]. + currentCharacter = $# ifTrue: [^self scanLiteral]. + currentCharacter = $$ ifTrue: [^self scanLiteralCharacter]. + currentCharacter codePoint = RBHoleToken codePoint ifTrue: [^self scanHole]. 
+ ^self scanUnknownCharacter +] diff --git a/src/MathNotation-Pharo/RBToken.extension.st b/src/MathNotation-Pharo/RBToken.extension.st new file mode 100644 index 000000000..41cec4b86 --- /dev/null +++ b/src/MathNotation-Pharo/RBToken.extension.st @@ -0,0 +1,6 @@ +Extension { #name : #RBToken } + +{ #category : #'*MathNotation-Pharo' } +RBToken >> isHole [ + ^false +] From 029dbd6a7f915e3cccc7c7ac9a8abc1e11fc9544 Mon Sep 17 00:00:00 2001 From: Boris Shingarov Date: Mon, 24 Jun 2024 11:43:36 -0400 Subject: [PATCH 4/5] [MathNotation] Implement sectioning See RBHoleToken class comment for a general description of how this works. We shall give more meaningful examples when we implement Squiggol (hopefully in the next few PRs). Things are going to become much more interesting when we implement LqST so Smalltalk blocks are routed to Core. --- src/MathNotation-Pharo/RBHoleToken.class.st | 51 +++++++++++++++++++ .../RBMessageNode.extension.st | 35 +++++++++++++ src/MathNotation-Pharo/RBNode.extension.st | 10 ++++ src/MathNotation-Pharo/RBParser.extension.st | 31 +++++++++++ .../SectionArgumentNode.class.st | 37 ++++++++++++++ .../RBParserTest.extension.st | 29 +++++++++++ 6 files changed, 193 insertions(+) create mode 100644 src/MathNotation-Pharo/RBMessageNode.extension.st create mode 100644 src/MathNotation-Pharo/RBParser.extension.st create mode 100644 src/MathNotation-Pharo/SectionArgumentNode.class.st create mode 100644 src/MathNotation-Tests/RBParserTest.extension.st diff --git a/src/MathNotation-Pharo/RBHoleToken.class.st b/src/MathNotation-Pharo/RBHoleToken.class.st index f4380d687..6e8273ed7 100644 --- a/src/MathNotation-Pharo/RBHoleToken.class.st +++ b/src/MathNotation-Pharo/RBHoleToken.class.st @@ -1,3 +1,54 @@ +" +RBHoleToken is a lexical-level representation of a hole, i.e. an empty space +in an incomplete term. For example, in category theory it is standard to write +𝒞(A,–) meaning 'that thing which takes B to answer 𝒞(A,B)'. 
In the 'sectioning' +notation (standard in BMF formalism), the function ⊕y takes x to x⊕y (here +⊕::A×B→C is an arbitrary binary operator). In term rewriting, it is customary +to allow terms to contain occurrences of a special constant symbol □. + +Although this construct can, in principle, be spelled out in coordinates, +in the above-mentioned applications this notational abstraction becomes +essential, as opposed to merely convenient. + +Implementation Notes: + +At the lexical level, the Hole is represented by a dedicated single character; +which character it is, is specified by RBHoleToken class>>codePoint. +The scanner isolates the rest of the parser from knowledge of this concrete +syntax. + +In the present notation, the scope of an incomplete term is always the message +send. For example, you can write + + a := 3+□ + +but not + + a := □ + +or + + ^□. + +Because this notation is an extension to our substrate language, which is +Smalltalk-80, the meaning of these incomplete terms is given by BlockClosures. +For example, + + 3+□ + +stands for + + [ :x | 3+x ] + +where x is some image-unique variable name. + +A message send may contain zero or more hole arguments (including self). +If there is more than one hole, each hole becomes a separate block argument; +the order of currying is + + self → arg₁ → … → argₙ. 
+ +" Class { #name : #RBHoleToken, #superclass : #RBToken, diff --git a/src/MathNotation-Pharo/RBMessageNode.extension.st b/src/MathNotation-Pharo/RBMessageNode.extension.st new file mode 100644 index 000000000..d5031de7e --- /dev/null +++ b/src/MathNotation-Pharo/RBMessageNode.extension.st @@ -0,0 +1,35 @@ +Extension { #name : #RBMessageNode } + +{ #category : #'*MathNotation-Pharo' } +RBMessageNode class >> receiver: aValueNode selector: aSelector keywordsPositions: positionList arguments: valueNodes [ + ^(self new) + receiver: aValueNode + selector: aSelector + keywordsPositions: positionList + arguments: valueNodes; + wrapSectionedSend +] + +{ #category : #'*MathNotation-Pharo' } +RBMessageNode >> wrapSectionedSend [ + | allArgs holes block blockArgs | + allArgs := {receiver}, arguments. + (allArgs noneSatisfy: #isSectionArgument) ifTrue: [ + ^self "ordinary MessageNode" + ]. + + "sectioned message send" + holes := allArgs select: #isSectionArgument. + blockArgs := holes collect: #holePlug. + + receiver := receiver holePlug. + receiver parent: self. + arguments := arguments collect: #holePlug. + arguments do: [ :eachArgument | eachArgument parent: self ]. + + block := RBBlockNode + arguments: blockArgs + body: (RBSequenceNode statements: (OrderedCollection with: self)). + blockArgs do: [ :eachBlockArg | eachBlockArg parent: block ]. 
+ ^block +] diff --git a/src/MathNotation-Pharo/RBNode.extension.st b/src/MathNotation-Pharo/RBNode.extension.st index b13f68a76..f328ceb5a 100644 --- a/src/MathNotation-Pharo/RBNode.extension.st +++ b/src/MathNotation-Pharo/RBNode.extension.st @@ -1,5 +1,15 @@ Extension { #name : #RBNode } +{ #category : #'*MathNotation-Pharo' } +RBNode >> holePlug [ + ^self +] + +{ #category : #'*MathNotation-Pharo' } +RBNode >> isSectionArgument [ + ^false +] + { #category : #'*MathNotation-Pharo' } RBNode >> parserError: aString [ self parserError: aString in: RBParser findContextReceivingMe diff --git a/src/MathNotation-Pharo/RBParser.extension.st b/src/MathNotation-Pharo/RBParser.extension.st new file mode 100644 index 000000000..341a8db83 --- /dev/null +++ b/src/MathNotation-Pharo/RBParser.extension.st @@ -0,0 +1,31 @@ +Extension { #name : #RBParser } + +{ #category : #'*MathNotation-Pharo' } +RBParser >> parseHole [ + | token node | + token := currentToken. + self step. + node := SectionArgumentNode newAt: token start. + self addCommentsTo: node. + ^node + +] + +{ #category : #'*MathNotation-Pharo' } +RBParser >> parsePrimitiveObject [ + currentToken isHole ifTrue: [^self parseHole]. + currentToken isIdentifier ifTrue: [^self parsePrimitiveIdentifier]. + (currentToken isLiteralToken and: [currentToken isMultiKeyword not]) + ifTrue: [^self parsePrimitiveLiteral]. + currentToken isLiteralArrayToken + ifTrue: + [^currentToken isForByteArray + ifTrue: [self parseLiteralByteArray] + ifFalse: [self parseLiteralArray]]. + currentToken isSpecial + ifTrue: + [currentToken value = $[ ifTrue: [^self saveCommentsDuring:[self parseBlock]]. + currentToken value = $( ifTrue: [^self parseParenthesizedExpression]. + currentToken value = ${ ifTrue: [^self parseArray]]. 
+ ^ self parserError: 'Variable or expression expected' +] diff --git a/src/MathNotation-Pharo/SectionArgumentNode.class.st b/src/MathNotation-Pharo/SectionArgumentNode.class.st new file mode 100644 index 000000000..6342a1565 --- /dev/null +++ b/src/MathNotation-Pharo/SectionArgumentNode.class.st @@ -0,0 +1,37 @@ +Class { + #name : #SectionArgumentNode, + #superclass : #RBVariableNode, + #classVars : [ + 'N' + ], + #category : #'MathNotation-Pharo' +} + +{ #category : #naming } +SectionArgumentNode class >> freshName [ + ^'_ß', self nextN printString +] + +{ #category : #'instance creation' } +SectionArgumentNode class >> newAt: aPosition [ + ^self + identifierNamed: self freshName + at: aPosition +] + +{ #category : #naming } +SectionArgumentNode class >> nextN [ + N isNil ifTrue: [ N :=0 ]. + N := N+1. + ^N +] + +{ #category : #substitution } +SectionArgumentNode >> holePlug [ + ^RBVariableNode named: name start: start +] + +{ #category : #testing } +SectionArgumentNode >> isSectionArgument [ + ^true +] diff --git a/src/MathNotation-Tests/RBParserTest.extension.st b/src/MathNotation-Tests/RBParserTest.extension.st new file mode 100644 index 000000000..52786f51c --- /dev/null +++ b/src/MathNotation-Tests/RBParserTest.extension.st @@ -0,0 +1,29 @@ +Extension { #name : #RBParserTest } + +{ #category : #'*MathNotation-Tests' } +RBParserTest >> testSectioningCollect [ + self + assert: (#(1 2 3) collect: 100+β–‘) + equals: #(101 102 103) +] + +{ #category : #'*MathNotation-Tests' } +RBParserTest >> testSectioningL [ + self + assert: (β–‘+4 value: 3) + equals: 7 +] + +{ #category : #'*MathNotation-Tests' } +RBParserTest >> testSectioningLR [ + self + assert: (β–‘+β–‘ value: 3 value: 4) + equals: 7 +] + +{ #category : #'*MathNotation-Tests' } +RBParserTest >> testSectioningR [ + self + assert: (3+β–‘ value: 4) + equals: 7 +] From 799f66395b74c14c04a091e56cfbcb86a23b0eb3 Mon Sep 17 00:00:00 2001 From: Boris Shingarov Date: Mon, 24 Jun 2024 09:42:21 -0400 Subject: [PATCH 
5/5] =?UTF-8?q?[Sectioning]=20Signal=20parserError=20if=20?= =?UTF-8?q?=E2=96=A1=20is=20used=20in=20something=20other=20than=20a=20mes?= =?UTF-8?q?sage=20send?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/MathNotation-Pharo/SectionArgumentNode.class.st | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/MathNotation-Pharo/SectionArgumentNode.class.st b/src/MathNotation-Pharo/SectionArgumentNode.class.st index 6342a1565..5e6027eae 100644 --- a/src/MathNotation-Pharo/SectionArgumentNode.class.st +++ b/src/MathNotation-Pharo/SectionArgumentNode.class.st @@ -35,3 +35,11 @@ SectionArgumentNode >> holePlug [ SectionArgumentNode >> isSectionArgument [ ^true ] + +{ #category : #accessing } +SectionArgumentNode >> parent: p [ + (p isKindOf: RBMessageNode) ifFalse: [ + self parserError: 'β–‘ can only be a message''s arg or receiver' + ]. + ^super parent: p +]