Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 75: add stringClass attributes to FileSystem #82

Merged
merged 11 commits into from
Dec 14, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/FileSystem-Core/AbstractFileReference.class.st
Original file line number Diff line number Diff line change
@@ -743,10 +743,13 @@ AbstractFileReference >> readStreamDo: doBlock ifAbsent: absentBlock [

{ #category : 'streams' }
AbstractFileReference >> readStreamEncoded: anEncoding [
	"Answer a ZnCharacterReadStream layered on the receiver's binary read stream, using anEncoding.
	The stringClass handed to the stream is Unicode7 when String is in Unicode comparison mode, and String otherwise."

	| byteStream resultClass |
	byteStream := self binaryReadStream.
	resultClass := String isInUnicodeComparisonMode
		ifTrue: [ Unicode7 ]
		ifFalse: [ String ].
	^ ZnCharacterReadStream
		on: byteStream
		encoding: anEncoding
		stringClass: resultClass
]

{ #category : 'streams' }
11 changes: 7 additions & 4 deletions src/FileSystem-GemStone-Kernel/CharacterCollection.extension.st
Original file line number Diff line number Diff line change
@@ -20,12 +20,15 @@ CharacterCollection >> asResolvedBy: aFileSystem [
{ #category : '*filesystem-gemstone-kernel' }
CharacterCollection >> asZnCharacterEncoder [
"Return a ZnCharacterEncoder instance using the receiver as identifier"

" 'UTF-8' asZnCharacterEncoder "

((self select: [ :each | each isAlphaNumeric ]) asLowercase) = 'utf8' ifFalse: [ self error: 'Only utf8 encoding supported'].
^ ZnUTF8Encoder new

(self select: [ :each | each isAlphaNumeric ]) asLowercase = 'utf8'
ifTrue: [ ^ ZnUTF8Encoder new ]
ifFalse: [
(self select: [ :each | each isAlphaNumeric ]) asLowercase = '8bit'
ifFalse: [ self error: 'only 8bit or utf8 encoding supported' ] ].
^ Zn8BITEncoder new
]

{ #category : '*filesystem-gemstone-kernel' }
4 changes: 2 additions & 2 deletions src/FileSystem-Tests-Core/FileReferenceTest.class.st
Original file line number Diff line number Diff line change
@@ -1164,7 +1164,7 @@ FileReferenceTest >> testReadStreamIfAbsent [
{ #category : 'tests' }
FileReferenceTest >> testRelativeTo [

| alpha beta reference path result |
| alpha beta |
alpha := sandbox / 'alpha'.
beta := alpha / 'beta'.
self
@@ -1326,7 +1326,7 @@ FileReferenceTest >> testWithExtension [
{ #category : 'tests' }
FileReferenceTest >> testWithoutExtension [

| reference result |
| reference |
reference := sandbox / 'alpha.beta.gamma'.
reference := reference withoutExtension.
self
123 changes: 123 additions & 0 deletions src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"
Part of FileSystem

=========

I implement the encoding and decoding of Extended ASCII (an 8-bit character encoding), producing instances of class String.

The encoding is consistent with topaz 'fileformat 8BIT' (see section 1.3 Handling text outside the ASCII range in the topaz manual[1] for more details).

[1] https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/GS64-Topaz-3.6.htm?https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/1-Tutorial.htm#pgfId-1130673
"
Class {
#name : 'Zn8BITEncoder',
#superclass : 'ZnCharacterEncoder',
#classVars : [
'Default'
],
#category : 'Zinc-Character-Encoding-Core'
}

{ #category : 'accessing' }
Zn8BITEncoder class >> default [
	"Answer the encoder instance cached in the Default class variable, creating it on first use."

	Default isNil
		ifTrue: [ Default := self new ].
	^ Default
]

{ #category : 'accessing' }
Zn8BITEncoder class >> handlesEncoding: string [
	"Answer whether the canonical form of the encoding name in string equals '8bit'."

	| canonicalName |
	canonicalName := self canonicalEncodingIdentifier: string.
	^ canonicalName = '8bit'
]

{ #category : 'accessing' }
Zn8BITEncoder class >> knownEncodingIdentifiers [
	"Answer the encoding identifiers served by this class: a literal array holding the single symbol #'8bit'."

^ #( #'8bit' )
]

{ #category : 'instance creation' }
Zn8BITEncoder class >> newForEncoding: string stringClass: stringClass [
	"Create a new instance of the receiver and set its stringClass to the given class.
	The string argument is not consulted here, and no subclass lookup takes place.
	Answers whatever #stringClass: answers."

	| encoder |
	encoder := self new.
	^ encoder stringClass: stringClass
]

{ #category : 'converting' }
Zn8BITEncoder >> backOnStream: stream [
	"Step stream back by one element, i.e. one character in this single-byte encoding.
	Signals an Error when the stream is already at position 0."

	| currentPosition |
	currentPosition := stream position.
	currentPosition = 0
		ifTrue: [ Error signal: 'Cannot move backward past the start of the stream.' ].
	stream skip: -1
]

{ #category : 'convenience' }
Zn8BITEncoder >> decodeAsCodePoints: bytes [
	"Decode bytes and answer the result of String withBytes: bytes.
	NOTE(review): the selector suggests a collection of integer code points, but the value answered here is built with String withBytes: and the receiver's configured stringClass is not used -- confirm what callers expect."

^ String withBytes: bytes
]

{ #category : 'convenience' }
Zn8BITEncoder >> decodeBytes: bytes [
	"Answer a string built from bytes, as an instance of the receiver's configured stringClass."

	| targetClass |
	targetClass := self stringClass.
	^ targetClass withBytes: bytes
]

{ #category : 'converting' }
Zn8BITEncoder >> encodedByteCountFor: character [
	"Answer how many bytes are needed to encode character.
	This encoder always answers 1; the character argument is not inspected."

^ 1
]

{ #category : 'convenience' }
Zn8BITEncoder >> encodeString: string [
	"Encode string and answer the result of sending it #asByteArray."

^ string asByteArray
]

{ #category : 'accessing' }
Zn8BITEncoder >> identifier [
	"Answer the symbol naming this encoding, #'8bit'."

^ #'8bit'
]

{ #category : 'converting' }
Zn8BITEncoder >> nextCodePointFromStream: stream [
	"Read the next element from stream and answer it unchanged as the code point.
	Presumably stream is a binary stream answering integers -- TODO confirm against callers."

^ stream next
]

{ #category : 'converting' }
Zn8BITEncoder >> nextFromStream: stream [
	"Consume one element from stream and answer the Character whose code point is that element."

	| element |
	element := stream next.
	^ Character codePoint: element
]

{ #category : 'converting' }
Zn8BITEncoder >> nextPutCodePoint: codePoint toStream: stream [
	"Write the encoding for Integer code point to stream, answering the result of #nextPut:.
	The code point is converted to a Character before it is put on the stream.
	NOTE(review): #nextCodePointFromStream: reads raw stream elements, whereas this writes a Character rather than the integer itself -- confirm the target stream accepts Characters. No range check is made on codePoint here."

^ stream nextPut: (Character codePoint: codePoint)
]

{ #category : 'convenience' }
Zn8BITEncoder >> readInto: string startingAt: offset count: requestedCount fromStream: stream [
	"Fill string with up to requestedCount characters decoded from stream, storing the first at index offset.
	Answer how many characters were stored; this is less than requestedCount when stream reaches its end first."

	0 to: requestedCount - 1 do: [ :readSoFar |
		stream atEnd
			ifTrue: [ ^ readSoFar ].
		string
			codePointAt: offset + readSoFar
			put: (self nextCodePointFromStream: stream) ].
	^ requestedCount
]
30 changes: 21 additions & 9 deletions src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st
Original file line number Diff line number Diff line change
@@ -68,7 +68,13 @@ ZnBufferedReadStream >> closed [
ZnBufferedReadStream >> collectionSpecies [
^ stream isBinary
ifTrue: [ ByteArray ]
ifFalse: [ String ]
ifFalse: [
(stream respondsTo: #'stringClass')
ifTrue: [ stream stringClass ]
ifFalse: [
String isInUnicodeComparisonMode
ifTrue: [ Unicode7 ]
ifFalse: [ String ] ] ]
]

{ #category : 'accessing' }
@@ -403,25 +409,31 @@ ZnBufferedReadStream >> uint8 [
{ #category : 'accessing' }
ZnBufferedReadStream >> upTo: value [
"Read upto but not including value and return them as a collection.
If value is not found, return the entire contents of the stream.
This could be further optimzed."
If value is not found, return the entire contents of the stream."

^ self collectionSpecies
streamContents: [ :writeStream | | element |
[ self atEnd or: [ (element := self next) = value ] ] whileFalse: [
writeStream nextPut: element ] ]
streamContents: [ :writeStream | | ch |
[ self atEnd or: [ (ch := self next) = value ] ] whileFalse: [
writeStream nextPut: ch ] ]
]

{ #category : 'accessing' }
ZnBufferedReadStream >> upToAll: aCollection [
"Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream."

| startPos endMatch result x |
aCollection isEmpty ifTrue: [ ^aCollection ].
startPos := self position.
"upTo: will stop before aCollection first"
x := self upTo: aCollection first.
self atEnd ifTrue: [ ^ x ].
2 to: aCollection size do: [:i |
self peek = (aCollection at: i)
self atEnd
ifTrue: [
aCollection size <= 1
ifTrue: [ ^ x ].
self position: startPos.
^ self upToEnd].
2 to: aCollection size do: [:i | | y |
(y := self peek) = (aCollection at: i)
ifTrue: [ self next ]
ifFalse: [ self position: startPos.
^ self upToEnd ] ].
32 changes: 31 additions & 1 deletion src/Zinc-Character-Encoding-Core/ZnCharacterEncoder.class.st
Original file line number Diff line number Diff line change
@@ -41,6 +41,9 @@ Part of Zinc HTTP Components.
Class {
#name : 'ZnCharacterEncoder',
#superclass : 'Object',
#instVars : [
'stringClass'
],
#category : 'Zinc-Character-Encoding-Core'
}

@@ -70,12 +73,25 @@ ZnCharacterEncoder class >> knownEncodingIdentifiers [
ZnCharacterEncoder class >> newForEncoding: string [
	"Answer a new character encoder for the encoding described by string.
	Delegates to #newForEncoding:stringClass:, passing Unicode7 when String is in Unicode comparison mode and String otherwise."

	| defaultStringClass |
	defaultStringClass := String isInUnicodeComparisonMode
		ifTrue: [ Unicode7 ]
		ifFalse: [ String ].
	^ self newForEncoding: string stringClass: defaultStringClass
]

{ #category : 'instance creation' }
ZnCharacterEncoder class >> newForEncoding: string stringClass: stringClass [
"Return a new character encoder object for an encoding described by string.
Search for a subclass that handles it and delegate (subclassResponsibility)."

| concreteSubclass |
concreteSubclass := self allSubclasses
detect: [ :each | each handlesEncoding: string ]
ifNone: [ ^ self error: 'The ', string printString, ' is not currently supported.' ].
^ concreteSubclass newForEncoding: string
^ concreteSubclass newForEncoding: string stringClass: stringClass
]

{ #category : 'converting' }
@@ -157,3 +173,17 @@ ZnCharacterEncoder >> nextPut: character toStream: stream [

self nextPutCodePoint: character asInteger toStream: stream
]

{ #category : 'accessing' }
ZnCharacterEncoder >> stringClass [
	"Answer the class stored in the stringClass instance variable.
	When none has been set, store and answer Unicode7 if String is in Unicode comparison mode, String otherwise."

	stringClass isNil
		ifTrue: [
			stringClass := String isInUnicodeComparisonMode
				ifTrue: [ Unicode7 ]
				ifFalse: [ String ] ].
	^ stringClass
]

{ #category : 'accessing' }
ZnCharacterEncoder >> stringClass: aStringClass [
	"Store aStringClass as the class answered by #stringClass. Answers the receiver."

	stringClass := aStringClass
]
29 changes: 28 additions & 1 deletion src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st
Original file line number Diff line number Diff line change
@@ -13,12 +13,29 @@ Part of Zinc HTTP Components.
Class {
#name : 'ZnCharacterReadStream',
#superclass : 'ZnEncodedReadStream',
#instVars : [
'stringClass'
],
#category : 'Zinc-Character-Encoding-Core'
}

{ #category : 'instance creation' }
ZnCharacterReadStream class >> on: wrappedStream encoding: encoding stringClass: stringClass [
	"Answer a new instance reading from wrappedStream, configured with encoding and then with stringClass.
	The three setters are sent in that order."

	| characterStream |
	characterStream := self new.
	characterStream on: wrappedStream.
	characterStream encoding: encoding.
	characterStream stringClass: stringClass.
	^ characterStream
]

{ #category : 'accessing' }
ZnCharacterReadStream >> collectionSpecies [
^ String
^ self stringClass
]

{ #category : 'accessing' }
ZnCharacterReadStream >> encoder [
	"Answer the encoder used by this stream.
	When none is set yet, take the inherited one, send it #stringClass: with the receiver's stringClass, and cache the answer of that send."

	encoder isNil
		ifTrue: [ encoder := super encoder stringClass: self stringClass ].
	^ encoder
]

{ #category : 'accessing' }
@@ -91,6 +108,16 @@ ZnCharacterReadStream >> readInto: collection startingAt: offset count: requeste

]

{ #category : 'accessing' }
ZnCharacterReadStream >> stringClass [
	"Answer the class stored in the stringClass instance variable.
	When none has been set, store and answer Unicode7 if String is in Unicode comparison mode, String otherwise.
	This is the same default rule used by ZnCharacterEncoder>>stringClass and AbstractFileReference>>readStreamEncoded:, so a stream created without an explicit stringClass agrees with its encoder."

	^ stringClass
		ifNil: [
			stringClass := String isInUnicodeComparisonMode
				ifTrue: [ Unicode7 ]
				ifFalse: [ String ] ]
]

{ #category : 'accessing' }
ZnCharacterReadStream >> stringClass: aStringClass [
	"Store aStringClass as the class answered by #stringClass. Answers the receiver."

	stringClass := aStringClass
]

{ #category : 'accessing' }
ZnCharacterReadStream >> upToAll: aCollection [
"Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream."
Loading