-
Notifications
You must be signed in to change notification settings - Fork 0
/
TextSource.cs
268 lines (226 loc) · 8.67 KB
/
TextSource.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
using System;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
// This file comes from ExCSS almost unmodified.
namespace SkiaSharpOpenGLBenchmark
{
public sealed class TextSource : IDisposable
{
// Number of bytes requested from the base stream per read; also sizes the scratch buffers.
private const int BufferSize = 4096;
// Stream the text is decoded from. It is null when the source was built from a string.
private readonly Stream _baseStream;
// Copy of the raw bytes appended while the encoding is not yet certain, so the
// content can be decoded again when CurrentEncoding is changed.
private readonly MemoryStream _raw;
// Scratch buffer that receives the bytes of one read.
private readonly byte[] _buffer;
// Scratch buffer that receives the characters decoded from _buffer.
private readonly char[] _chars;
// Decoded text accumulated so far; set to null by Dispose.
private StringBuilder _content;
// How firmly the current encoding has been established.
private EncodingConfidence _confidence;
// True once a zero-length chunk has been appended, or when the source is a string.
private bool _finished;
// Encoding currently used to interpret the bytes.
private Encoding _encoding;
// Decoder obtained from _encoding; replaced whenever the encoding changes.
private Decoder _decoder;
// Releases the raw byte copy and passes the cleared content builder to ToPool()
// (presumably returning it to a builder pool - ToPool is defined elsewhere).
// A null _content marks the instance as already disposed, so repeated calls are no-ops.
public void Dispose()
{
    if (_content != null)
    {
        _raw.Dispose();
        _content.Clear().ToPool();
        _content = null;
    }
}
// Describes how firmly the encoding of the source has been established.
private enum EncodingConfidence : byte
{
// Initial state for stream sources; the CurrentEncoding setter only acts in this state.
Tentative,
// Set after a byte order mark was found or after CurrentEncoding settled the encoding;
// raw bytes are no longer recorded once this is reached.
Certain,
// Used for sources created from a string, where no bytes are decoded.
Irrelevant
}
// Shared initialisation for all public constructors: allocates the scratch
// buffers and the raw byte store, and picks the decoder. A null encoding
// falls back to TextEncoding.Utf8.
private TextSource(Encoding encoding)
{
    Index = 0;
    _raw = new MemoryStream();
    _buffer = new byte[BufferSize];
    // One char larger than the byte buffer, as in the original sizing.
    _chars = new char[BufferSize + 1];

    var effectiveEncoding = encoding;
    if (effectiveEncoding == null)
    {
        effectiveEncoding = TextEncoding.Utf8;
    }

    _encoding = effectiveEncoding;
    _decoder = _encoding.GetDecoder();
}
// Creates a source over an in-memory string. There is no base stream (null is
// passed on), the whole text is available immediately, and the encoding
// confidence is marked Irrelevant so CurrentEncoding changes are ignored.
public TextSource(string source) : this(null, TextEncoding.Utf8)
{
    _confidence = EncodingConfidence.Irrelevant;
    _content.Append(source);
    // Nothing will ever be read from a stream for this source.
    _finished = true;
}
// Creates a source that decodes its text from baseStream. The encoding is only
// tentative at this point. No null check is made on baseStream because the
// string constructor deliberately passes null.
public TextSource(Stream baseStream, Encoding encoding = null) : this(encoding)
{
    _confidence = EncodingConfidence.Tentative;
    _content = Pool.NewStringBuilder();
    _baseStream = baseStream;
}
// The text decoded so far, as a string.
public string Text
{
    get { return _content.ToString(); }
}

// The decoded character at the given absolute position.
public char this[int index]
{
    get { return _content[index]; }
}

// Cursor position, in characters, within the decoded content.
public int Index { get; set; }

// Number of characters decoded so far.
public int Length
{
    get { return _content.Length; }
}
// Gets or sets the encoding used to decode the stream.
//
// The setter only has an effect while the confidence is Tentative. If the
// current encoding is already a Unicode encoding, or the requested encoding
// equals the current one, the confidence just becomes Certain. Otherwise all
// raw bytes recorded so far are decoded again with the new encoding:
//  - if the text up to the cursor is unchanged, the remainder is swapped in
//    place and the confidence becomes Certain;
//  - otherwise the content is replaced, the cursor is reset to 0 and a
//    NotSupportedException is thrown.
public Encoding CurrentEncoding
{
    get => _encoding;
    set
    {
        if (_confidence != EncodingConfidence.Tentative) return;

        if (_encoding.IsUnicode())
        {
            _confidence = EncodingConfidence.Certain;
            return;
        }

        // Any Unicode request is normalised to UTF-8.
        if (value.IsUnicode()) value = TextEncoding.Utf8;

        if (value == _encoding)
        {
            _confidence = EncodingConfidence.Certain;
            return;
        }

        _encoding = value;
        _decoder = value.GetDecoder();

        // Decode everything read so far again with the new decoder.
        var raw = _raw.ToArray();
        var rawChars = new char[_encoding.GetMaxCharCount(raw.Length)];
        var charLength = _decoder.GetChars(raw, 0, raw.Length, rawChars, 0);
        var content = new string(rawChars, 0, charLength);

        // The cursor may sit past the end of the old content (ReadCharacter keeps
        // advancing at end of file), so clamp against BOTH the old and the new
        // text; otherwise _content.ToString(0, index) throws.
        var index = Math.Min(Index, Math.Min(content.Length, _content.Length));

        if (content.Substring(0, index).Is(_content.ToString(0, index)))
        {
            // If everything seems to fit up to this point, do an
            // instant switch
            _confidence = EncodingConfidence.Certain;
            _content.Remove(index, _content.Length - index);
            _content.Append(content.Substring(index));
        }
        else
        {
            // Otherwise the already-consumed text changed: reset the state so a
            // caller could restart from the beginning, then signal that this
            // is not supported.
            Index = 0;
            _content.Clear().Append(content);
            throw new NotSupportedException();
        }
    }
}
// Returns the character at the cursor and moves the cursor one position on.
// More data is pulled from the stream when the cursor has reached the end of
// the decoded content. If nothing more is available, Symbols.EndOfFile is
// returned; the cursor is still advanced in that case.
public char ReadCharacter()
{
    if (Index >= _content.Length)
    {
        ExpandBuffer(BufferSize);
    }

    var position = Index;
    Index = position + 1;

    if (position < _content.Length)
    {
        return _content[position];
    }

    return Symbols.EndOfFile;
}
// Reads up to `characters` characters starting at the cursor and advances the
// cursor by `characters` (even when fewer were available).
//
// Returns the characters that could be read. The result is shorter than
// requested when the source ends early, and empty when the cursor is already
// at or past the end of the content.
public string ReadCharacters(int characters)
{
    var start = Index;

    if (start + characters > _content.Length)
    {
        // Not enough decoded text yet: try to pull more from the stream.
        ExpandBuffer(Math.Max(BufferSize, characters));
    }

    Index += characters;

    // The cursor can already be beyond the content (ReadCharacter keeps
    // advancing at end of file). Without this guard the available length would
    // be negative and StringBuilder.ToString would throw.
    if (start >= _content.Length)
    {
        return string.Empty;
    }

    var available = _content.Length - start;
    return _content.ToString(start, Math.Min(characters, available));
}
/*
 * Look at a character relative to the cursor without moving the cursor.
 *
 * offset - Offset in characters (not bytes) from the cursor; the content
 *          being indexed is already-decoded text.
 *
 * Returns Symbols.EndOfFile when the requested position lies outside the
 * content decoded so far. No additional data is read from the stream.
 */
public char Peek(int offset)
{
    var position = Index + offset;

    // Guard both ends: a negative position (large negative offset) used to
    // throw from the StringBuilder indexer instead of reporting end of file.
    if (position < 0 || position >= _content.Length)
    {
        return Symbols.EndOfFile;
    }

    return _content[position];
}
// Moves the cursor forward, stopping at the end of the decoded content.
// Despite the parameter name, the amount is counted in characters of the
// decoded text, because it is compared against the content length.
public void Advance(int bytes)
{
    var remaining = _content.Length - Index;
    var step = bytes > remaining ? remaining : bytes;
    Index += step;
}
// Returns `len` characters of the decoded content starting at `start`.
// If the requested range runs past the end of the content, an error line is
// written to the console and an empty string is returned.
public string GetContents(int start, int len)
{
    if (start + len > _content.Length)
    {
        Console.WriteLine("ERROR: TextSource.GetContents asked for more than there is");
        return ""; // should throw an exception!
    }

    return _content.ToString(start, len);
}
// Reads and decodes the remainder of the source asynchronously.
//
// cancellationToken - passed on to the underlying stream reads.
//
// Byte order mark detection only runs when nothing has been decoded yet AND
// the source is not finished. The second condition matters for sources built
// from an empty string: they have no base stream, so attempting a read would
// dereference null. This mirrors the guard used by ExpandBuffer.
public async Task PrefetchAllAsync(CancellationToken cancellationToken)
{
    if (!_finished && _content.Length == 0)
    {
        await DetectByteOrderMarkAsync(cancellationToken).ConfigureAwait(false);
    }

    while (!_finished)
    {
        await ReadIntoBufferAsync(cancellationToken).ConfigureAwait(false);
    }
}
// Reads the first chunk of the stream, checks it for a byte order mark (BOM)
// and appends the decoded text to the content.
//
// When a BOM is found, the matching encoding and a fresh decoder are
// installed, the BOM bytes are dropped from the buffer and the encoding
// confidence becomes Certain.
//
// cancellationToken - passed on to the underlying stream reads.
private async Task DetectByteOrderMarkAsync(CancellationToken cancellationToken)
{
    // Longest BOM recognised below (UTF-32 and GB18030 use four bytes).
    const int LongestByteOrderMark = 4;

    // A stream read may return fewer bytes than requested, so keep reading
    // until enough bytes for the longest BOM are available or the stream ends.
    var count = 0;
    var endOfStream = false;
    while (count < LongestByteOrderMark && !endOfStream)
    {
        var read = await _baseStream.ReadAsync(_buffer, count, BufferSize - count, cancellationToken).ConfigureAwait(false);
        endOfStream = read == 0;
        count += read;
    }

    var offset = 0;

    // The UTF-32 LE mark starts with the UTF-16 LE mark, so the four-byte
    // forms must be tested before the two-byte forms.
    if (count > 2 && _buffer[0] == 0xef && _buffer[1] == 0xbb && _buffer[2] == 0xbf)
    {
        // EF BB BF: UTF-8
        _encoding = TextEncoding.Utf8;
        offset = 3;
    }
    else if (count > 3 && _buffer[0] == 0xff && _buffer[1] == 0xfe && _buffer[2] == 0x0 && _buffer[3] == 0x0)
    {
        // FF FE 00 00: UTF-32 little endian
        _encoding = TextEncoding.Utf32Le;
        offset = 4;
    }
    else if (count > 3 && _buffer[0] == 0x0 && _buffer[1] == 0x0 && _buffer[2] == 0xfe && _buffer[3] == 0xff)
    {
        // 00 00 FE FF: UTF-32 big endian
        _encoding = TextEncoding.Utf32Be;
        offset = 4;
    }
    else if (count > 1 && _buffer[0] == 0xfe && _buffer[1] == 0xff)
    {
        // FE FF: UTF-16 big endian
        _encoding = TextEncoding.Utf16Be;
        offset = 2;
    }
    else if (count > 1 && _buffer[0] == 0xff && _buffer[1] == 0xfe)
    {
        // FF FE: UTF-16 little endian
        _encoding = TextEncoding.Utf16Le;
        offset = 2;
    }
    else if (count > 3 && _buffer[0] == 0x84 && _buffer[1] == 0x31 && _buffer[2] == 0x95 && _buffer[3] == 0x33)
    {
        // 84 31 95 33: GB18030
        _encoding = TextEncoding.Gb18030;
        offset = 4;
    }

    if (offset > 0)
    {
        // Drop the BOM so it is not decoded as text.
        count -= offset;
        Array.Copy(_buffer, offset, _buffer, 0, count);
        _decoder = _encoding.GetDecoder();
        _confidence = EncodingConfidence.Certain;
    }

    // AppendContentFromBuffer treats a zero-length chunk as end of data. If the
    // bytes read so far were exactly a BOM but the stream has not ended, fetch
    // the next chunk first so the source is not marked finished prematurely.
    if (count == 0 && !endOfStream)
    {
        count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
    }

    AppendContentFromBuffer(count);
}
// Asynchronously reads the next chunk (up to BufferSize bytes) from the base
// stream and appends its decoded text to the content.
private async Task ReadIntoBufferAsync(CancellationToken cancellationToken)
{
    var pendingRead = _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken);
    var byteCount = await pendingRead.ConfigureAwait(false);
    AppendContentFromBuffer(byteCount);
}
// Synchronously reads from the base stream until at least `size` characters
// are available beyond the cursor, or the source is finished. The very first
// call on an unread stream runs byte order mark detection, blocking on its
// asynchronous implementation.
private void ExpandBuffer(long size)
{
    if (!_finished && _content.Length == 0)
    {
        DetectByteOrderMarkAsync(CancellationToken.None).Wait();
    }

    // The cursor does not move while reading, so the target is fixed.
    var required = size + Index;
    while (!_finished && required > _content.Length)
    {
        ReadIntoBuffer();
    }
}
// Synchronously reads the next chunk (up to BufferSize bytes) from the base
// stream and appends its decoded text to the content.
private void ReadIntoBuffer()
{
    AppendContentFromBuffer(_baseStream.Read(_buffer, 0, BufferSize));
}
// Decodes the first `size` bytes of the read buffer and appends the resulting
// characters to the content. A size of zero marks the source as finished.
// While the encoding is not yet certain, the raw bytes are also recorded so
// they can be decoded again if the encoding changes.
private void AppendContentFromBuffer(int size)
{
    _finished = (size == 0);

    var decodedCount = _decoder.GetChars(_buffer, 0, size, _chars, 0);

    var encodingSettled = _confidence == EncodingConfidence.Certain;
    if (!encodingSettled)
    {
        _raw.Write(_buffer, 0, size);
    }

    _content.Append(_chars, 0, decodedCount);
}
}
}