Skip to content

Commit

Permalink
Standardise string encoding with constant variable
Browse files Browse the repository at this point in the history
Addresses #1, with #6 in mind
  • Loading branch information
jakewilliami committed Sep 1, 2022
1 parent e38e925 commit 1c1ba4b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 21 deletions.
4 changes: 2 additions & 2 deletions src/HiddenFiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ export ishidden
# https://github.com/osquery/osquery/blob/598983db97459f858e7a9cc5c731409ffc089b48/osquery/tables/system/darwin/extended_attributes.cpp#L111-L144
# https://github.com/objective-see/ProcInfo/blob/ec51090fcf741a9e045dd3e5119cb5cc8750efd3/procInfo/Binary.m#L121-L172
# NOTE: this function will fail if you give it f as "/"
function _k_mditem_content_type_tree(f::AbstractString, str_encoding::Unsigned = K_CFSTRING_ENCODING_MACROMAN)
function _k_mditem_content_type_tree(f::AbstractString, str_encoding::Unsigned = CF_STRING_ENCODING)
cfstr = _cfstring_create_with_cstring(f, str_encoding)
mditem = _mditem_create(cfstr)
mdattrs = _mditem_copy_attribute(mditem, K_MDITEM_CONTENT_TYPE_TREE)
Expand All @@ -81,7 +81,7 @@ export ishidden
for i in 0:(cfarr_len - 1)
attr = _cfarray_get_value_at_index(mdattrs, i)
if attr != C_NULL #&& !iszero(_cfstring_get_length(attr))
push!(content_types, _string_from_cf_string(attr))
push!(content_types, _string_from_cf_string(attr, str_encoding))
end
end
return content_types
Expand Down
40 changes: 21 additions & 19 deletions src/utils/darwin.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
# https://opensource.apple.com/source/CF/CF-635/CFString.h.auto.html
# https://developer.apple.com/documentation/corefoundation/cfstringbuiltinencodings
const K_CFSTRING_ENCODING_MACROMAN = 0x0
const K_CFSTRING_ENCODING_WINDOWSLATIN1 = 0x0500 # ANSI codepage 1252
const K_CFSTRING_ENCODING_ISOLATIN1 = 0x0201 # ISO 8859-1
const K_CFSTRING_ENCODING_NEXTSTEPLATIN = 0x0B01 # NextStep encoding
const K_CFSTRING_ENCODING_ASCII = 0x0600 # 0..127 (in creating CFString, values greater than 0x7F are treated as corresponding Unicode value)
const K_CFSTRING_ENCODING_UNICODE = 0x0100 # kTextEncodingUnicodeDefault + kTextEncodingDefaultFormat (aka kUnicode16BitFormat)
const K_CFSTRING_ENCODING_UTF8 = 0x08000100 # kTextEncodingUnicodeDefault + kUnicodeUTF8Format
const K_CFSTRING_ENCODING_NONLOSSYASCII = 0x0BFF # 7bit Unicode variants used by Cocoa & Java
const K_CFSTRING_ENCODING_UTF16 = 0x0100 # kTextEncodingUnicodeDefault + kUnicodeUTF16Format (alias of kCFStringEncodingUnicode)
const K_CFSTRING_ENCODING_UTF16BE = 0x10000100 # kTextEncodingUnicodeDefault + kUnicodeUTF16BEFormat
const K_CFSTRING_ENCODING_UTF16LE = 0x14000100 # kTextEncodingUnicodeDefault + kUnicodeUTF16LEFormat
const K_CFSTRING_ENCODING_UTF32 = 0x0c000100 # kTextEncodingUnicodeDefault + kUnicodeUTF32Format
const K_CFSTRING_ENCODING_UTF32BE = 0x18000100 # kTextEncodingUnicodeDefault + kUnicodeUTF32BEFormat
const K_CFSTRING_ENCODING_UTF32LE = 0x1c000100 # kTextEncodingUnicodeDefault + kUnicodeUTF32LEFormat
# https://opensource.apple.com/source/CF/CF-368/String.subproj/CFStringUtilities.c.auto.html
const K_CF_STRING_ENCODING_UTF8 = UInt32(65001)
# CFStringEncoding identifiers.  Every value is an unsigned integer literal, so any of
# them can be passed where an `encoding::Unsigned` argument is expected.
# Each name is declared exactly once.
# https://opensource.apple.com/source/CF/CF-368/String.subproj/CFStringUtilities.c.auto.html
const K_CF_STRING_ENCODING_MAC_ROMAN = 0x0
const K_CF_STRING_ENCODING_WINDOWS_LATIN_1 = 0x0500 # ANSI codepage 1252
const K_CF_STRING_ENCODING_ISO_LATIN_1 = 0x0201 # ISO 8859-1
const K_CF_STRING_ENCODING_NEXT_STEP_LATIN = 0x0B01 # NextStep encoding
const K_CF_STRING_ENCODING_ASCII = 0x0600 # 0..127 (in creating CFString, values greater than 0x7F are treated as corresponding Unicode value)
const K_CF_STRING_ENCODING_UNICODE = 0x0100 # kTextEncodingUnicodeDefault + kTextEncodingDefaultFormat (aka kUnicode16BitFormat)
const K_CF_STRING_ENCODING_UTF8 = 0x08000100 # kTextEncodingUnicodeDefault + kUnicodeUTF8Format
const K_CF_STRING_ENCODING_NON_LOSSY_ASCII = 0x0BFF # 7bit Unicode variants used by Cocoa & Java
const K_CF_STRING_ENCODING_UTF16 = 0x0100 # kTextEncodingUnicodeDefault + kUnicodeUTF16Format (alias of kCFStringEncodingUnicode)
const K_CF_STRING_ENCODING_UTF16BE = 0x10000100 # kTextEncodingUnicodeDefault + kUnicodeUTF16BEFormat
const K_CF_STRING_ENCODING_UTF16LE = 0x14000100 # kTextEncodingUnicodeDefault + kUnicodeUTF16LEFormat
const K_CF_STRING_ENCODING_UTF32 = 0x0c000100 # kTextEncodingUnicodeDefault + kUnicodeUTF32Format
const K_CF_STRING_ENCODING_UTF32BE = 0x18000100 # kTextEncodingUnicodeDefault + kUnicodeUTF32BEFormat
const K_CF_STRING_ENCODING_UTF32LE = 0x1c000100 # kTextEncodingUnicodeDefault + kUnicodeUTF32LEFormat

# This will be our main/default string encoding
const CF_STRING_ENCODING = K_CF_STRING_ENCODING_MAC_ROMAN # K_CF_STRING_ENCODING_UTF8 or UTF16 doesn't seem to work

# https://developer.apple.com/documentation/corefoundation/1542942-cfstringcreatewithcstring
function _cfstring_create_with_cstring(s::AbstractString, encoding::Unsigned = K_CFSTRING_ENCODING_MACROMAN)
function _cfstring_create_with_cstring(s::AbstractString, encoding::Unsigned = CF_STRING_ENCODING)
cfstr = ccall(:CFStringCreateWithCString, Cstring,
(Ptr{Cvoid}, Cstring, UInt32),
C_NULL, s, encoding)
Expand Down Expand Up @@ -56,7 +58,7 @@ function _cfstring_get_length(cfstr::Cstring)
end

# https://developer.apple.com/documentation/corefoundation/1542143-cfstringgetmaximumsizeforencodin
function _cfstring_get_maximum_size_for_encoding(strlen::T, encoding::Unsigned = K_CFSTRING_ENCODING_MACROMAN) where {T <: Integer}
# Thin wrapper around CoreFoundation's `CFStringGetMaximumSizeForEncoding`.
#
# Arguments:
#   - `strlen`: length of the string, any `Integer`; converted to `Clong` by `ccall`
#   - `encoding`: CFStringEncoding identifier (defaults to `CF_STRING_ENCODING`); converted
#     to `UInt32` by `ccall`
#
# Returns whatever the C function returns, as a `Clong`.
#
# The C declaration uses `CFIndex` for both the length argument and the result.  `CFIndex`
# is a `long`, so it is marshalled as `Clong` here; a fixed 32-bit type would not match the
# callee's argument width on 64-bit platforms.
function _cfstring_get_maximum_size_for_encoding(strlen::T, encoding::Unsigned = CF_STRING_ENCODING) where {T <: Integer}
    return ccall(:CFStringGetMaximumSizeForEncoding, Clong, (Clong, UInt32), strlen, encoding)
end

Expand All @@ -67,7 +69,7 @@ end

# https://github.com/vovkasm/input-source-switcher/blob/c5bab3de716db5e3dae3703ed3b72f2bf1cd51d3/utils.cpp#L9-L18
# https://www.tabnine.com/code/java/methods/org.eclipse.swt.internal.webkit.WebKit_win32/CFStringGetCharactersPtr
function _string_from_cf_string(cfstr::Cstring, encoding::Unsigned = K_CFSTRING_ENCODING_MACROMAN)
function _string_from_cf_string(cfstr::Cstring, encoding::Unsigned = CF_STRING_ENCODING)
strlen = _cfstring_get_length(cfstr)
maxsz = _cfstring_get_maximum_size_for_encoding(strlen, encoding)
cfio = IOBuffer()
Expand Down

0 comments on commit 1c1ba4b

Please sign in to comment.