From b999d862e2b26f5aeeac5aeff3ed08263ebd53de Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Thu, 29 Aug 2024 10:21:47 -0400 Subject: [PATCH] Add VCF data formats (#6941) * Add vCard with sample * Add sample + remove comment in yml * Add vCard grammar * Add id * Edit aliases * Add vcf to TSV + heuristics * Add test --- .gitmodules | 3 + grammars.yml | 2 + lib/linguist/heuristics.yml | 6 + lib/linguist/languages.yml | 16 +++ samples/TSV/merged1.vcf | 131 ++++++++++++++++++ samples/TSV/z.vcf | 12 ++ samples/vCard/forrest-gump.vcf | 16 +++ samples/vCard/vcard.vcf | 34 +++++ test/test_heuristics.rb | 7 + vendor/README.md | 1 + vendor/grammars/vscode-vcard | 1 + .../git_submodule/vscode-vcard.dep.yml | 21 +++ 12 files changed, 250 insertions(+) create mode 100644 samples/TSV/merged1.vcf create mode 100644 samples/TSV/z.vcf create mode 100644 samples/vCard/forrest-gump.vcf create mode 100644 samples/vCard/vcard.vcf create mode 160000 vendor/grammars/vscode-vcard create mode 100644 vendor/licenses/git_submodule/vscode-vcard.dep.yml diff --git a/.gitmodules b/.gitmodules index 45c4f88a86..751e0602d6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1368,6 +1368,9 @@ [submodule "vendor/grammars/vscode-vba"] path = vendor/grammars/vscode-vba url = https://github.com/serkonda7/vscode-vba.git +[submodule "vendor/grammars/vscode-vcard"] + path = vendor/grammars/vscode-vcard + url = https://github.com/cstrachan88/vscode-vcard.git [submodule "vendor/grammars/vscode-vlang"] path = vendor/grammars/vscode-vlang url = https://github.com/0x9ef/vscode-vlang diff --git a/grammars.yml b/grammars.yml index 8f4e14bf9c..7d327603c8 100644 --- a/grammars.yml +++ b/grammars.yml @@ -1231,6 +1231,8 @@ vendor/grammars/vscode-slice: vendor/grammars/vscode-vba: - source.vba - source.wwb +vendor/grammars/vscode-vcard: +- source.vcard vendor/grammars/vscode-vlang: - source.v vendor/grammars/vscode-wit: diff --git a/lib/linguist/heuristics.yml b/lib/linguist/heuristics.yml index 7cb50e5213..249ab8f8e3 100644 --- a/lib/linguist/heuristics.yml +++ b/lib/linguist/heuristics.yml @@ -829,6 +829,12 @@ disambiguations: - language: Vim Script pattern: '^UseVimball' - language: VBA +- extensions: ['.vcf'] + rules: + - language: TSV + pattern: '\A##fileformat=VCF' + - language: vCard + pattern: '\ABEGIN:VCARD' - extensions: ['.w'] rules: - language: OpenEdge ABL diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index ab3d48bb7b..3e3f98ebc7 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -7143,6 +7143,9 @@ TSV: tm_scope: source.generic-db extensions: - ".tsv" + - ".vcf" + aliases: + - tab-seperated values language_id: 1035892117 TSX: type: programming @@ -8549,6 +8552,19 @@ templ: ace_mode: text tm_scope: source.templ language_id: 795579337 +vCard: + type: data + color: "#ee2647" + extensions: + - ".vcf" + tm_scope: source.vcard + aliases: + - virtual contact file + - electronic business card + ace_mode: properties + codemirror_mode: properties + codemirror_mime_type: text/x-properties + language_id: 851476558 wisp: type: programming ace_mode: clojure diff --git a/samples/TSV/merged1.vcf b/samples/TSV/merged1.vcf new file mode 100644 index 0000000000..02a33037bb --- /dev/null +++ b/samples/TSV/merged1.vcf @@ -0,0 +1,131 @@ +##fileformat=VCFv4.1 +##source=Sniffles +##fileDate=20190902 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT alns_merged/hs37d5.all.md.bam +1 10001 27885 N . PASS IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=10407;ZMW=4;STD_quant_start=0.500000;STD_quant_stop=78.452215;Kurtosis_quant_start=1.000000;Kurtosis_quant_stop=-1.530914;SVTYPE=INS;SUPTYPE=SR;SVLEN=406;STRANDS=-+;RE=4 GT:DR:DV ./.:.:4 +1 10329 1 CCCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC N . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=10390;ZMW=21;STD_quant_start=0.316228;STD_quant_stop=3.794733;Kurtosis_quant_start=-0.866914;Kurtosis_quant_stop=-1.344369;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-61;STRANDS=+-;RE=21 GT:DR:DV ./.:.:21 +1 10407 2 N CACCCTCACCCTCACCCTCACCCCCACCCCCACCCCCACCCCCACCCCCACCCC . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=10461;ZMW=20;STD_quant_start=0.000000;STD_quant_stop=0.316228;Kurtosis_quant_start=-1.254650;Kurtosis_quant_stop=0.212091;SVTYPE=INS;SUPTYPE=AL;SVLEN=54;STRANDS=+-;RE=20 GT:DR:DV ./.:.:20 +1 10862 3 N CAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGGGCCGGCGCAGGCGCAGAGACACATGCTAGCGCGTCCAGGGGAGGAGGCGTGGCA . PASS IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=10996;ZMW=77;STD_quant_start=0.000000;STD_quant_stop=19.477382;Kurtosis_quant_start=-1.283652;Kurtosis_quant_stop=-2.008307;SVTYPE=INS;SUPTYPE=AL;SVLEN=47;STRANDS=+-;RE=77 GT:DR:DV ./.:.:77 +1 136934 4 N CAAGGGGCTCGGGCTGACCTCTGTCCGCGTGGGAGGGGCCGGTGTGAGGCAAGGGGCTCGGGCTGACCTCTCTCAGTGTGGGAGGGGCCGGTGTGAGGCAAGGGGCTCACGCTGACCTCTGTCTGCGTGGGAGGGGCCGGTGTGAGACAAGGGGCTCGGGCTGACCTCTCTCAGCGTGGGAGGGGCCGGTGTGAGGCAAGGGGCTCGGGCTGACCTCTCTCAGCGTGGGAGGGGCCAGTGTGAGGCAAGGGCTCACACTGACCTCTCTCAGCATGGGAGGGGCCGGTGTGAGA . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=137227;ZMW=14;STD_quant_start=0.000000;STD_quant_stop=2.549510;Kurtosis_quant_start=2.193074;Kurtosis_quant_stop=2.855605;SVTYPE=INS;SUPTYPE=AL;SVLEN=293;STRANDS=+-;RE=14 GT:DR:DV ./.:.:14 +1 533244 5 N CGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACGCGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACA . PASS IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=533338;ZMW=3;STD_quant_start=27.135463;STD_quant_stop=25.980762;Kurtosis_quant_start=-0.000000;Kurtosis_quant_stop=0.000000;SVTYPE=INS;SUPTYPE=AL;SVLEN=94;STRANDS=+-;RE=3 GT:DR:DV ./.:.:3 +1 545927 6 N TGCAGGAGAGGAGATGCCCAGGCCTGGCGGCCGGCGCACGCGGGTTCTCTGTGGCCAGCAGGCGGCGCTGCAGGAGAGGAGATGCCCAGGCCAGGCGGCCGGCGCACGCGGGTTCTCTGTGGCCAGCAGGCGGCGCTGCAGGAGAGGAGATGCCCAGGCCTGGCGGCCGGCGCACGCGGGTTCTCTGTGGCCAGCAGGCGGCGA . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=546131;ZMW=23;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=4.593400;Kurtosis_quant_stop=4.281659;SVTYPE=INS;SUPTYPE=AL;SVLEN=204;STRANDS=+-;RE=23 GT:DR:DV ./.:.:23 +1 605031 7 N TACATGGAGGGGAACAACACACACCAGGGCCTCTCAGGGGGACAGGGGGTAGGAGACCATCAGGACAAACACGTGGATACATGGAGGGGAACAACACACACCAGGGCCTCTCAGGGGGACAGGGGGTAGGAGACCATCAGGACAAACACGTGGATACATGGAGGGGAACAACACACACCAGGGCCTCTCAGGGGGACAGGGGGTAGGAGACCATCAGGACAAACACGTGGG . PASS IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=605262;ZMW=9;STD_quant_start=45.186035;STD_quant_stop=44.628093;Kurtosis_quant_start=2.035202;Kurtosis_quant_stop=1.944981;SVTYPE=INS;SUPTYPE=AL;SVLEN=231;STRANDS=+-;RE=9 GT:DR:DV ./.:.:9 +1 662612 8 N GTGAGACAAGGGGCTCACGCTGACCTCTGTCCACGTGGGAGGGGCCGGG . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=662661;ZMW=4;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=-nan;Kurtosis_quant_stop=-nan;SVTYPE=INS;SUPTYPE=AL;SVLEN=49;STRANDS=+-;RE=4 GT:DR:DV ./.:.:4 +1 664891 9 GCAGGAGCTGGGCCTGGAGAGGCTGCAAAGAA N . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=664923;ZMW=4;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=-nan;Kurtosis_quant_stop=-nan;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;RE=4 GT:DR:DV ./.:.:4 +1 725451 10 GGATGGGATGCGATGGGATGGGATGGGATGGGATGGGATGGGATGGGATGGATTT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=725506;ZMW=3;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=-nan;Kurtosis_quant_stop=-nan;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-55;STRANDS=+-;RE=3 GT:DR:DV ./.:.:3 +1 756259 11 N TCTACACTACCTGCCTGGCCAGCAGATCCACCCTGTCTACACTACCTGCCTGGGCAGTAGTTCCACGCAATCTCCCCTACCTGCCTCTCCAGCAGACCCGCCCTATCTATACTACTTGCCTGTCCAGCAGATCCACTCTATCTACACGACCTGCCTGTCCAGCAGATCCACCCTGTCTACACTACCTGCTTGTCCAGCAGGTCCACCCTGTCTATACTACCTGCCTGGCCAGTAGATCCACACTA . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=1;END=756504;ZMW=19;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=1.759442;Kurtosis_quant_stop=1.789241;SVTYPE=INS;SUPTYPE=AL;SVLEN=245;STRANDS=+-;RE=19 GT:DR:DV ./.:.:19 diff --git a/samples/TSV/z.vcf b/samples/TSV/z.vcf new file mode 100644 index 0000000000..0039244cab --- /dev/null +++ b/samples/TSV/z.vcf @@ -0,0 +1,12 @@ +##fileformat=VCFv4.2 +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b +a 281 >1>9 AGCCGGGGCAGAAAGTTCTTCCTTGAATGTGGTCATCTGCATTTCAGCTCAGGAATCCTGCAAAAGACAG CTGTCTTTTGCAGGATTCCTGTGCTGAAATGCAGATGACCGCATTCAAGGAAGAACTATCTGCCCCGGCT 60.0 . AC=1;AF=1;AN=1;AT=>1>2>3>4>5>6>7>8>9,>1<8>10<6>11<4>12<2>9;NS=1;LV=0 GT 1 diff --git a/samples/vCard/forrest-gump.vcf b/samples/vCard/forrest-gump.vcf new file mode 100644 index 0000000000..71f1eac0b1 --- /dev/null +++ b/samples/vCard/forrest-gump.vcf @@ -0,0 +1,16 @@ +BEGIN:VCARD +VERSION:2.1 +N:Gump;Forrest +FN:Forrest Gump +ORG:Bubba Gump Shrimp Co. +TITLE:Shrimp Man +PHOTO;GIF:http://www.example.com/dir_photos/my_photo.gif +TEL;WORK;VOICE:(111) 555-1212 +TEL;HOME;VOICE:(404) 555-1212 +ADR;WORK:;;100 Waters Edge;Baytown;LA;30314;United States of America +LABEL;WORK;ENCODING=QUOTED-PRINTABLE:100 Waters Edge=0D=0ABaytown, LA 30314=0D=0AUnited States of America +ADR;HOME:;;42 Plantation St.;Baytown;LA;30314;United States of America +LABEL;HOME;ENCODING=QUOTED-PRINTABLE:42 Plantation St.=0D=0ABaytown, LA 30314=0D=0AUnited States of America +EMAIL;PREF;INTERNET:forrestgump@example.com +REV:20080424T195243Z +END:VCARD diff --git a/samples/vCard/vcard.vcf b/samples/vCard/vcard.vcf new file mode 100644 index 0000000000..bab8344616 --- /dev/null +++ b/samples/vCard/vcard.vcf @@ -0,0 +1,34 @@ +BEGIN:VCARD +VERSION:4.0 +ADR;TYPE=work:pobox;apt;street;city;state;zipcode;country +ANNIVERSARY:19960415 +BDAY:--0203 +CALADRURI:http://example.com/calendar/jdoe +CALURI;MEDIATYPE=text/calendar:ftp://ftp.example.com/calA.ics +CLIENTPIDMAP:1;urn:uuid:3df403f4-5924-4bb7-b077-3c711d9eb34b +EMAIL;TYPE=work:jqpublic@xyz.example.com +FBURL;MEDIATYPE=text/calendar:ftp://example.com/busy/project-a.ifb +FN:J. Doe +GENDER:M;Fellow +GEO:geo:37.386013\,-122.082932 +IMPP;PREF=1:xmpp:alice@example.com +KEY:http://www.example.com/keys/jdoe.cer +KIND:individual +LANG;PREF=1:fr +LOGO:http://www.example.com/pub/logos/abccorp.jpg +MEMBER:urn:uuid:03a0e51f-d1aa-4385-8a53-e29025acd8af +N:Stevenson;John;Philip,Paul;Dr.;Jr.,M.D.,A.C.P. +NICKNAME;TYPE=work:Boss +NOTE:This fax number is operational 0800 to 1715 EST\, Mon-Fri +ORG:ABC\, Inc.;North American Division;Marketing +PHOTO:http://www.example.com/pub/photos/jqpublic.gif +RELATED;TYPE=friend:urn:uuid:f81d4fae-7dec-11d0-a765-00a0c91e6bf6 +REV:19951031T222710Z +ROLE:Project Leader +SOUND:CID:JOHNQPUBLIC.part8.19960229T080000.xyzMail@example.com +SOURCE:ldap://ldap.example.com/cn=Babs%20Jensen\,%20o=Babsco\,%20c=US +TEL;VALUE=uri;TYPE=home:tel:+33-01-23-45-67 +TITLE:Research Scientist +TZ;VALUE=utc-offset:-0500 +XML: +END:VCARD diff --git a/test/test_heuristics.rb b/test/test_heuristics.rb index 36b3f90b1e..c3216ce0cd 100755 --- a/test/test_heuristics.rb +++ b/test/test_heuristics.rb @@ -1064,6 +1064,13 @@ def test_vba_by_heuristics }) end + def test_vcf_by_heuristics + assert_heuristics({ + "TSV" => all_fixtures("TSV", "*.vcf"), + "vCard" => all_fixtures("vCard", "*.vcf") + }) + end + def test_w_by_heuristics assert_heuristics({ "CWeb" => all_fixtures("CWeb", "*.w"), diff --git a/vendor/README.md b/vendor/README.md index e1f346c0ba..f3c579ff7c 100644 --- a/vendor/README.md +++ b/vendor/README.md @@ -678,5 +678,6 @@ This is a list of grammars that Linguist selects to provide syntax highlighting - **robots.txt:** [Nixinova/NovaGrammars](https://github.com/Nixinova/NovaGrammars) - **sed:** [Alhadis/language-sed](https://github.com/Alhadis/language-sed) - **templ:** [templ-go/templ-vscode](https://github.com/templ-go/templ-vscode) +- **vCard:** [cstrachan88/vscode-vcard](https://github.com/cstrachan88/vscode-vcard) - **wisp:** [atom/language-clojure](https://github.com/atom/language-clojure) - **xBase:** [hernad/atom-language-harbour](https://github.com/hernad/atom-language-harbour) diff --git a/vendor/grammars/vscode-vcard b/vendor/grammars/vscode-vcard new file mode 160000 index 0000000000..875ceb468d --- /dev/null +++ b/vendor/grammars/vscode-vcard @@ -0,0 +1 @@ +Subproject commit 875ceb468d7042685faa3a8f0db3bef3a3a1aaac diff --git a/vendor/licenses/git_submodule/vscode-vcard.dep.yml b/vendor/licenses/git_submodule/vscode-vcard.dep.yml new file mode 100644 index 0000000000..d7060fd389 --- /dev/null +++ b/vendor/licenses/git_submodule/vscode-vcard.dep.yml @@ -0,0 +1,21 @@ +--- +name: vscode-vcard +version: 875ceb468d7042685faa3a8f0db3bef3a3a1aaac +type: git_submodule +homepage: https://github.com/cstrachan88/vscode-vcard.git +license: mit +licenses: +- sources: LICENSE.txt + text: |- + The MIT License + + Copyright 2019 Courtney Strachan + + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- sources: README.md + text: This project is licensed under the [MIT License](LICENSE.txt). +notices: []