@@ -301,24 +301,20 @@ public List<Variant> normalize(List<Variant> batch, boolean reuse) throws NonSta
301301 normalizedVariants .add (variant );
302302 continue ;
303303 }
304- String reference = variant .getReference (); //Save original values, as they can be changed
304+ //Save original values, as they can be changed
305+ String reference = variant .getReference ();
305306 String alternate = variant .getAlternate ();
306307 Integer start = variant .getStart ();
307308 Integer end = variant .getEnd ();
308309 String chromosome = variant .getChromosome ();
309- StructuralVariation sv = variant .getSv ();
310310
311311 if (variant .getStudies () == null || variant .getStudies ().isEmpty ()) {
312- List <VariantKeyFields > keyFieldsList ;
313- if (isSymbolic (variant )) {
314- keyFieldsList = normalizeSymbolic (start , end , reference , alternate , sv );
315- } else {
316- keyFieldsList = normalize (chromosome , start , reference , alternate );
317- }
312+ List <VariantKeyFields > keyFieldsList = normalizeAlleles (variant );
313+
318314 // Iterate keyFields sorting by position, so the generated variants are ordered. Do not modify original order!
319315 for (VariantKeyFields keyFields : sortByPosition (keyFieldsList )) {
320316 OriginalCall call = new OriginalCall (variant .toString (), keyFields .getNumAllele ());
321- Variant normalizedVariant = newVariant (variant , keyFields , sv );
317+ Variant normalizedVariant = newVariant (variant , keyFields );
322318 if (keyFields .getPhaseSet () != null ) {
323319 StudyEntry studyEntry = new StudyEntry ();
324320 studyEntry .setSamples (
@@ -332,25 +328,16 @@ public List<Variant> normalize(List<Variant> batch, boolean reuse) throws NonSta
332328 normalizedVariants .add (normalizedVariant );
333329 }
334330 } else {
335- for (StudyEntry entry : variant .getStudies ()) {
336- List <String > originalAlternates = new ArrayList <>(1 + entry .getSecondaryAlternates ().size ());
337- List <String > alternates = new ArrayList <>(1 + entry .getSecondaryAlternates ().size ());
338- alternates .add (alternate );
339- originalAlternates .add (alternate );
340- for (String secondaryAlternatesAllele : entry .getSecondaryAlternatesAlleles ()) {
341- alternates .add (secondaryAlternatesAllele );
342- originalAlternates .add (secondaryAlternatesAllele );
343- }
331+ if (variant .getStudies ().size () != 1 ) {
332+ throw new IllegalStateException ("Only one study per variant is supported when normalizing variants. Found "
333+ + variant .getStudies ().size () + " studies. Variant: " + variant );
334+ } else {
335+ StudyEntry entry = variant .getStudies ().get (0 );
336+ List <String > alternates = getAllAlternates (variant );
344337
345338 // FIXME: assumes there won't be multinucleotide positions with CNVs and short variants mixed
346- List <VariantKeyFields > keyFieldsList ;
347- List <VariantKeyFields > originalKeyFieldsList ;
348- if (isSymbolic (variant )) {
349- keyFieldsList = normalizeSymbolic (start , end , reference , alternates , sv );
350- } else {
351- keyFieldsList = normalize (chromosome , start , reference , alternates );
352- }
353- originalKeyFieldsList = keyFieldsList
339+ List <VariantKeyFields > keyFieldsList = normalizeAlleles (variant );
340+ List <VariantKeyFields > originalKeyFieldsList = keyFieldsList
354341 .stream ()
355342 .filter (k -> !k .isReferenceBlock ())
356343 .map (k -> k .originalKeyFields )
@@ -373,8 +360,8 @@ public List<Variant> normalize(List<Variant> batch, boolean reuse) throws NonSta
373360 originalCall = entry .getFiles ().get (0 ).getCall ().getVariantId ();
374361 } else {
375362 StringBuilder sb = new StringBuilder (variant .toString ());
376- for (int i = 1 ; i < originalAlternates .size (); i ++) {
377- sb .append ("," ).append (originalAlternates .get (i ));
363+ for (int i = 1 ; i < alternates .size (); i ++) {
364+ sb .append ("," ).append (alternates .get (i ));
378365 }
379366 originalCall = sb .toString ();
380367 }
@@ -400,6 +387,9 @@ public List<Variant> normalize(List<Variant> batch, boolean reuse) throws NonSta
400387 variant .setEnd (keyFields .getEnd ());
401388 variant .setReference (keyFields .getReference ());
402389 variant .setAlternate (keyFields .getAlternate ());
390+ if (keyFields .getSv () != null ) {
391+ variant .setSv (keyFields .getSv ());
392+ }
403393 variant .reset ();
404394 // Variant is being reused, must ensure the SV field is appropriately created
405395// if (isSymbolic(variant)) {
@@ -415,7 +405,7 @@ public List<Variant> normalize(List<Variant> batch, boolean reuse) throws NonSta
415405 }
416406 samples = entry .getSamples ();
417407 } else {
418- normalizedVariant = newVariant (variant , keyFields , sv );
408+ normalizedVariant = newVariant (variant , keyFields );
419409
420410 normalizedEntry = new StudyEntry ();
421411 normalizedEntry .setStudyId (entry .getStudyId ());
@@ -598,17 +588,54 @@ private Collection<VariantKeyFields> sortByPosition(List<VariantKeyFields> keyFi
598588// }
599589// }
600590
591+ protected List <VariantKeyFields > normalizeAlleles (Variant variant ) {
592+ List <String > alternates = getAllAlternates (variant );
593+
594+ List <VariantKeyFields > keyFieldsList ;
595+ if (isSymbolic (variant )) {
596+ keyFieldsList = normalizeSymbolic (variant .getStart (), variant .getEnd (), variant .getReference (), alternates , variant .getSv ());
597+ } else {
598+ keyFieldsList = normalize (variant .getChromosome (), variant .getStart (), variant .getReference (), alternates , variant .getSv ());
599+ }
600+ return keyFieldsList ;
601+ }
602+
603+ private static List <String > getAllAlternates (Variant variant ) {
604+ List <String > alternates ;
605+ if (variant .getStudies () != null && !variant .getStudies ().isEmpty ()) {
606+ StudyEntry entry = variant .getStudies ().get (0 );
607+ String alternate = variant .getAlternate ();
608+ alternates = new ArrayList <>(1 + entry .getSecondaryAlternates ().size ());
609+ alternates .add (alternate );
610+ for (AlternateCoordinate secondaryAlternate : entry .getSecondaryAlternates ()) {
611+ if (secondaryAlternate .getStart () != null && !secondaryAlternate .getStart ().equals (variant .getStart ())) {
612+ throw new IllegalStateException ("Unable to normalize variant where secondary alternates do not start at the same position. "
613+ + "Variant: " + variant + " , secondaryAlternate: " + secondaryAlternate );
614+ }
615+ if (secondaryAlternate .getEnd () != null && !secondaryAlternate .getEnd ().equals (variant .getEnd ())) {
616+ throw new IllegalStateException ("Unable to normalize variant where secondary alternates do not end at the same position. "
617+ + "Variant: " + variant + " (end=" + variant .getEnd () + ") , secondaryAlternate: " + secondaryAlternate );
618+ }
619+ alternates .add (secondaryAlternate .getAlternate ());
620+ }
621+ } else {
622+ alternates = Collections .singletonList (variant .getAlternate ());
623+ }
624+ return Collections .unmodifiableList (alternates );
625+ }
626+
627+ @ Deprecated // Test purposes only
601628 public List <VariantKeyFields > normalizeSymbolic (Integer start , Integer end , String reference , String alternate , StructuralVariation sv ) {
602629 return normalizeSymbolic (start , end , reference , Collections .singletonList (alternate ), sv );
603630 }
604631
605- @ Deprecated
632+ @ Deprecated // Test purposes only
606633 public List <VariantKeyFields > normalizeSymbolic (final Integer start , final Integer end , final String reference ,
607634 final List <String > alternates ) {
608635 return normalizeSymbolic (start , end , reference , alternates , null );
609636 }
610637
611- public List <VariantKeyFields > normalizeSymbolic (final Integer start , final Integer end , final String reference ,
638+ protected List <VariantKeyFields > normalizeSymbolic (final Integer start , final Integer end , final String reference ,
612639 final List <String > alternates , StructuralVariation sv ) {
613640 List <VariantKeyFields > list = new ArrayList <>(alternates .size ());
614641
@@ -624,12 +651,56 @@ public List<VariantKeyFields> normalizeSymbolic(final Integer start, final Integ
624651 Integer copyNumber = sv == null ? null : sv .getCopyNumber ();
625652 keyFields = normalizeSymbolic (start , end , reference , alternate , alternates , copyNumber , numAllelesIdx );
626653 }
654+
655+ if (alternate .equals (VariantBuilder .DUP_TANDEM_ALT )) {
656+ if (keyFields .getSv () == null ) {
657+ keyFields .setSv (new StructuralVariation ());
658+ }
659+ keyFields .getSv ().setType (StructuralVariantType .TANDEM_DUPLICATION );
660+ }
661+
662+ normalizeSvField (sv , keyFields );
663+
627664 list .add (keyFields );
628665 }
629666
630667 return list ;
631668 }
632669
670+ private static void normalizeSvField (StructuralVariation sv , VariantKeyFields keyFields ) {
671+ if (sv != null ) {
672+ StructuralVariation normalizedSv = keyFields .getSv ();
673+ if (normalizedSv == null ) {
674+ normalizedSv = new StructuralVariation ();
675+ }
676+ // CI positions may change during the normalization. Update them.
677+ normalizedSv .setCiStartLeft (sv .getCiStartLeft ());
678+ normalizedSv .setCiStartRight (sv .getCiStartRight ());
679+
680+ // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND.
681+ // At this point, we're removing the CIEND from the normalized variant.
682+ // Do not remove the value from the INFO field (if any).
683+ // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start")
684+ if (keyFields .getEnd () < keyFields .getStart ()) {
685+ normalizedSv .setCiEndLeft (null );
686+ normalizedSv .setCiEndRight (null );
687+ } else {
688+ normalizedSv .setCiEndLeft (sv .getCiEndLeft ());
689+ normalizedSv .setCiEndRight (sv .getCiEndRight ());
690+ }
691+ normalizedSv .setLeftSvInsSeq (sv .getLeftSvInsSeq ());
692+ normalizedSv .setRightSvInsSeq (sv .getRightSvInsSeq ());
693+
694+ if (keyFields .getSv () == null ) {
695+ if (normalizedSv .getCiStartLeft () != null || normalizedSv .getCiStartRight () != null
696+ || normalizedSv .getCiEndLeft () != null || normalizedSv .getCiEndRight () != null
697+ || normalizedSv .getLeftSvInsSeq () != null || normalizedSv .getRightSvInsSeq () != null ) {
698+ keyFields .setSv (normalizedSv );
699+ }
700+ }
701+ }
702+ }
703+
633704 private boolean isNonRef (String alternate ) {
634705 return alternate .equals (Allele .NO_CALL_STRING )
635706 || alternate .equals (VariantBuilder .NON_REF_ALT )
@@ -695,7 +766,7 @@ private static VariantKeyFields normalizeMateBreakend(
695766 }
696767
697768 VariantKeyFields keyFields = new VariantKeyFields (newStart , newStart - 1 , numAllelesIdx , newReference , newAlternate );
698- keyFields .getSv (). setBreakend (breakend );
769+ keyFields .setBreakend (breakend );
699770 return keyFields ;
700771 }
701772
@@ -718,29 +789,37 @@ private VariantKeyFields normalizeSymbolic(
718789 + "contain 0 or 1 nt, but no more. Please, check." );
719790 }
720791
721- Integer cn = VariantBuilder .getCopyNumberFromAlternate (alternate );
722792// if (cn != null) {
723793// // Alternate with the form <CNxxx>, being xxx the number of copies, must be normalized into "<CNV>"
724794// newAlternate = "<CNV>";
725795// }
726796 String newAlternate ;
797+ Integer newCn ;
727798 if (alternate .equals ("<CNV>" ) && copyNumber != null ) {
728799 // Alternate must be of the form <CNxxx>, being xxx the number of copies
729800 newAlternate = "<CN" + copyNumber + ">" ;
801+ newCn = copyNumber ;
730802 } else {
731803 newAlternate = alternate ;
804+ newCn = VariantBuilder .getCopyNumberFromAlternate (alternate );
732805 }
806+
733807 return new VariantKeyFields (newStart , end , numAllelesIdx , newReference , newAlternate ,
734- null , cn , false );
808+ null , newCn , false );
735809 }
736810
737811
812+ @ Deprecated // Test purposes only
738813 public List <VariantKeyFields > normalize (String chromosome , int position , String reference , String alternate ) {
739- return normalize (chromosome , position , reference , Collections .singletonList (alternate ));
814+ return normalize (chromosome , position , reference , Collections .singletonList (alternate ), null );
815+ }
816+
817+ @ Deprecated // Test purposes only
818+ public List <VariantKeyFields > normalize (String chromosome , int position , String reference , List <String > alternates ) {
819+ return normalize (chromosome , position , reference , alternates , null );
740820 }
741821
742- public List <VariantKeyFields > normalize (String chromosome , int position , String reference , List <String > alternates )
743- {
822+ protected List <VariantKeyFields > normalize (String chromosome , int position , String reference , List <String > alternates , StructuralVariation sv ) {
744823
745824 List <VariantKeyFields > list = new ArrayList <>(alternates .size ());
746825 int numAllelesIdx = 0 ; // This index is necessary for getting the samples where the mutated allele is present
@@ -784,6 +863,8 @@ public List<VariantKeyFields> normalize(String chromosome, int position, String
784863 }
785864 }
786865
866+ normalizeSvField (sv , keyFields );
867+
787868 if (keyFields != null ) {
788869
789870 // To deal with cases such as A>GT
@@ -1380,34 +1461,24 @@ private int[] getGenotypesReorderingMap(int numAllele, int[] alleleMap) {
13801461 }
13811462 }
13821463
1383-
1384- private Variant newVariant (Variant variant , VariantKeyFields keyFields , StructuralVariation sv ) {
1464+ private Variant newVariant (Variant variant , VariantKeyFields keyFields ) {
13851465 Variant normalizedVariant = new Variant (variant .getChromosome (), keyFields .getStart (), keyFields .getEnd (), keyFields .getReference (), keyFields .getAlternate ())
13861466 .setId (variant .getId ())
13871467 .setNames (variant .getNames ())
13881468 .setStrand (variant .getStrand ());
13891469
1390- if (sv != null ) {
1391- if (normalizedVariant .getSv () != null ) {
1392- // CI positions may change during the normalization. Update them.
1393- normalizedVariant .getSv ().setCiStartLeft (sv .getCiStartLeft ());
1394- normalizedVariant .getSv ().setCiStartRight (sv .getCiStartRight ());
1395- normalizedVariant .getSv ().setCiEndLeft (sv .getCiEndLeft ());
1396- normalizedVariant .getSv ().setCiEndRight (sv .getCiEndRight ());
1397- normalizedVariant .getSv ().setLeftSvInsSeq (sv .getLeftSvInsSeq ());
1398- normalizedVariant .getSv ().setRightSvInsSeq (sv .getRightSvInsSeq ());
1399-
1400- // Variant will never have CopyNumber, because the Alternate is normalized from <CNxx> to <CNV>
1401- normalizedVariant .getSv ().setCopyNumber (keyFields .getCopyNumber ());
1402- VariantType cnvSubtype = VariantBuilder .getCopyNumberSubtype (keyFields .getCopyNumber ());
1403- if (cnvSubtype != null ) {
1404- normalizedVariant .setType (cnvSubtype );
1405- }
1406- }
1470+ if (keyFields .getSv () != null ) {
1471+ normalizedVariant .setSv (keyFields .getSv ());
14071472 }
1408-
14091473 normalizedVariant .setAnnotation (variant .getAnnotation ());
14101474
1475+ if (keyFields .getCopyNumber () != null ) {
1476+ VariantType cnvSubtype = VariantBuilder .getCopyNumberSubtype (keyFields .getCopyNumber ());
1477+ if (cnvSubtype != null ) {
1478+ normalizedVariant .setType (cnvSubtype );
1479+ }
1480+ }
1481+
14111482 return normalizedVariant ;
14121483// normalizedVariant.setAnnotation(variant.getAnnotation());
14131484// if (isSymbolic(variant)) {
@@ -1527,8 +1598,10 @@ public VariantKeyFields(int start, int end, int numAllele, String reference, Str
15271598 this .alternate = alternate ;
15281599 this .originalKeyFields = originalKeyFields == null ? this : originalKeyFields ;
15291600 this .referenceBlock = referenceBlock ;
1530- this .sv = new StructuralVariation ();
1531- setCopyNumber (copyNumber );
1601+ this .sv = null ;
1602+ if (copyNumber != null ) {
1603+ setCopyNumber (copyNumber );
1604+ }
15321605 }
15331606
15341607
@@ -1604,7 +1677,28 @@ public Integer getCopyNumber() {
16041677 }
16051678
16061679 public VariantKeyFields setCopyNumber (Integer copyNumber ) {
1607- sv .setCopyNumber (copyNumber );
1680+ if (sv == null ) {
1681+ if (copyNumber != null ) {
1682+ sv = new StructuralVariation ();
1683+ sv .setCopyNumber (copyNumber );
1684+ sv .setType (VariantBuilder .getCNVSubtype (copyNumber ));
1685+ }
1686+ } else {
1687+ sv .setCopyNumber (copyNumber );
1688+ sv .setType (VariantBuilder .getCNVSubtype (copyNumber ));
1689+ }
1690+ return this ;
1691+ }
1692+
1693+ public VariantKeyFields setBreakend (Breakend breakend ) {
1694+ if (sv == null ) {
1695+ if (breakend != null ) {
1696+ sv = new StructuralVariation ();
1697+ sv .setBreakend (breakend );
1698+ }
1699+ } else {
1700+ sv .setBreakend (breakend );
1701+ }
16081702 return this ;
16091703 }
16101704
0 commit comments