@@ -339,8 +339,15 @@ trait SeqLike[+A, +Repr] extends IterableLike[A, Repr] with GenSeqLike[A, Repr]
339339 * match the elements of sequence `that`, or `-1` if no such subsequence exists.
340340 */
341341 def indexOfSlice [B >: A ](that : GenSeq [B ], from : Int ): Int =
342- if (this .hasDefiniteSize && that.hasDefiniteSize)
343- SeqLike .indexOf(thisCollection, 0 , length, that.seq, 0 , that.length, from)
342+ if (this .hasDefiniteSize && that.hasDefiniteSize) {
343+ val l = length
344+ val tl = that.length
345+ val clippedFrom = math.max(0 , from)
346+ if (from > l) - 1
347+ else if (tl < 1 ) clippedFrom
348+ else if (l < tl) - 1
349+ else SeqLike .kmpSearch(thisCollection, clippedFrom, l, that.seq, 0 , tl, true )
350+ }
344351 else {
345352 var i = from
346353 var s : Seq [A ] = thisCollection drop i
@@ -374,8 +381,16 @@ trait SeqLike[+A, +Repr] extends IterableLike[A, Repr] with GenSeqLike[A, Repr]
374381 * @return the last index `<= end` such that the elements of this $coll starting at this index
375382 * match the elements of sequence `that`, or `-1` if no such subsequence exists.
376383 */
def lastIndexOfSlice[B >: A](that: GenSeq[B], end: Int): Int = {
  val sourceLen = length
  val targetLen = that.length
  // Latest start position that neither exceeds `end` nor lets `that` run off the end.
  val lastStart = math.min(sourceLen - targetLen, end)

  // A negative `end` admits no index at all; a non-empty target longer than
  // this sequence cannot occur in it.
  if (end < 0 || (targetLen >= 1 && sourceLen < targetLen)) -1
  // The empty slice matches at the last admissible position.
  else if (targetLen < 1) lastStart
  // Backward search over [0, lastStart + targetLen): the end bound is exclusive,
  // so a match starting exactly at `lastStart` is still inside the window.
  else SeqLike.kmpSearch(thisCollection, 0, lastStart + targetLen, that.seq, 0, targetLen, forward = false)
}
379394
// Bridge overload taking a plain `Seq`; the type ascription selects the
// `GenSeq` overload so this forwards instead of calling itself.
@bridge
def lastIndexOfSlice[B >: A](that: Seq[B], end: Int): Int =
  lastIndexOfSlice(that: GenSeq[B], end)
@@ -693,58 +708,167 @@ trait SeqLike[+A, +Repr] extends IterableLike[A, Repr] with GenSeqLike[A, Repr]
693708/** The companion object for trait `SeqLike`.
694709 */
695710object SeqLike {
696- /** A KMP implementation, based on the undoubtedly reliable wikipedia entry.
711+ // KMP search utilities
712+
/** Make sure a target sequence has fast, correctly-ordered indexing for KMP.
 *
 *  @author Rex Kerr
 *  @since 2.10
 *  @param  W        The target sequence
 *  @param  n0       The first element in the target sequence that we should use
 *  @param  n1       The far end of the target sequence that we should use (exclusive)
 *  @param  forward  `true` to present elements `n0 until n1` in their original order,
 *                   `false` to present that same range reversed
 *  @return Target packed in an IndexedSeq (taken from iterator unless W already is an IndexedSeq)
 */
private def kmpOptimizeWord[B](W: Seq[B], n0: Int, n1: Int, forward: Boolean): IndexedSeq[B] = {
  val wlen = n1 - n0
  W match {
    case iso: IndexedSeq[_] =>
      // Already optimized for indexing--use original (or custom view of original)
      if (forward && n0 == 0 && n1 == W.length) iso.asInstanceOf[IndexedSeq[B]]
      else if (forward) new IndexedSeq[B] {
        val length = wlen
        def apply(x: Int) = iso(n0 + x).asInstanceOf[B]
      }
      else new IndexedSeq[B] {
        val length = wlen
        // Index 0 is the last element of the range, so the word reads backwards.
        def apply(x: Int) = iso(n1 - 1 - x).asInstanceOf[B]
      }
    case _ =>
      // W is probably bad at indexing.  Pack in array (in correct orientation).
      // The copy is done out here rather than inside the anonymous class so that
      // the iterator and the loop counter do not become members of the result.
      val packed = new Array[AnyRef](wlen)
      val wit = W.iterator.drop(n0)
      if (forward) {
        var i = 0
        while (i < wlen) {
          packed(i) = wit.next().asInstanceOf[AnyRef]
          i += 1
        }
      }
      else {
        // Fill from the back so that the packed word ends up reversed.
        var i = wlen - 1
        while (i >= 0) {
          packed(i) = wit.next().asInstanceOf[AnyRef]
          i -= 1
        }
      }
      new IndexedSeq[B] {
        val length = wlen
        def apply(x: Int) = packed(x).asInstanceOf[B]
      }
  }
}
734752
735- while (mi < S .length) {
736- if (W (i) == S (mi)) {
737- i += 1
738- if (i == W .length)
739- return Some (m)
/** Make a jump table for KMP search.
 *
 *  Entry 0 is always `-1` and entry 1 is always `0`; a later entry `pos` holds the
 *  index in the word at which matching resumes after a mismatch at `pos`.
 *  Words of length 0 or 1 are handled and yield a table of that same length.
 *
 *  @author paulp, Rex Kerr
 *  @since 2.10
 *  @param  Wopt The target sequence, as at least an IndexedSeq
 *  @param  wlen Just in case we're only IndexedSeq and not IndexedSeqOptimized
 *  @return KMP jump table for target sequence
 */
private def kmpJumpTable[B](Wopt: IndexedSeq[B], wlen: Int): Array[Int] = {
  val arr = new Array[Int](wlen)
  // Guard the two seed entries so that very short words do not index out of bounds.
  if (wlen > 0) arr(0) = -1
  if (wlen > 1) arr(1) = 0
  var pos = 2
  var cnd = 0
  while (pos < wlen) {
    if (Wopt(pos - 1) == Wopt(cnd)) {
      // The candidate prefix grows by one element.
      arr(pos) = cnd + 1
      pos += 1
      cnd += 1
    }
    else if (cnd > 0) {
      // Retry with the next shorter candidate prefix.
      cnd = arr(cnd)
    }
    else {
      // No prefix ends here; matching restarts from the beginning of the word.
      arr(pos) = 0
      pos += 1
    }
  }
  arr
}
783+
/** Knuth-Morris-Pratt search for a window of `W` inside a window of `S`.
 *  Kept private so that it does not become part of the public API.
 *
 *  @author paulp, Rex Kerr
 *  @since 2.10
 *  @param  S       Sequence that may contain target
 *  @param  m0      First index of S to consider
 *  @param  m1      Last index of S to consider (exclusive)
 *  @param  W       Target sequence
 *  @param  n0      First index of W to match
 *  @param  n1      Last index of W to match (exclusive)
 *  @param  forward Direction of search (from beginning==true, from end==false)
 *  @return Index of start of sequence if found, -1 if not (relative to beginning of S, not m0).
 */
private def kmpSearch[B](S: Seq[B], m0: Int, m1: Int, W: Seq[B], n0: Int, n1: Int, forward: Boolean): Int = {
  val wordLen = n1 - n0
  val spanLen = m1 - m0

  if (wordLen == 1) {
    // Single-element target: an ordinary element search, then reject hits outside [m0, m1).
    val elem = W(n0)
    if (forward) {
      val hit = S.indexOf(elem, m0)
      if (hit < m1) hit else -1
    }
    else {
      val hit = S.lastIndexOf(elem, m1 - 1)
      if (hit > m0 - 1) hit else -1
    }
  }
  else if (spanLen == wordLen) {
    // Both windows have the same size, so the only candidate position is m0.
    // Accepting a little slowness for the uncommon case.
    if (S.view.slice(m0, m1) == W.view.slice(n0, n1)) m0
    else -1
  }
  else S match {
    case _: IndexedSeq[_] =>
      // S can be indexed directly.  For a backward search the word is reversed and
      // S is walked from m1-1 downwards, so one loop serves both directions.
      val word = kmpOptimizeWord(W, n0, n1, forward)
      val jump = kmpJumpTable(word, wordLen)
      val origin = if (forward) m0 else m1 - 1
      val step = if (forward) 1 else -1
      var matched = 0 // elements of the word matched so far
      var shift = 0   // offset of the current candidate from `origin`
      while (matched + shift < spanLen) {
        if (word(matched) == S(origin + step * (matched + shift))) {
          matched += 1
          if (matched == wordLen)
            return (if (forward) shift + m0 else m1 - shift - matched)
        }
        else {
          val fallback = jump(matched)
          shift += matched - fallback
          if (matched > 0) matched = fallback
        }
      }
      -1
    case _ =>
      // S is not indexed directly: it is consumed once, front to back, through its
      // iterator.  A backward search therefore remembers the latest match seen.
      val source = S.iterator.drop(m0)
      val word = kmpOptimizeWord(W, n0, n1, forward = true)
      val jump = kmpJumpTable(word, wordLen)
      val ring = new Array[AnyRef](wordLen) // Ring buffer--need a quick way to do a look-behind
      var buffered = 0 // number of elements pulled from `source` so far
      var matched = 0
      var shift = 0
      var lastHit = -1
      while (shift + m0 + wordLen <= m1) {
        // Pull elements until the one about to be compared is in the ring.
        while (matched + shift >= buffered) {
          ring(buffered % wordLen) = source.next().asInstanceOf[AnyRef]
          buffered += 1
        }
        if (word(matched) == ring((matched + shift) % wordLen)) {
          matched += 1
          if (matched == wordLen) {
            if (forward) return shift + m0
            // Backward search: record this hit, then carry on as though the last
            // element had mismatched so that later occurrences are still found.
            lastHit = shift + m0
            matched -= 1
            val fallback = jump(matched)
            shift += matched - fallback
            if (matched > 0) matched = fallback
          }
        }
        else {
          val fallback = jump(matched)
          shift += matched - fallback
          if (matched > 0) matched = fallback
        }
      }
      lastHit
  }
}
749873
750874 /** Finds a particular index at which one sequence occurs in another sequence.
@@ -768,15 +892,27 @@ object SeqLike {
def indexOf[B](
  source: Seq[B], sourceOffset: Int, sourceCount: Int,
  target: Seq[B], targetOffset: Int, targetCount: Int,
  fromIndex: Int
): Int = {
  // Fiddle with variables to match previous behavior and use kmpSearch
  // Doing LOTS of max/min, both clearer and faster to use math._
  val slen        = source.length
  val clippedFrom = math.max(0, fromIndex)
  // Origin of the source window; every index returned is relative to it.
  val base        = math.min(slen, sourceOffset)
  val s0          = math.min(slen, sourceOffset + clippedFrom)
  // NOTE(review): the end of the window is measured from s0, which already includes
  // fromIndex, so it can lie beyond sourceOffset + sourceCount -- confirm this is intended.
  val s1          = math.min(slen, s0 + sourceCount)
  val tlen        = target.length
  val t0          = math.min(tlen, targetOffset)
  val t1          = math.min(tlen, t0 + targetCount)

  // Error checking
  if (clippedFrom > slen - sourceOffset) -1 // Cannot return an index in range
  // Empty, matches first available position.  Reported relative to the window
  // origin, exactly like the non-trivial case below.
  else if (t1 - t0 < 1) s0 - base
  else if (s1 - s0 < t1 - t0) -1            // Source is too short to find target
  else {
    // Nontrivial search; kmpSearch answers in absolute indices of `source`.
    val ans = kmpSearch(source, s0, s1, target, t0, t1, forward = true)
    if (ans < 0) ans else ans - base
  }
}
781917
782918 /** Finds a particular index at which one sequence occurs in another sequence.
@@ -787,18 +923,27 @@ object SeqLike {
def lastIndexOf[B](
  source: Seq[B], sourceOffset: Int, sourceCount: Int,
  target: Seq[B], targetOffset: Int, targetCount: Int,
  fromIndex: Int
): Int = {
  // Fiddle with variables to match previous behavior and use kmpSearch
  // Doing LOTS of max/min, both clearer and faster to use math._
  val slen        = source.length
  val tlen        = target.length
  val s0          = math.min(slen, sourceOffset)
  val s1          = math.min(slen, s0 + sourceCount)
  val clippedFrom = math.min(s1 - s0, fromIndex)
  val t0          = math.min(tlen, targetOffset)
  val t1          = math.min(tlen, t0 + targetCount)
  // Exclusive end of the region to search.  A match that starts exactly at
  // clippedFrom occupies clippedFrom until clippedFrom + (t1 - t0), so the whole
  // target length has to be added; subtracting one here would drop that match.
  val searchEnd   = math.min(s1, s0 + clippedFrom + (t1 - t0))

  // Error checking
  if (clippedFrom < 0) -1                    // Cannot return an index in range
  // Empty, matches last available position.  Reported relative to the window
  // origin, exactly like the non-trivial case below.
  else if (t1 - t0 < 1) clippedFrom
  else if (searchEnd - s0 < t1 - t0) -1      // Source is too short to find target
  else {
    // Nontrivial search; kmpSearch answers in absolute indices of `source`.
    val ans = kmpSearch(source, s0, searchEnd, target, t0, t1, forward = false)
    if (ans < 0) ans else ans - s0
  }
}
804949}
0 commit comments