Merge branch 'iambus-master'

diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 17ad990..08c36e7 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go

@@ -388,32 +388,30 @@
 }
 
 // DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line.
+// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
 func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
-	chars1, chars2, lineArray := dmp.diffLinesToIndexes(text1, text2)
-	return indexesToString(chars1), indexesToString(chars2), lineArray
+	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
+	return chars1, chars2, lineArray
 }
 
 // DiffLinesToRunes splits two texts into a list of runes.
 func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
-	chars1, chars2, lineArray := dmp.diffLinesToIndexes(text1, text2)
-	return []rune(indexesToString(chars1)), []rune(indexesToString(chars2)), lineArray
-}
-
-// diffLinesToIndexes splits two texts into a list of indexes
-func (dmp *DiffMatchPatch) diffLinesToIndexes(text1, text2 string) ([]index, []index, []string) {
 	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
-	return chars1, chars2, lineArray
+	return []rune(chars1), []rune(chars2), lineArray
 }
 
 // DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
 func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
 	hydrated := make([]Diff, 0, len(diffs))
 	for _, aDiff := range diffs {
-		var sb strings.Builder
-		for _, i := range stringToIndex(aDiff.Text) {
-			sb.WriteString(lineArray[i])
+		runes := []rune(aDiff.Text)
+		text := make([]string, len(runes))
+
+		for i, r := range runes {
+			text[i] = lineArray[runeToInt(r)]
 		}
-		aDiff.Text = sb.String()
+
+		aDiff.Text = strings.Join(text, "")
 		hydrated = append(hydrated, aDiff)
 	}
 	return hydrated
@@ -1148,13 +1146,28 @@
 
 		switch diff.Type {
 		case DiffInsert:
-			_, _ = buff.WriteString("\x1b[32m")
-			_, _ = buff.WriteString(text)
-			_, _ = buff.WriteString("\x1b[0m")
+			lines := strings.Split(text, "\n")
+			for i, line := range lines {
+				_, _ = buff.WriteString("\x1b[32m")
+				_, _ = buff.WriteString(line)
+				if i < len(lines)-1 {
+					_, _ = buff.WriteString("\x1b[0m\n")
+				} else {
+					_, _ = buff.WriteString("\x1b[0m")
+				}
+			}
+
 		case DiffDelete:
-			_, _ = buff.WriteString("\x1b[31m")
-			_, _ = buff.WriteString(text)
-			_, _ = buff.WriteString("\x1b[0m")
+			lines := strings.Split(text, "\n")
+			for i, line := range lines {
+				_, _ = buff.WriteString("\x1b[31m")
+				_, _ = buff.WriteString(line)
+				if i < len(lines)-1 {
+					_, _ = buff.WriteString("\x1b[0m\n")
+				} else {
+					_, _ = buff.WriteString("\x1b[0m")
+				}
+			}
 		case DiffEqual:
 			_, _ = buff.WriteString(text)
 		}
@@ -1306,8 +1319,7 @@
 }
 
 // diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
-func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) ([]index, []index, []string) {
-	// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
+func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
 	lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
 
 	lineHash := make(map[string]int)
@@ -1315,12 +1327,11 @@
 	strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
 	strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)
 
-	return strIndexArray1, strIndexArray2, lineArray
+	return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
 }
 
-// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
-func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 {
-	// Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
+// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []index.
+func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []index {
 	lineStart := 0
 	lineEnd := -1
 	strs := []index{}

diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index d1611da..2c43864 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go

@@ -317,9 +317,9 @@
 		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\x01\x02\x03\x03", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
 		{"a", "b", "\x01", "\x02", []string{"", "a", "b"}},
 		// Omit final newline.
-		{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+		{"alpha\nbeta\nalpha", "", "\x01\x02\x03", "", []string{"", "alpha\n", "beta\n", "alpha"}},
 		// Same lines in Text1 and Text2
-		{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
+		{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "\x01\x02\x03", "\x01\x04\x03\x05", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
 	} {
 		actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
 		assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -1033,6 +1033,17 @@
 
 			Expected: "a\n\x1b[31m<B>b</B>\x1b[0m\x1b[32mc&d\x1b[0m",
 		},
+		{
+			Diffs: []Diff{
+				{Type: DiffEqual, Text: "a\n"},
+				{Type: DiffDelete, Text: "b\nc\n"},
+				{Type: DiffEqual, Text: "def"},
+				{Type: DiffInsert, Text: "\ng\nh"},
+				{Type: DiffEqual, Text: "\ni"},
+			},
+
+			Expected: "a\n\x1b[31mb\x1b[0m\n\x1b[31mc\x1b[0m\n\x1b[31m\x1b[0mdef\x1b[32m\x1b[0m\n\x1b[32mg\x1b[0m\n\x1b[32mh\x1b[0m\ni",
+		},
 	} {
 		actual := dmp.DiffPrettyText(tc.Diffs)
 		assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -1468,6 +1479,38 @@
 	assert.NotEmpty(t, diffs)
 }
 
+func TestDiffPartialLineIndex(t *testing.T) {
+	dmp := New()
+	t1, t2, tt := dmp.DiffLinesToChars(
+		`line 1
+line 2
+line 3
+line 4
+line 5
+line 6
+line 7
+line 8
+line 9
+line 10 text1`,
+		`line 1
+line 2
+line 3
+line 4
+line 5
+line 6
+line 7
+line 8
+line 9
+line 10 text2`)
+	diffs := dmp.DiffMain(t1, t2, false)
+	diffs = dmp.DiffCharsToLines(diffs, tt)
+	assert.Equal(t, []Diff{
+		Diff{DiffEqual, "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\n"},
+		Diff{DiffDelete, "line 10 text1"},
+		Diff{DiffInsert, "line 10 text2"},
+	}, diffs)
+}
+
 func BenchmarkDiffMain(bench *testing.B) {
 	s1 := "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"
 	s2 := "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"

diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go
index 265f29c..573b6bf 100644
--- a/diffmatchpatch/stringutil.go
+++ b/diffmatchpatch/stringutil.go

@@ -9,10 +9,16 @@
 package diffmatchpatch
 
 import (
+	"fmt"
 	"strings"
 	"unicode/utf8"
 )
 
+const UNICODE_INVALID_RANGE_START = 0xD800
+const UNICODE_INVALID_RANGE_END = 0xDFFF
+const UNICODE_INVALID_RANGE_DELTA = UNICODE_INVALID_RANGE_END - UNICODE_INVALID_RANGE_START + 1
+const UNICODE_RANGE_MAX = 0x10FFFF
+
 // unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
 // In speed critical applications this could be dropped since the receiving application will certainly decode these fine. Note that this function is case-sensitive.  Thus "%3F" would not be unescaped.  But this is ok because it is only called with the output of HttpUtility.UrlEncode which returns lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
 var unescaper = strings.NewReplacer(
@@ -86,3 +92,99 @@
 	}
 	return -1
 }
+
+func intArrayToString(ns []index) string {
+	if len(ns) == 0 {
+		return ""
+	}
+
+	b := []rune{}
+	for _, n := range ns {
+		b = append(b, intToRune(uint32(n)))
+	}
+	return string(b)
+}
+
+// These constants define the number of bits representable
+// in 1,2,3,4 byte utf8 sequences, respectively.
+const ONE_BYTE_BITS = 7
+const TWO_BYTE_BITS = 11
+const THREE_BYTE_BITS = 16
+const FOUR_BYTE_BITS = 21
+
+// Helper for getting a sequence of bits from an integer.
+func getBits(i uint32, cnt byte, from byte) byte {
+	return byte((i >> from) & ((1 << cnt) - 1))
+}
+
+// Converts an integer in the range 0~1112060 into a rune.
+// Based on the ranges table in https://en.wikipedia.org/wiki/UTF-8
+func intToRune(i uint32) rune {
+	if i < (1 << ONE_BYTE_BITS) {
+		return rune(i)
+	}
+
+	if i < (1 << TWO_BYTE_BITS) {
+		r, size := utf8.DecodeRune([]byte{0b11000000 | getBits(i, 5, 6), 0b10000000 | getBits(i, 6, 0)})
+		if size != 2 || r == utf8.RuneError {
+			panic(fmt.Sprintf("Error encoding an int %d with size 2, got rune %v and size %d", size, r, i))
+		}
+		return r
+	}
+
+	// Last -3 here needed because for some reason 3rd to last codepoint 65533 in this range
+	// was returning utf8.RuneError during encoding.
+	if i < ((1 << THREE_BYTE_BITS) - UNICODE_INVALID_RANGE_DELTA - 3) {
+		if i >= UNICODE_INVALID_RANGE_START {
+			i += UNICODE_INVALID_RANGE_DELTA
+		}
+
+		r, size := utf8.DecodeRune([]byte{0b11100000 | getBits(i, 4, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
+		if size != 3 || r == utf8.RuneError {
+			panic(fmt.Sprintf("Error encoding an int %d with size 3, got rune %v and size %d", size, r, i))
+		}
+		return r
+	}
+
+	if i < (1<<FOUR_BYTE_BITS - UNICODE_INVALID_RANGE_DELTA - 3) {
+		i += UNICODE_INVALID_RANGE_DELTA + 3
+		r, size := utf8.DecodeRune([]byte{0b11110000 | getBits(i, 3, 18), 0b10000000 | getBits(i, 6, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
+		if size != 4 || r == utf8.RuneError {
+			panic(fmt.Sprintf("Error encoding an int %d with size 4, got rune %v and size %d", size, r, i))
+		}
+		return r
+	}
+	panic(fmt.Sprintf("The integer %d is too large for runeToInt()", i))
+}
+
+// Converts a rune generated by intToRune back to an integer
+func runeToInt(r rune) uint32 {
+	i := uint32(r)
+	if i < (1 << ONE_BYTE_BITS) {
+		return i
+	}
+
+	bytes := []byte{0, 0, 0, 0}
+
+	size := utf8.EncodeRune(bytes, r)
+
+	if size == 2 {
+		return uint32(bytes[0]&0b11111)<<6 | uint32(bytes[1]&0b111111)
+	}
+
+	if size == 3 {
+		result := uint32(bytes[0]&0b1111)<<12 | uint32(bytes[1]&0b111111)<<6 | uint32(bytes[2]&0b111111)
+		if result >= UNICODE_INVALID_RANGE_END {
+			return result - UNICODE_INVALID_RANGE_DELTA
+		}
+
+		return result
+	}
+
+	if size == 4 {
+		result := uint32(bytes[0]&0b111)<<18 | uint32(bytes[1]&0b111111)<<12 | uint32(bytes[2]&0b111111)<<6 | uint32(bytes[3]&0b111111)
+		return result - UNICODE_INVALID_RANGE_DELTA - 3
+	}
+
+	panic(fmt.Sprintf("Unexpected state decoding rune=%v size=%d", r, size))
+}

diff --git a/diffmatchpatch/stringutil_test.go b/diffmatchpatch/stringutil_test.go
index ab2bc10..73ab6ca 100644
--- a/diffmatchpatch/stringutil_test.go
+++ b/diffmatchpatch/stringutil_test.go

@@ -114,3 +114,21 @@
 		assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc))
 	}
 }
+
+// Exhaustive check for all ints from 0 to 1112060 for correctness of implementation
+// of `intToRune() -> runeToInt()`.
+// This test is slow and runs longer than 5 seconds but it does provide a safety
+// guarantee that these 2 functions are correct for the ranges we support.
+func TestRuneToInt(t *testing.T) {
+
+	for i := uint32(0); i <= UNICODE_RANGE_MAX-UNICODE_INVALID_RANGE_DELTA-3; i += 1 {
+		r := intToRune(i)
+		ic := runeToInt(r)
+
+		assert.Equal(t, i, ic, fmt.Sprintf("intToRune(%d)=%d and runeToInt(%d)=%d", i, r, r, ic))
+	}
+
+	assert.Panics(t, func() {
+		intToRune(UNICODE_RANGE_MAX - UNICODE_INVALID_RANGE_DELTA - 2)
+	})
+}

diff --git a/go.mod b/go.mod
index c731033..c7886ce 100644
--- a/go.mod
+++ b/go.mod

@@ -8,4 +8,4 @@
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
 
-go 1.12
+go 1.13