Merge "Bugfix for unicode characters in commit message"
This commit is contained in:
committed by
Android (Google) Code Review
commit
545c2ae271
@@ -199,7 +199,7 @@ func readCSVFiles(projectCSVFile, commitCSVFile string) ([]ent.DiffRow, []ent.Co
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrap(err, "Error converting CSV file to entities")
|
||||
}
|
||||
commitRows, err := csvFileToCommitRows(commitCSVFile)
|
||||
commitRows, err := CSVFileToCommitRows(commitCSVFile)
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrap(err, "Error converting CSV file to entities")
|
||||
}
|
||||
@@ -252,7 +252,7 @@ func toDiffRows(entities []interface{}) ([]ent.DiffRow, error) {
|
||||
return diffRows, nil
|
||||
}
|
||||
|
||||
func csvFileToCommitRows(csvFile string) ([]ent.CommitRow, error) {
|
||||
func CSVFileToCommitRows(csvFile string) ([]ent.CommitRow, error) {
|
||||
entities, err := filesystem.CSVFileToEntities(
|
||||
csvFile,
|
||||
func(cols []string) (interface{}, error) {
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
cst "repodiff/constants"
|
||||
ent "repodiff/entities"
|
||||
"repodiff/repositories"
|
||||
)
|
||||
|
||||
func TestRegressionIncorrectStringValue(t *testing.T) {
|
||||
commitRows, _ := CSVFileToCommitRows("testdata/commit.csv")
|
||||
analyzed := make([]ent.AnalyzedCommitRow, len(commitRows))
|
||||
for i, row := range commitRows {
|
||||
analyzed[i] = ent.AnalyzedCommitRow{
|
||||
CommitRow: row,
|
||||
Type: cst.Empty,
|
||||
}
|
||||
}
|
||||
|
||||
c, _ := repositories.NewCommitRepository(
|
||||
ent.MappedDiffTarget{
|
||||
UpstreamTarget: 1,
|
||||
DownstreamTarget: 2,
|
||||
},
|
||||
)
|
||||
err := c.InsertCommitRows(analyzed)
|
||||
assert.Equal(t, nil, err, "Error should be nil")
|
||||
}
|
||||
2
tools/repo_diff/service/repodiff/controllers/testdata/commit.csv
vendored
Normal file
2
tools/repo_diff/service/repodiff/controllers/testdata/commit.csv
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
Date,Commit,Downstream Project,Author,Subject
|
||||
2018/04/28,f3bc9021add1f9cb458223dd374c58c69f53e207,platform/frameworks/support,aurimas@google.com,Move to AGP 3.0.0 stable 😁
|
||||
|
@@ -5,19 +5,19 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
c "repodiff/constants"
|
||||
e "repodiff/entities"
|
||||
cst "repodiff/constants"
|
||||
ent "repodiff/entities"
|
||||
"repodiff/persistence/filesystem"
|
||||
)
|
||||
|
||||
func TestProjectNamesToType(t *testing.T) {
|
||||
var common, downstream, upstream e.ManifestFile
|
||||
var common, downstream, upstream ent.ManifestFile
|
||||
filesystem.ReadXMLAsEntity("testdata/common_manifest.xml", &common)
|
||||
filesystem.ReadXMLAsEntity("testdata/downstream_manifest.xml", &downstream)
|
||||
filesystem.ReadXMLAsEntity("testdata/upstream_manifest.xml", &upstream)
|
||||
|
||||
nameToType := ProjectNamesToType(
|
||||
e.ManifestFileGroup{
|
||||
&ent.ManifestFileGroup{
|
||||
Common: common,
|
||||
Upstream: upstream,
|
||||
Downstream: downstream,
|
||||
@@ -27,7 +27,7 @@ func TestProjectNamesToType(t *testing.T) {
|
||||
|
||||
distinctCount := 0
|
||||
for _, projectType := range nameToType {
|
||||
if projectType == c.DifferentialSpecific {
|
||||
if projectType == cst.DifferentialSpecific {
|
||||
distinctCount++
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
package interactors
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// simpleSet is a set of strings backed by a map. A key counts as a member only
// when it is present and mapped to true (see Contains).
type simpleSet map[string]bool
|
||||
|
||||
// unicode matches a run of one or more consecutive characters outside the
// ASCII range 0x00-0x7F. Each match is a whole run, not a single character.
// NOTE(review): the name coincides with the standard library "unicode"
// package, which therefore cannot be imported unaliased in this file.
var unicode = regexp.MustCompile("[^\x00-\x7F]+")
|
||||
|
||||
func (s simpleSet) Contains(other string) bool {
|
||||
enabled, exists := s[other]
|
||||
return exists && enabled
|
||||
@@ -45,7 +49,7 @@ func SetSubtract(add, negate []string) []string {
|
||||
}
|
||||
|
||||
func SetUnion(slice1, slice2 []string) []string {
|
||||
return allKeys(
|
||||
union := allKeys(
|
||||
sliceToSimpleSet(
|
||||
append(
|
||||
slice1,
|
||||
@@ -53,6 +57,8 @@ func SetUnion(slice1, slice2 []string) []string {
|
||||
),
|
||||
),
|
||||
)
|
||||
sort.Strings(union)
|
||||
return union
|
||||
}
|
||||
|
||||
func sliceToSimpleSet(s []string) simpleSet {
|
||||
@@ -87,3 +93,23 @@ func allKeys(sets ...simpleSet) []string {
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
// FilterNoUnicode returns s with every non-ASCII character removed.
//
// Only code points in the range U+0000 through U+007F are kept, in their
// original order. Every other rune is dropped, including each rune of a run of
// adjacent non-ASCII characters. Bytes that are not valid UTF-8 are dropped as
// well. A string that is already pure ASCII is returned unchanged.
func FilterNoUnicode(s string) string {
	// Spelled as a literal rather than the standard library's unicode.MaxASCII
	// because this package declares its own identifier named "unicode".
	const maxASCII = 0x7F

	return strings.Map(
		func(r rune) rune {
			if r > maxASCII {
				// A negative result tells strings.Map to drop the rune.
				// Invalid bytes arrive here as U+FFFD and are dropped too.
				return -1
			}
			return r
		},
		s,
	)
}
|
||||
|
||||
@@ -90,3 +90,23 @@ func TestSetUnion(t *testing.T) {
|
||||
union := SetUnion(s1, s2)
|
||||
assert.Equal(t, expected, union, "Union of s2 and s1")
|
||||
}
|
||||
|
||||
func TestFilterNoUnicodeWithUnicode(t *testing.T) {
|
||||
regressionStr := "Move to AGP 3.0.0 stable 😁"
|
||||
assert.Equal(
|
||||
t,
|
||||
"Move to AGP 3.0.0 stable ",
|
||||
FilterNoUnicode(regressionStr),
|
||||
"Function should filter out unicode characters",
|
||||
)
|
||||
}
|
||||
|
||||
func TestFilterNoUnicodeWithNoUnicode(t *testing.T) {
|
||||
validStr := "I'm a regular string with no whacky unicode chars"
|
||||
assert.Equal(
|
||||
t,
|
||||
validStr,
|
||||
FilterNoUnicode(validStr),
|
||||
"No change should occur",
|
||||
)
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"repodiff/constants"
|
||||
e "repodiff/entities"
|
||||
"repodiff/interactors"
|
||||
"repodiff/utils"
|
||||
)
|
||||
|
||||
@@ -123,7 +124,7 @@ func commitRowToPersistCols(c e.AnalyzedCommitRow, uuidBytes string, timestamp i
|
||||
c.Commit,
|
||||
c.DownstreamProject,
|
||||
c.Author,
|
||||
c.Subject,
|
||||
interactors.FilterNoUnicode(c.Subject),
|
||||
c.Type,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user