Переглянути джерело

don't prematurely calculate fragment newlines (#1909)

Zachary Rice 7 місяців тому
батько
коміт
2a7bcffd7c
2 змінені файли: 34 додано та 19 видалено
  1. 11 6
      detect/detect.go
  2. 23 13
      detect/detect_test.go

+ 11 - 6
detect/detect.go

@@ -292,9 +292,6 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 		return findings
 	}
 
-	// add newline indices for location calculation in detectRule
-	newlineIndices := newLineRegexp.FindAllStringIndex(fragment.Raw, -1)
-
 	// setup variables to handle different decoding passes
 	currentRaw := fragment.Raw
 	encodedSegments := []*codec.EncodedSegment{}
@@ -314,14 +311,14 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 			if len(rule.Keywords) == 0 {
 				// if no keywords are associated with the rule always scan the
 				// fragment using the rule
-				findings = append(findings, d.detectRule(fragment, newlineIndices, currentRaw, rule, encodedSegments)...)
+				findings = append(findings, d.detectRule(fragment, currentRaw, rule, encodedSegments)...)
 				continue
 			}
 
 			// check if keywords are in the fragment
 			for _, k := range rule.Keywords {
 				if _, ok := keywords[strings.ToLower(k)]; ok {
-					findings = append(findings, d.detectRule(fragment, newlineIndices, currentRaw, rule, encodedSegments)...)
+					findings = append(findings, d.detectRule(fragment, currentRaw, rule, encodedSegments)...)
 					break
 				}
 			}
@@ -348,7 +345,7 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 }
 
 // detectRule scans the given fragment for the given rule and returns a list of findings
-func (d *Detector) detectRule(fragment Fragment, newlineIndices [][]int, currentRaw string, r config.Rule, encodedSegments []*codec.EncodedSegment) []report.Finding {
+func (d *Detector) detectRule(fragment Fragment, currentRaw string, r config.Rule, encodedSegments []*codec.EncodedSegment) []report.Finding {
 	var (
 		findings []report.Finding
 		logger   = func() zerolog.Logger {
@@ -415,6 +412,14 @@ func (d *Detector) detectRule(fragment Fragment, newlineIndices [][]int, current
 		}
 	}
 
+	matches := r.Regex.FindAllStringIndex(currentRaw, -1)
+	if len(matches) == 0 {
+		return findings
+	}
+
+	// TODO profile this, probably should replace with something more efficient
+	newlineIndices := newLineRegexp.FindAllStringIndex(fragment.Raw, -1)
+
 	// use currentRaw instead of fragment.Raw since this represents the current
 	// decoding pass on the text
 	for _, matchIndex := range r.Regex.FindAllStringIndex(currentRaw, -1) {

+ 23 - 13
detect/detect_test.go

@@ -2107,8 +2107,10 @@ func TestDetectRuleAllowlist(t *testing.T) {
 			},
 			expected: []report.Finding{
 				{
-					StartColumn: 50,
-					EndColumn:   60,
+					StartLine:   1,
+					EndLine:     1,
+					StartColumn: 18,
+					EndColumn:   28,
 					Line:        "let username = 'james@mail.com';\nlet password = 'Summer2024!';",
 					Match:       "Summer2024!",
 					Secret:      "Summer2024!",
@@ -2132,8 +2134,10 @@ func TestDetectRuleAllowlist(t *testing.T) {
 			},
 			expected: []report.Finding{
 				{
-					StartColumn: 50,
-					EndColumn:   60,
+					StartLine:   1,
+					EndLine:     1,
+					StartColumn: 18,
+					EndColumn:   28,
 					Line:        "let username = 'james@mail.com';\nlet password = 'Summer2024!';",
 					Match:       "Summer2024!",
 					Secret:      "Summer2024!",
@@ -2203,8 +2207,10 @@ func TestDetectRuleAllowlist(t *testing.T) {
 			},
 			expected: []report.Finding{
 				{
-					StartColumn: 50,
-					EndColumn:   60,
+					StartLine:   1,
+					EndLine:     1,
+					StartColumn: 18,
+					EndColumn:   28,
 					Line:        "let username = 'james@mail.com';\nlet password = 'Summer2024!';",
 					Match:       "Summer2024!",
 					Secret:      "Summer2024!",
@@ -2225,8 +2231,10 @@ func TestDetectRuleAllowlist(t *testing.T) {
 			},
 			expected: []report.Finding{
 				{
-					StartColumn: 50,
-					EndColumn:   60,
+					StartLine:   1,
+					EndLine:     1,
+					StartColumn: 18,
+					EndColumn:   28,
 					Line:        "let username = 'james@mail.com';\nlet password = 'Summer2024!';",
 					Match:       "Summer2024!",
 					Secret:      "Summer2024!",
@@ -2249,8 +2257,10 @@ func TestDetectRuleAllowlist(t *testing.T) {
 			},
 			expected: []report.Finding{
 				{
-					StartColumn: 50,
-					EndColumn:   60,
+					StartLine:   1,
+					EndLine:     1,
+					StartColumn: 18,
+					EndColumn:   28,
 					Line:        "let username = 'james@mail.com';\nlet password = 'Summer2024!';",
 					Match:       "Summer2024!",
 					Secret:      "Summer2024!",
@@ -2290,7 +2300,7 @@ let password = 'Summer2024!';`
 
 			f := tc.fragment
 			f.Raw = raw
-			actual := d.detectRule(f, [][]int{}, raw, rule, []*codec.EncodedSegment{})
+			actual := d.detectRule(f, raw, rule, []*codec.EncodedSegment{})
 			if diff := cmp.Diff(tc.expected, actual); diff != "" {
 				t.Errorf("diff: (-want +got)\n%s", diff)
 			}
@@ -2451,7 +2461,7 @@ func TestWindowsFileSeparator_RulePath(t *testing.T) {
 	require.NoError(t, err)
 	for name, test := range tests {
 		t.Run(name, func(t *testing.T) {
-			actual := d.detectRule(test.fragment, [][]int{}, test.fragment.Raw, test.rule, []*codec.EncodedSegment{})
+			actual := d.detectRule(test.fragment, test.fragment.Raw, test.rule, []*codec.EncodedSegment{})
 			if diff := cmp.Diff(test.expected, actual); diff != "" {
 				t.Errorf("diff: (-want +got)\n%s", diff)
 			}
@@ -2637,7 +2647,7 @@ func TestWindowsFileSeparator_RuleAllowlistPaths(t *testing.T) {
 	require.NoError(t, err)
 	for name, test := range tests {
 		t.Run(name, func(t *testing.T) {
-			actual := d.detectRule(test.fragment, [][]int{}, test.fragment.Raw, test.rule, []*codec.EncodedSegment{})
+			actual := d.detectRule(test.fragment, test.fragment.Raw, test.rule, []*codec.EncodedSegment{})
 			if diff := cmp.Diff(test.expected, actual); diff != "" {
 				t.Errorf("diff: (-want +got)\n%s", diff)
 			}