Skip to content

Commit

Permalink
chunk data values by keyword, expand keyword checks, and add collecti…
Browse files Browse the repository at this point in the history
…on name to keywords
  • Loading branch information
zricethezav committed Mar 20, 2024
1 parent b11ce72 commit 07e8077
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 90 deletions.
84 changes: 46 additions & 38 deletions pkg/sources/postman/postman.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,18 @@ func (s *Source) addKeywords(keywords []string) {
}

// addKeyword records which detector keywords are relevant for keyword.
//
// If keyword is itself a key of s.DetectorKeywords it is stored in s.keywords
// unchanged. Otherwise every detector keyword that occurs inside keyword is
// stored instead, e.g. the Postman variable key "datadog-token" selects the
// detector keyword "datadog". The containment check ignores case so that
// human-written names such as "Datadog Token" select the same keyword. Names
// that match nothing leave s.keywords untouched.
func (s *Source) addKeyword(keyword string) {
	// Fast check: keyword is exactly a detector keyword.
	if _, ok := s.DetectorKeywords[keyword]; ok {
		s.keywords[keyword] = struct{}{}
		return
	}

	// Slow check: a detector keyword may be a substring of keyword.
	// Lower-case once outside the loop; the stored key is always the detector
	// keyword exactly as it appears in s.DetectorKeywords.
	lowered := strings.ToLower(keyword)
	for k := range s.DetectorKeywords {
		if strings.Contains(lowered, strings.ToLower(k)) {
			s.keywords[k] = struct{}{}
		}
	}
}

Expand Down Expand Up @@ -314,6 +324,7 @@ func (s *Source) scanWorkspace(ctx context.Context, chunksChan chan *sources.Chu
if err != nil {
return err
}
s.addKeyword(collection.Info.Name)
s.scanCollection(ctx, chunksChan, metadata, collection)
}
return nil
Expand Down Expand Up @@ -412,7 +423,7 @@ func (s *Source) scanEvent(ctx context.Context, chunksChan chan *sources.Chunk,
}
}

s.scanData(ctx, chunksChan, s.formatAndInjectKeywords(s.buildSubstitueSet(metadata, data)), metadata)
s.scanData(ctx, chunksChan, s.generateKeywordDataSet(s.buildSubstitueSet(metadata, data)), metadata)
}

func (s *Source) scanAuth(ctx context.Context, chunksChan chan *sources.Chunk, m Metadata, auth Auth, u URL) {
Expand Down Expand Up @@ -495,7 +506,7 @@ func (s *Source) scanAuth(ctx context.Context, chunksChan chan *sources.Chunk, m
}

m.FieldType = AUTH_TYPE
s.scanData(ctx, chunksChan, s.formatAndInjectKeywords(s.buildSubstitueSet(m, authData)), m)
s.scanData(ctx, chunksChan, s.generateKeywordDataSet(s.buildSubstitueSet(m, authData)), m)
}

func (s *Source) scanHTTPRequest(ctx context.Context, chunksChan chan *sources.Chunk, metadata Metadata, r Request) {
Expand All @@ -513,9 +524,7 @@ func (s *Source) scanHTTPRequest(ctx context.Context, chunksChan chan *sources.C

if r.URL.Raw != "" {
metadata.Type = originalType + " > request URL (no query parameters)"
// Note: query parameters are handled separately
u := fmt.Sprintf("%s://%s/%s", r.URL.Protocol, strings.Join(r.URL.Host, "."), strings.Join(r.URL.Path, "/"))
s.scanData(ctx, chunksChan, s.formatAndInjectKeywords(s.buildSubstitueSet(metadata, u)), metadata)
s.scanData(ctx, chunksChan, s.generateKeywordDataSet(s.buildSubstitueSet(metadata, r.URL.Raw)), metadata)
}

if len(r.URL.Query) > 0 {
Expand Down Expand Up @@ -564,7 +573,8 @@ func (s *Source) scanBody(ctx context.Context, chunksChan chan *sources.Chunk, m
if b.Mode == "raw" {
m.Type = originalType + " > raw"
}
s.scanData(ctx, chunksChan, s.formatAndInjectKeywords(s.buildSubstitueSet(m, data)), m)
s.scanData(ctx, chunksChan, s.generateKeywordDataSet(s.buildSubstitueSet(m, data)), m)
// s.scanData(ctx, chunksChan, s.formatAndInjectKeywords(s.buildSubstitueSet(m, data)), m)
default:
break
}
Expand All @@ -588,7 +598,7 @@ func (s *Source) scanHTTPResponse(ctx context.Context, chunksChan chan *sources.
// Body in a response is just a string
if response.Body != "" {
m.Type = originalType + " > response body"
s.scanData(ctx, chunksChan, s.formatAndInjectKeywords(s.buildSubstitueSet(m, response.Body)), m)
s.scanData(ctx, chunksChan, s.generateKeywordDataSet(s.buildSubstitueSet(m, response.Body)), m)
}

if response.OriginalRequest.Method != "" {
Expand Down Expand Up @@ -623,42 +633,40 @@ func (s *Source) scanVariableData(ctx context.Context, chunksChan chan *sources.
}

m.FieldType = m.Type + " variables"
s.scanData(ctx, chunksChan, s.formatAndInjectKeywords(values), m)
s.scanData(ctx, chunksChan, s.generateKeywordDataSet(values), m)
}

func (s *Source) scanData(ctx context.Context, chunksChan chan *sources.Chunk, data string, metadata Metadata) {
if data == "" {
return
}
func (s *Source) scanData(ctx context.Context, chunksChan chan *sources.Chunk, data []string, metadata Metadata) {
metadata.FieldType = metadata.Type

chunksChan <- &sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
JobID: s.JobID(),
Data: []byte(data),
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Postman{
Postman: &source_metadatapb.Postman{
Link: metadata.Link,
WorkspaceUuid: metadata.WorkspaceUUID,
WorkspaceName: metadata.WorkspaceName,
CollectionId: metadata.CollectionInfo.UID,
CollectionName: metadata.CollectionInfo.Name,
EnvironmentId: metadata.EnvironmentID,
EnvironmentName: metadata.EnvironmentName,
RequestId: metadata.RequestID,
RequestName: metadata.RequestName,
FolderId: metadata.FolderID,
FolderName: metadata.FolderName,
FieldType: metadata.FieldType,
FieldName: metadata.FieldName,
VariableType: metadata.VarType,
for _, d := range data {
chunksChan <- &sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
JobID: s.JobID(),
Data: []byte(d),
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Postman{
Postman: &source_metadatapb.Postman{
Link: metadata.Link,
WorkspaceUuid: metadata.WorkspaceUUID,
WorkspaceName: metadata.WorkspaceName,
CollectionId: metadata.CollectionInfo.UID,
CollectionName: metadata.CollectionInfo.Name,
EnvironmentId: metadata.EnvironmentID,
EnvironmentName: metadata.EnvironmentName,
RequestId: metadata.RequestID,
RequestName: metadata.RequestName,
FolderId: metadata.FolderID,
FolderName: metadata.FolderName,
FieldType: metadata.FieldType,
FieldName: metadata.FieldName,
VariableType: metadata.VarType,
},
},
},
},
Verify: s.verify,
Verify: s.verify,
}
}
}

Expand Down
25 changes: 11 additions & 14 deletions pkg/sources/postman/substitution.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,18 @@ func (sub *Substitution) add(metadata Metadata, key string, value string) {
})
}

// keywordCombinations pairs str with every keyword collected on the source.
//
// It returns one "keyword:str" line per entry in s.keywords, each terminated
// by a newline. Lines follow map iteration order, which is unspecified. An
// empty keyword set yields the empty string.
func (s *Source) keywordCombinations(str string) string {
	// A Builder avoids re-copying the accumulated output on every iteration,
	// which repeated string concatenation would do.
	var b strings.Builder
	for keyword := range s.keywords {
		fmt.Fprintf(&b, "%s:%s\n", keyword, str)
	}
	return b.String()
}

func (s *Source) formatAndInjectKeywords(data []string) string {
var ret []string
for _, d := range data {
ret = append(ret, s.keywordCombinations(d))
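// generateKeywordDataSet builds one chunk of data per keyword: each returned
// string holds every value prefixed with a single keyword ("keyword:value\n").
// Keeping keywords in separate chunks helps ensure we aren't accidentally
// sending keys to the wrong provider.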
func (s *Source) generateKeywordDataSet(values []string) []string {
ret := []string{}
for k := range s.keywords {
data := ""
for _, d := range values {
data += fmt.Sprintf("%s:%s\n", k, d)
}
ret = append(ret, data)
}
return strings.Join(ret, "")
return ret
}

func (s *Source) buildSubstitueSet(metadata Metadata, data string) []string {
Expand Down
45 changes: 7 additions & 38 deletions pkg/sources/postman/substitution_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package postman
import (
"reflect"
"sort"
"strings"
"testing"
)

Expand Down Expand Up @@ -38,30 +37,6 @@ func TestSubstitution_Add(t *testing.T) {
}
}

// TestSource_KeywordCombinations checks that keywordCombinations emits one
// "keyword:value" line for each keyword accepted by addKeyword, and nothing
// for a keyword that is absent from DetectorKeywords.
func TestSource_KeywordCombinations(t *testing.T) {
	src := &Source{
		DetectorKeywords: map[string]struct{}{"keyword1": {}, "keyword2": {}},
		keywords:         map[string]struct{}{},
	}
	for _, kw := range []string{"keyword1", "keyword2", "keyword3"} {
		src.addKeyword(kw)
	}

	want := []string{"keyword1:test", "keyword2:test"}

	// Drop the trailing newline so Split does not yield an empty last element.
	trimmed := strings.TrimSuffix(src.keywordCombinations("test"), "\n")
	have := strings.Split(trimmed, "\n")

	// Keywords live in a map, so line order is not stable; compare sorted.
	sort.Strings(have)
	sort.Strings(want)

	if !reflect.DeepEqual(have, want) {
		t.Errorf("Expected keyword combinations: %q, got: %q", want, have)
	}
}

func TestSource_BuildSubstituteSet(t *testing.T) {
s := &Source{
sub: NewSubstitution(),
Expand Down Expand Up @@ -109,7 +84,7 @@ func TestRemoveDuplicateStr(t *testing.T) {
}
}

func TestSource_FormatAndInjectKeywords(t *testing.T) {
func TestSource_GenerateKeywordDataSet(t *testing.T) {
s := &Source{
DetectorKeywords: map[string]struct{}{
"keyword1": {},
Expand All @@ -123,32 +98,26 @@ func TestSource_FormatAndInjectKeywords(t *testing.T) {

testCases := []struct {
input []string
expected string
expected []string
}{
{
[]string{"data1", "data2"},
"keyword1:data1\nkeyword2:data1\nkeyword1:data2\nkeyword2:data2\n",
[]string{"keyword1:data1\nkeyword1:data2\n", "keyword2:data1\nkeyword2:data2\n"},
},
{
[]string{"data1"},
"keyword1:data1\nkeyword2:data1\n",
},
{
[]string{},
"",
[]string{"keyword1:data1\n", "keyword2:data1\n"},
},
}

for _, tc := range testCases {
result := s.formatAndInjectKeywords(tc.input)
got := strings.Split(result, "\n")
expected := strings.Split(tc.expected, "\n")
got := s.generateKeywordDataSet(tc.input)
expected := tc.expected
sort.Strings(got)
sort.Strings(expected)
// Both slices are sorted above because keywords are ranged from a map, so output order is not deterministic.

if !reflect.DeepEqual(got, expected) {
t.Errorf("Expected result: %q, got: %q", tc.expected, result)
t.Errorf("Expected result: %q, got: %q", tc.expected, got)
}
}
}

0 comments on commit 07e8077

Please sign in to comment.