diff --git a/pkg/parser/fetcher.go b/pkg/parser/fetcher.go new file mode 100644 index 0000000..f23d472 --- /dev/null +++ b/pkg/parser/fetcher.go @@ -0,0 +1,26 @@ +package parser + +import ( + "fmt" + "github.com/pkg/errors" + "io" + "net/http" +) + +// DownloadFile downloads repository files. Http clients should have access +// tokens already set. With this, parser does not need knowledge of private/public repos +func DownloadFile(client *http.Client, u string) (io.ReadCloser, error) { + resp, err := client.Get(u) + if err != nil { + return nil, errors.Wrap(err, "failed to download file to parse") + } + + if resp.StatusCode != http.StatusOK { + if resp.Body != nil { + resp.Body.Close() + } + return nil, fmt.Errorf("failed to retrieve download file: status code %d", resp.StatusCode) + } + + return resp.Body, nil +} diff --git a/pkg/parser/fetcher_test.go b/pkg/parser/fetcher_test.go new file mode 100644 index 0000000..5bb013b --- /dev/null +++ b/pkg/parser/fetcher_test.go @@ -0,0 +1,56 @@ +package parser + +import ( + "github.com/stretchr/testify/require" + "io/ioutil" + "net/http" + "testing" + "time" +) + +func TestGetGithubFile(t *testing.T) { + a := require.New(t) + u := "https://github.com/while-loop/todo/raw/08b3e2fad64e54c061d1ba6324a382e968212a6c/pkg/vcs/github/github_test.go" + exp := `package github + +import ( + "github.com/stretchr/testify/require" + "testing" +) + +func TestTestValidBody(t *testing.T) { + secret := "mysecret" + payload := []byte("mypayload") + computed := "sha1=57852ac1e3fd8e66063cd9d4cb05ea87355bb0b8" + + require.True(t, validBody(payload, secret, computed)) +} +` + + rc, err := DownloadFile(&http.Client{Timeout: 5 * time.Second}, u) + a.NoError(err) + defer rc.Close() + + content, err := ioutil.ReadAll(rc) + a.NoError(err) + + a.Equal(exp, string(content)) +} + +func TestFileNotFound(t *testing.T) { + a := require.New(t) + u := "https://github.com/while-loop/todo/raw/ezas123/pkg/vcs/github/github_test.go" + + rc, err := DownloadFile(&http.Client{Timeout: 5 * time.Second}, u) + a.Contains(err.Error(), "status code 404") + a.Nil(rc) +} + +func TestServerDown(t *testing.T) { + a := require.New(t) + u := "https://fakegithuburlTestServerDown.com/who" + + rc, err := DownloadFile(&http.Client{Timeout: 5 * time.Second}, u) + a.Contains(err.Error(), "failed to download file to parse") + a.Nil(rc) +} diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go index 498d206..60f0b1d 100644 --- a/pkg/parser/parser.go +++ b/pkg/parser/parser.go @@ -1,8 +1,101 @@ package parser -import "github.com/while-loop/todo/pkg/tracker" +import ( + "github.com/while-loop/todo/pkg/log" + "github.com/while-loop/todo/pkg/tracker" + "net/http" + "sync" + "time" +) + +const ( + maxGoroutines = 5 +) + +var ( + client = &http.Client{Timeout: 30 * time.Second} +) + +type TodoParser interface { + GetTodos(urls ...string) []tracker.Issue +} + +type todoParser struct { +} + +func New() TodoParser { + return &todoParser{} +} + +func (p *todoParser) GetTodos(urls ...string) []tracker.Issue { + + jobs := make(chan string, 100) + results := make(chan tracker.Issue, 100) + finished := make(chan struct{}) + + wg := &sync.WaitGroup{} + + log.Debugf("spinning %d goroutines for todoParser", maxGoroutines) + for i := 0; i < maxGoroutines; i++ { + wg.Add(1) // track how many goroutines we spin up + go worker(wg, jobs, results) + } + + go func() { + log.Debug("sending urls to worker routines") + for _, u := range urls { + jobs <- u + } + close(jobs) // close the jobs channel so goroutines gracefully stop when no jobs are left + }() -func GetTodos(files ...string) []tracker.Issue { issues := make([]tracker.Issue, 0) + go func() { + log.Debug("collecting results from workers") + for issue := range results { + issues = append(issues, issue) + } + finished <- struct{}{} // let the main thread know we're done collecting issues + }() + + log.Debug("waiting for goroutines to finish") + // wait for all the goroutines to gracefully finish + wg.Wait() + + // tell the result collector that we're done waiting on worker goroutines + close(results) + + log.Debug("waiting for collector routine to finish") + // wait for the result collector to finish appending issues + <-finished + + log.Debug("done waiting for collector") return issues } + +// worker downloads and parses a file given a url +func worker(wg *sync.WaitGroup, urlChan <-chan string, results chan<- tracker.Issue) { + for u := range urlChan { + log.Info("worker recvd ", u) + rc, err := DownloadFile(client, u) + if err != nil { + log.Error("worker failed to download file", err) + continue + } + + log.Debug("working parsing file") + iss, err := ParseFile(u, rc) + rc.Close() + if err != nil { + log.Error("worker failed to parse file", err) + // don't return because we could have recvd partial issues w/ an error + } + + if len(iss) > 0 { + for _, is := range iss { + results <- is + } + } + } + wg.Done() +} diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go new file mode 100644 index 0000000..8f7725b --- /dev/null +++ b/pkg/parser/parser_test.go @@ -0,0 +1,63 @@ +package parser + +import ( + "github.com/stretchr/testify/require" + "github.com/while-loop/todo/pkg/tracker" + "reflect" + "testing" +) + +func TestDownloadTwoFiles(t *testing.T) { + a := require.New(t) + urls := []string{"https://github.com/while-loop/todo/raw/08b3e2fad64e54c061d1ba6324a382e968212a6c/pkg/vcs/github/event_push_test.go", + "https://github.com/ansible/ansible/raw/781fd7099a0278d3d91557b94da1083f19fad329/test/legacy/roles/test_gce_labels/tasks/test.yml"} + + iss := []tracker.Issue{ + {Assignee: "erjohnso", Title: "write more tests", File: urls[1], Line: 28}, + {Title: "test when parser has been impl", File: urls[0], Line: 18}, + } + + p := New() + issues := p.GetTodos(urls...) + a.Equal(len(iss), len(issues)) + ttl := 0 // keep a count of total issues matched. since order of received issues are random due to goroutines + + for _, expIs := range iss { + for i, actIs := range issues { + if reflect.DeepEqual(expIs, actIs) { + ttl++ + issues = remove(issues, i) + break + } + } + } + + a.Equal(len(iss), ttl) +} + +func TestDownloadWhenServerIsNotReachable(t *testing.T) { + a := require.New(t) + urls := []string{"https://gitakjshdgfasjhgdfhub.com"} + + p := New() + issues := p.GetTodos(urls...) + + a.Equal(0, len(issues)) +} + +func TestParsingUnsupportedExtension(t *testing.T) { + a := require.New(t) + + // readme.md has valid todo comments, but md is an supp extension atm + urls := []string{"https://github.com/while-loop/todo/raw/cc6b554cccfd3598f6b6342d69c78abcbc5d0128/README.md"} + + p := New() + issues := p.GetTodos(urls...) + + a.Equal(0, len(issues)) +} + +func remove(s []tracker.Issue, i int) []tracker.Issue { + s[i] = s[len(s)-1] + return s[:len(s)-1] +} diff --git a/pkg/parser/regex.go b/pkg/parser/regex.go new file mode 100644 index 0000000..e613b01 --- /dev/null +++ b/pkg/parser/regex.go @@ -0,0 +1,161 @@ +package parser + +import ( + "bufio" + "fmt" + "github.com/pkg/errors" + "github.com/while-loop/todo/pkg/log" + "github.com/while-loop/todo/pkg/tracker" + "io" + "path/filepath" + "regexp" + "sort" + "strings" +) + +type cmtKind string + +const ( + cmtToken = `{comment_token}` + cmtRegex = `(?i)^\s*` + cmtToken + `.*todo(\((?P.*)\):)?[\s|$]\s*(.*)$` + hash cmtKind = "#" + slash = "//" +) + +var ( + slashRegex = regexp.MustCompile(strings.Replace(cmtRegex, cmtToken, string(slash), 1)) + hashRegex = regexp.MustCompile(strings.Replace(cmtRegex, cmtToken, string(hash), 1)) + mentionsRegex = regexp.MustCompile(`(@[^\s]+)`) + labelsRegex = regexp.MustCompile(`\+([^\s]+)`) + slashLangs = []string{"go", "java", "c", "cpp", "h", "hpp"} + hashLangs = []string{"py", "sh", "bash", "yml", "yaml"} +) + +func init() { + sort.Strings(slashLangs) + sort.Strings(hashLangs) +} + +func ParseFile(fileName string, file io.ReadCloser) ([]tracker.Issue, error) { + defer file.Close() + issues := make([]tracker.Issue, 0) + ext := strings.TrimLeft(filepath.Ext(fileName), ".") + if ext == "" { + log.Errorf("failed to get file ext for %s", fileName) + return nil, fmt.Errorf("unknown file ext: %s", ext) + } + + rexp := commentRegexes(ext) + if rexp == nil { + log.Errorf("parser regex. unknown ext type: %s", ext) + return nil, fmt.Errorf("unknown file ext: %s", ext) + } + + scan := bufio.NewScanner(file) + scan.Split(bufio.ScanLines) + + lineNum := 0 // set as 0 so 1st iteration is 1. increment is at beginning to accommodate for continues + for scan.Scan() { + lineNum++ + line := scan.Text() + + // todo(while-loop): add ignore keyword to yml config (ParseFile will be a todoParser func) + if strings.Contains(line, "!todo") { + continue + } + + is, found := parseLine(rexp, line) + if found { + is.File = fileName + is.Line = lineNum + log.Debugf("found issue: %s\n%s", line, is.String()) + issues = append(issues, is) + } + } + + if scan.Err() != nil { + return issues, errors.Wrapf(scan.Err(), "error while scanning file: %s", fileName) + } + + return issues, nil +} + +func commentRegexes(ext string) *regexp.Regexp { + idx := sort.SearchStrings(slashLangs, ext) + if idx < len(slashLangs) && slashLangs[idx] == ext { + return slashRegex + } + + idx = sort.SearchStrings(hashLangs, ext) + if idx < len(hashLangs) && hashLangs[idx] == ext { + return hashRegex + } + + return nil +} + +func parseLine(rexp *regexp.Regexp, line string) (tracker.Issue, bool) { + + finds := rexp.FindStringSubmatch(line) + if len(finds) <= 0 { + return tracker.Issue{}, false + } + + i := tracker.Issue{ + Mentions: mentionsRegex.FindAllString(line, -1), + Labels: parseLabels(line), + File: "", + Line: 0, + ID: "", + Title: "", + Assignee: "", + Author: "", + Description: "", + } + + i.Title = createTitle(line, regexKind(rexp), i.Mentions, i.Labels) + + for idx, name := range rexp.SubexpNames() { + if name == "assignee" { + i.Assignee = finds[idx] + } + } + + return i, true +} + +func createTitle(line string, kind cmtKind, mentions, labels []string) string { + line = regexp.MustCompile(`(?i).*`+string(kind)+`.*todo(\((.*)\):)?[\s|$]\s*`).ReplaceAllString(line, "") + + for _, m := range mentions { + line = regexp.MustCompile(`\s*`+m+`\s*`).ReplaceAllString(line, "") + } + + for _, l := range labels { + line = regexp.MustCompile(`\s*\+`+l+`\s*`).ReplaceAllString(line, "") + } + return line +} + +func parseLabels(line string) []string { + var labels []string + + for _, groups := range labelsRegex.FindAllStringSubmatch(line, -1) { + if len(groups) < 2 { + log.Warn("only found 1 group in label regex", groups) + continue + } + + labels = append(labels, groups[1]) + } + + return labels +} + +func regexKind(rexp *regexp.Regexp) cmtKind { + if strings.Contains(rexp.String(), string(hash)) { + return hash + } else { + return slash + } +} diff --git a/pkg/parser/regex_test.go b/pkg/parser/regex_test.go new file mode 100644 index 0000000..647d579 --- /dev/null +++ b/pkg/parser/regex_test.go @@ -0,0 +1,116 @@ +package parser + +import ( + "github.com/stretchr/testify/require" + "github.com/while-loop/todo/pkg/tracker" + "regexp" + "strconv" + "testing" +) + +func TestRegexInit(t *testing.T) { + require.Contains(t, slashRegex.String(), `//.*todo`) + require.Contains(t, hashRegex.String(), `#.*todo`) +} + +func TestLangSorted(t *testing.T) { + tcs := []struct { + l1, l2 string + kind []string + lessThan bool + }{ + {"c", "go", slashLangs, true}, + {"java", "go", slashLangs, false}, + {"bash", "py", hashLangs, true}, + {"yml", "sh", hashLangs, false}, + } + + for i, tc := range tcs { + t.Run(strconv.Itoa(i), func(t *testing.T) { + i1 := getIdx(tc.kind, tc.l1) + i2 := getIdx(tc.kind, tc.l2) + if tc.lessThan { + require.True(t, i1 < i2) + } else { + require.True(t, i1 > i2) + } + }) + } +} + +func TestTODOs(t *testing.T) { + tcs := []struct { + comment string + rexp *regexp.Regexp + is tracker.Issue + }{ + {"// todo hello world", slashRegex, tracker.Issue{Title: "hello world"}}, + {"line of code", slashRegex, e}, + {"# todo(snake): impl python", hashRegex, tracker.Issue{Title: "impl python", Assignee: "snake"}}, + {`// fmt.Println("uh oh") todo(snake): eol comment`, slashRegex, tracker.Issue{Title: "eol comment", Assignee: "snake"}}, + {"// todo(while-loop): @dev1 finish tests +test +api", slashRegex, tracker.Issue{ // 4 + Title: "finish tests", + Labels: []string{"test", "api"}, + Mentions: []string{"@dev1"}, + Assignee: "while-loop", + }}, + {`// todo(while-loop): add ignore keyword to yml config (ParseFile will be a todoParser func)`, slashRegex, tracker.Issue{ //5 + Assignee: "while-loop", + Title: "add ignore keyword to yml config (ParseFile will be a todoParser func)", + }}, + {`// code snippet https://github.com/while-loop/todo/blob/cc6b554cccfd3598f6b6342d69c78abcbc5d0128/pkg/app.go#L17-L25`, slashRegex, e}, + {`// footer ###### This issue was generated by [todo](https://github.com/while-loop/todo) on behalf of %s.`, slashRegex, e}, + {`// foreach file, get all todos`, slashRegex, e}, //8 + } + + for idx, tt := range tcs { + t.Run(strconv.Itoa(idx), func(inner *testing.T) { + is, _ := parseLine(tt.rexp, tt.comment) + require.Equal(inner, tt.is, is) + }) + } +} + +func TestMentions(t *testing.T) { + tcs := []struct { + cmt string + mentions []string + }{ + {`// todo(while-loop): find all @user1 users in db @wh3n7hi5-ends-i_win`, []string{"@user1", "@wh3n7hi5-ends-i_win"}}, + {`// var 3 int todo find all`, nil}, + } + + for idx, tt := range tcs { + t.Run(strconv.Itoa(idx), func(t *testing.T) { + require.Equal(t, tt.mentions, mentionsRegex.FindAllString(tt.cmt, -1)) + }) + } +} + +func TestLabels(t *testing.T) { + tcs := []struct { + cmt string + labels []string + }{ + {`// todo(while-loop): find all +users users in db +api/users +wh3n7hi5-ends-i_win`, []string{"users", "api/users", "wh3n7hi5-ends-i_win"}}, + {`// var 3 int todo find all`, nil}, + } + + for idx, tt := range tcs { + t.Run(strconv.Itoa(idx), func(t *testing.T) { + require.Equal(t, tt.labels, parseLabels(tt.cmt)) + }) + } +} + +func getIdx(arr []string, item string) int { + + for i := 0; i < len(arr); i++ { + if arr[i] == item { + return i + } + } + return -1 +} + +var e = tracker.Issue{} diff --git a/pkg/tracker/github/github.go b/pkg/tracker/github/github.go index db533be..6f8f816 100644 --- a/pkg/tracker/github/github.go +++ b/pkg/tracker/github/github.go @@ -1,4 +1,4 @@ package github // code snippet https://github.com/while-loop/todo/blob/cc6b554cccfd3598f6b6342d69c78abcbc5d0128/pkg/app.go#L17-L25 -// footer ###### This issue was generated by [todo](https://github.com/while-loop/todo). +// footer ###### This issue was generated by [todo](https://github.com/while-loop/todo) on behalf of %s. diff --git a/pkg/tracker/manager.go b/pkg/tracker/manager.go index 76f352e..7127dc8 100644 --- a/pkg/tracker/manager.go +++ b/pkg/tracker/manager.go @@ -18,8 +18,10 @@ type Issue struct { Description string Assignee string Author string - Mentions string + Mentions []string Labels []string + File string + Line int } func (i *Issue) String() string { diff --git a/pkg/vcs/github/event_push.go b/pkg/vcs/github/event_push.go index d5cdd06..00be968 100644 --- a/pkg/vcs/github/event_push.go +++ b/pkg/vcs/github/event_push.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/luci/go-render/render" "github.com/while-loop/todo/pkg/log" - "github.com/while-loop/todo/pkg/parser" "net/http" "strings" "time" @@ -31,15 +30,20 @@ func (s *Service) handlePush(w http.ResponseWriter, r *http.Request) { // get changed files list for _, commit := range event.Commits { for _, fPath := range append(commit.Modified, commit.Added...) { + if event.Repository.FullName == "while-loop/todo" && strings.HasSuffix(fPath, "_test.go") { + // don't parse any test todos as issues to generate !todo + // todo add "exclude" (array) to vcs config struct. match regex + continue + } suspects = append(suspects, contentUrl(event.Repository.FullName, commit.ID, fPath)) } } // foreach file, get all todos - todos := parser.GetTodos(suspects...) + todos := s.parser.GetTodos(suspects...) fmt.Println(suspects) - // send todos to issuechan (tracker will handle reducing and filtering) + // send todos to issuechan (tracker will handle reducing and filtering) !todo log.Infof("found %d todos in github push %s", len(todos), event.HeadCommit.URL) go func() { for _, t := range todos { diff --git a/pkg/vcs/github/event_push_test.go b/pkg/vcs/github/event_push_test.go index 53cd692..ab6e441 100644 --- a/pkg/vcs/github/event_push_test.go +++ b/pkg/vcs/github/event_push_test.go @@ -16,4 +16,4 @@ func TestContentUrl(t *testing.T) { func TestFoundTodos(t *testing.T) { // TODO test when parser has been impl -} \ No newline at end of file +} diff --git a/pkg/vcs/github/github.go b/pkg/vcs/github/github.go index e5626a1..780085c 100644 --- a/pkg/vcs/github/github.go +++ b/pkg/vcs/github/github.go @@ -10,6 +10,7 @@ import ( "github.com/google/go-github/github" "github.com/gorilla/mux" "github.com/while-loop/todo/pkg/log" + "github.com/while-loop/todo/pkg/parser" "github.com/while-loop/todo/pkg/tracker" "github.com/while-loop/todo/pkg/vcs/config" "golang.org/x/oauth2" @@ -31,6 +32,7 @@ type Service struct { issueCh chan<- tracker.Issue eventHandlers map[string]http.HandlerFunc config *config.GithubConfig + parser parser.TodoParser } func NewService(config *config.GithubConfig, issueChan chan<- tracker.Issue) *Service { @@ -42,6 +44,7 @@ func NewService(config *config.GithubConfig, issueChan chan<- tracker.Issue) *Se ghClient: github.NewClient(oauthClient), eventHandlers: map[string]http.HandlerFunc{}, config: config, + parser: parser.New(), // todo(while-loop) add parser as param } s.eventHandlers[issues] = s.handleIssue