Skip to content

Commit

Permalink
Add table github_repository_content (#317)
Browse files Browse the repository at this point in the history
Co-authored-by: aminvielledebat <107035817+aminvielledebatAtBedrock@users.noreply.github.com>
Co-authored-by: Ved misra <47312748+misraved@users.noreply.github.com>
  • Loading branch information
3 people committed May 17, 2024
1 parent 257960a commit 20986e7
Show file tree
Hide file tree
Showing 3 changed files with 313 additions and 0 deletions.
148 changes: 148 additions & 0 deletions docs/tables/github_repository_content.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
---
title: "Steampipe Table: github_repository_content - Query file and directory contents in GitHub repositories using SQL"
description: "Facilitates querying the contents of files and directories within GitHub repositories, offering insights into file paths, types, sizes, and more."
---

# Table: github_repository_content - Query File and Directory Contents in GitHub Repositories using SQL

The `github_repository_content` table is designed to fetch the contents of files or directories within a GitHub repository. It provides a detailed view of file paths, types, contents, sizes, and other related information.

## Table Usage Guide

To utilize this table effectively, specify the file path or directory within `repository_content_path`. If `repository_content_path` is not specified, the table will return the contents of the repository's root directory. This feature allows for comprehensive exploration of repository contents, from individual files to entire directories.

**Important Notes**
- It's mandatory to specify the `repository_full_name` (including the organization/user prefix) in the `where` or `join` clause when querying this table.
- To enhance performance, this table supports the optional qualifier `repository_content_path`. Queries that utilize this qualifier are optimized to efficiently retrieve file contents from a sub-directory within a repository.

## Examples

### List the root directory contents of a repository
This query is useful for obtaining an overview of the root directory of a specific repository, helping users quickly identify the initial set of files and directories it contains.

```sql+postgres
select
repository_full_name,
path,
content,
type,
size,
oid
from
github_repository_content
where
repository_full_name = 'github/docs';
```

```sql+sqlite
select
repository_full_name,
path,
content,
type,
size,
oid
from
github_repository_content
where
repository_full_name = 'github/docs';
```

### Get file contents under a folder in a repository
This enables retrieval of the file contents within a specific directory.

```sql+postgres
select
repository_full_name,
name,
type,
path,
content
from
github_repository_content
where
repository_full_name = 'turbot/steampipe-plugin-aws'
and
repository_content_path = 'aws-tests/tests';
```

```sql+sqlite
select
repository_full_name,
name,
type,
path,
content
from
github_repository_content
where
repository_full_name = 'turbot/steampipe-plugin-aws'
and
repository_content_path = 'aws-tests/tests';
```

### List contents of a specific directory within a repository
This query facilitates a deeper inspection of a specific directory within a repository, enabling users to understand its structure and the types of files it contains.

```sql+postgres
select
repository_full_name,
path,
content,
type,
size,
oid
from
github_repository_content
where
repository_full_name = 'github/docs'
and repository_content_path = 'docs';
```

```sql+sqlite
select
repository_full_name,
path,
content,
type,
size,
sha
from
github_repository_content
where
repository_full_name = 'github/docs'
and repository_content_path = 'docs';
```

### Retrieve a specific file within a repository
Targeting a specific file within a repository, this query is particularly useful for extracting detailed information about a file, such as its content, type, and size, which is essential for analysis or integration purposes.

```sql+postgres
select
repository_full_name,
path,
type,
size,
oid,
content
from
github_repository_content
where
repository_full_name = 'github/docs'
and repository_content_path = '.vscode/settings.json';
```

```sql+sqlite
select
repository_full_name,
path,
type,
size,
oid,
content
from
github_repository_content
where
repository_full_name = 'github/docs'
and repository_content_path = '.vscode/settings.json';
```
1 change: 1 addition & 0 deletions github/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ func Plugin(ctx context.Context) *plugin.Plugin {
"github_release": tableGitHubRelease(),
"github_repository": tableGitHubRepository(),
"github_repository_collaborator": tableGitHubRepositoryCollaborator(),
"github_repository_content": tableGitHubRepositoryContent(),
"github_repository_dependabot_alert": tableGitHubRepositoryDependabotAlert(),
"github_repository_deployment": tableGitHubRepositoryDeployment(),
"github_repository_environment": tableGitHubRepositoryEnvironment(),
Expand Down
164 changes: 164 additions & 0 deletions github/table_github_repository_content.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package github

import (
"context"

"github.com/shurcooL/githubv4"
"github.com/turbot/steampipe-plugin-github/github/models"
"github.com/turbot/steampipe-plugin-sdk/v5/grpc/proto"
"github.com/turbot/steampipe-plugin-sdk/v5/plugin"
"github.com/turbot/steampipe-plugin-sdk/v5/plugin/transform"
)

//// TABLE DEFINITION

// tableGitHubRepositoryContent returns the definition of the
// github_repository_content table.
//
// The table is list-only: repository_full_name is a required key column and
// repository_content_path is an optional one (matched exactly for caching).
// Both key columns are echoed back into the result rows via transform.FromQual;
// the remaining columns have no explicit transform and are resolved from the
// ContentInfo rows streamed by tableGitHubRepositoryContentList.
func tableGitHubRepositoryContent() *plugin.Table {
	return &plugin.Table{
		Name:        "github_repository_content",
		Description: "List the content in a repository (list directory, or get file content).",
		List: &plugin.ListConfig{
			Hydrate: tableGitHubRepositoryContentList,
			// Errors matching "404" are handed to isNotFoundError so they can be ignored.
			ShouldIgnoreError: isNotFoundError([]string{"404"}),
			KeyColumns: []*plugin.KeyColumn{
				{Name: "repository_full_name", Require: plugin.Required},
				{Name: "repository_content_path", Require: plugin.Optional, CacheMatch: "exact"},
			},
		},
		Columns: []*plugin.Column{
			{Name: "repository_full_name", Description: "The full name of the repository (login/repo-name).", Type: proto.ColumnType_STRING, Transform: transform.FromQual("repository_full_name")},
			{Name: "type", Description: "The file type (directory or file).", Type: proto.ColumnType_STRING},
			{Name: "name", Description: "The file name.", Type: proto.ColumnType_STRING},
			{Name: "oid", Description: "The Git object ID.", Type: proto.ColumnType_STRING},
			{Name: "abbreviated_oid", Description: "An abbreviated version of the Git object ID.", Type: proto.ColumnType_STRING},
			{Name: "repository_content_path", Description: "The requested path in repository search.", Type: proto.ColumnType_STRING, Transform: transform.FromQual("repository_content_path")},
			{Name: "path", Description: "The path of the file.", Type: proto.ColumnType_STRING},
			{Name: "path_raw", Description: "A Base64-encoded representation of the file's path.", Type: proto.ColumnType_STRING},
			{Name: "mode", Description: "The mode of the file.", Type: proto.ColumnType_INT},
			// The value is the tree entry's size as reported by the GraphQL API, which is a byte count.
			{Name: "size", Description: "The size of the file (in bytes).", Type: proto.ColumnType_INT},
			{Name: "line_count", Description: "The number of lines available in the file.", Type: proto.ColumnType_INT},
			{Name: "content", Description: "The decoded file content (if the element is a file).", Type: proto.ColumnType_STRING},
			{Name: "is_generated", Description: "Whether or not this tree entry is generated.", Type: proto.ColumnType_BOOL},
			{Name: "is_binary", Description: "Indicates whether the Blob is binary or text.", Type: proto.ColumnType_BOOL},
			{Name: "commit_url", Description: "Git URL (with SHA) of the file.", Type: proto.ColumnType_STRING},
		},
	}
}

// ContentInfo is the row type streamed by getFileContents for each non-tree
// entry it finds in a repository tree. Fields are filled either from the tree
// entry itself or from the Blob object attached to that entry.
type ContentInfo struct {
// Oid is the object ID taken from the entry's Blob object.
Oid string
// AbbreviatedOid is the abbreviated object ID taken from the entry's Blob object.
AbbreviatedOid string
// Name is the tree entry's name.
Name string
// Mode is the tree entry's mode.
Mode int
// PathRaw is the tree entry's raw path as returned by the API.
PathRaw string
// IsGenerated is the tree entry's isGenerated flag.
IsGenerated bool
// Path is the tree entry's path.
Path string
// Size is the tree entry's size as reported by the API.
Size int
// LineCount is the tree entry's line count.
LineCount int
// Type is the tree entry's type string; entries of type "tree" are never
// streamed as rows, so this holds one of the other entry types.
Type string
// Content is the Blob's text.
Content string
// CommitUrl is the Blob's commit URL.
CommitUrl string
// IsBinary is the Blob's isBinary flag.
IsBinary bool
}

//// LIST FUNCTION

// tableGitHubRepositoryContentList is the list hydrate function for the
// github_repository_content table.
//
// It reads the repository_full_name qual, splits it into owner and repository
// name with parseRepoFullName, reads the repository_content_path qual (passed
// through unchanged, so an empty value stays empty), and delegates to
// getFileContents, which streams the rows. Nothing is returned directly: the
// result is always nil, paired with the error from getFileContents if any.
func tableGitHubRepositoryContentList(ctx context.Context, d *plugin.QueryData, h *plugin.HydrateData) (interface{}, error) {
	fullName := d.EqualsQualString("repository_full_name")
	owner, repo := parseRepoFullName(fullName)

	// Used verbatim as the starting path for the tree walk.
	contentPath := d.EqualsQualString("repository_content_path")

	if err := getFileContents(ctx, d, h, owner, repo, contentPath); err != nil {
		plugin.Logger(ctx).Error("github_repository_content.tableGitHubRepositoryContentList", "error", err)
		return nil, err
	}

	return nil, nil
}

// getFileContents streams one ContentInfo row for every non-tree entry found
// under filterPath at HEAD of the owner/repo repository, recursing into each
// entry of type "tree".
//
// A single GraphQL query is issued per directory using the object expression
// "HEAD:<filterPath>"; only results that resolve to a Tree populate Entries.
// The query is executed through plugin.RetryHydrate with retryConfig().
//
// The walk stops as soon as d.RowsRemaining reports 0. That check runs after
// every entry, including after returning from a sub-directory, so that once a
// nested call stops early the parent does not go on to query sibling
// directories.
//
// It returns the first error from the API call or from a recursive call, and
// nil otherwise (including when the walk is cut short).
func getFileContents(ctx context.Context, d *plugin.QueryData, h *plugin.HydrateData, owner string, repo string, filterPath string) error {
	var query struct {
		RateLimit  models.RateLimit
		Repository struct {
			Object struct {
				Tree struct {
					Oid            githubv4.String
					AbbreviatedOid githubv4.String
					Entries        []struct {
						Name        githubv4.String
						Path        githubv4.String
						Size        githubv4.Int
						LineCount   githubv4.Int
						Mode        githubv4.Int
						PathRaw     githubv4.String
						IsGenerated githubv4.Boolean
						Type        githubv4.String
						Object      struct {
							Blob struct {
								Oid            githubv4.String
								AbbreviatedOid githubv4.String
								Text           githubv4.String
								IsBinary       githubv4.Boolean
								CommitUrl      githubv4.String
							} `graphql:"... on Blob"`
						}
					}
				} `graphql:"... on Tree"`
			} `graphql:"object(expression: $expression)"`
		} `graphql:"repository(owner: $owner, name: $repo)"`
	}

	variables := map[string]interface{}{
		"owner":      githubv4.String(owner),
		"repo":       githubv4.String(repo),
		"expression": githubv4.String("HEAD:" + filterPath),
	}

	client := connectV4(ctx, d)
	listPage := func(ctx context.Context, d *plugin.QueryData, h *plugin.HydrateData) (interface{}, error) {
		return nil, client.Query(ctx, &query, variables)
	}

	if _, err := plugin.RetryHydrate(ctx, d, h, listPage, retryConfig()); err != nil {
		plugin.Logger(ctx).Error("github_repository_content.getFileContents", "api_error", err, "repository", repo)
		return err
	}

	for _, entry := range query.Repository.Object.Tree.Entries {
		if string(entry.Type) == "tree" {
			// Directories are not emitted as rows; their contents are walked instead.
			if err := getFileContents(ctx, d, h, owner, repo, string(entry.Path)); err != nil {
				plugin.Logger(ctx).Error("github_repository_content.getFileContents", "recurssive_api_error", err)
				return err
			}
		} else {
			d.StreamListItem(ctx, ContentInfo{
				Oid:            string(entry.Object.Blob.Oid),
				AbbreviatedOid: string(entry.Object.Blob.AbbreviatedOid),
				Name:           string(entry.Name),
				Mode:           int(entry.Mode),
				PathRaw:        string(entry.PathRaw),
				IsGenerated:    bool(entry.IsGenerated),
				Path:           string(entry.Path),
				Size:           int(entry.Size),
				LineCount:      int(entry.LineCount),
				Type:           string(entry.Type),
				Content:        string(entry.Object.Blob.Text),
				IsBinary:       bool(entry.Object.Blob.IsBinary),
				CommitUrl:      string(entry.Object.Blob.CommitUrl),
			})
		}

		// Context can be cancelled due to manual cancellation or the limit has
		// been hit. Checking here, after both branches, also catches the case
		// where a recursive call stopped early, so no further directories are
		// fetched once the query is satisfied.
		if d.RowsRemaining(ctx) == 0 {
			return nil
		}
	}

	return nil
}

0 comments on commit 20986e7

Please sign in to comment.