-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add table github_repository_content (#317)
Co-authored-by: aminvielledebat <107035817+aminvielledebatAtBedrock@users.noreply.github.com> Co-authored-by: Ved misra <47312748+misraved@users.noreply.github.com>
- Loading branch information
1 parent
257960a
commit 20986e7
Showing
3 changed files
with
313 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
--- | ||
title: "Steampipe Table: github_repository_content - Query file and directory contents in GitHub repositories using SQL" | ||
description: "Facilitates querying the contents of files and directories within GitHub repositories, offering insights into file paths, types, sizes, and more." | ||
--- | ||
|
||
# Table: github_repository_content - Query File and Directory Contents in GitHub Repositories using SQL | ||
|
||
The `github_repository_content` table is designed to fetch the contents of files or directories within a GitHub repository. It provides a detailed view of file paths, types, contents, sizes, and other related information. | ||
|
||
## Table Usage Guide | ||
|
||
To utilize this table effectively, specify the file path or directory within `repository_content_path`. If `repository_content_path` is not specified, the table will return the contents of the repository's root directory. This feature allows for comprehensive exploration of repository contents, from individual files to entire directories. | ||
|
||
**Important Notes** | ||
- It's mandatory to specify the `repository_full_name` (including the organization/user prefix) in the `where` or `join` clause when querying this table. | ||
- To enhance performance, this table supports the optional qualifier `repository_content_path`. Queries that utilize this qualifier are optimized to efficiently retrieve file contents from a sub-directory within a repository. | ||
|
||
## Examples | ||
|
||
### List the root directory contents of a repository | ||
This query is useful for obtaining an overview of the root directory of a specific repository, helping users quickly identify the initial set of files and directories it contains. | ||
|
||
```sql+postgres | ||
select | ||
repository_full_name, | ||
path, | ||
content, | ||
type, | ||
size, | ||
oid | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'github/docs'; | ||
``` | ||
|
||
```sql+sqlite | ||
select | ||
repository_full_name, | ||
path, | ||
content, | ||
type, | ||
size, | ||
oid | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'github/docs'; | ||
``` | ||
|
||
### Get file contents under a folder in a repository | ||
This enables retrieval of the file contents within a specific directory. | ||
|
||
```sql+postgres | ||
select | ||
repository_full_name, | ||
name, | ||
type, | ||
path, | ||
content | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'turbot/steampipe-plugin-aws' | ||
and | ||
repository_content_path = 'aws-tests/tests'; | ||
``` | ||
|
||
```sql+sqlite | ||
select | ||
repository_full_name, | ||
name, | ||
type, | ||
path, | ||
content | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'turbot/steampipe-plugin-aws' | ||
and | ||
repository_content_path = 'aws-tests/tests'; | ||
``` | ||
|
||
### List contents of a specific directory within a repository | ||
This query facilitates a deeper inspection of a specific directory within a repository, enabling users to understand its structure and the types of files it contains. | ||
|
||
```sql+postgres | ||
select | ||
repository_full_name, | ||
path, | ||
content, | ||
type, | ||
size, | ||
oid | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'github/docs' | ||
and repository_content_path = 'docs'; | ||
``` | ||
|
||
```sql+sqlite | ||
select | ||
repository_full_name, | ||
path, | ||
content, | ||
type, | ||
size, | ||
oid | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'github/docs' | ||
and repository_content_path = 'docs'; | ||
``` | ||
|
||
### Retrieve a specific file within a repository | ||
Targeting a specific file within a repository, this query is particularly useful for extracting detailed information about a file, such as its content, type, and size, which is essential for analysis or integration purposes. | ||
|
||
```sql+postgres | ||
select | ||
repository_full_name, | ||
path, | ||
type, | ||
size, | ||
oid, | ||
content | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'github/docs' | ||
and repository_content_path = '.vscode/settings.json'; | ||
``` | ||
|
||
```sql+sqlite | ||
select | ||
repository_full_name, | ||
path, | ||
type, | ||
size, | ||
oid, | ||
content | ||
from | ||
github_repository_content | ||
where | ||
repository_full_name = 'github/docs' | ||
and repository_content_path = '.vscode/settings.json'; | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
package github | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/shurcooL/githubv4" | ||
"github.com/turbot/steampipe-plugin-github/github/models" | ||
"github.com/turbot/steampipe-plugin-sdk/v5/grpc/proto" | ||
"github.com/turbot/steampipe-plugin-sdk/v5/plugin" | ||
"github.com/turbot/steampipe-plugin-sdk/v5/plugin/transform" | ||
) | ||
|
||
//// TABLE DEFINITION | ||
|
||
func tableGitHubRepositoryContent() *plugin.Table { | ||
return &plugin.Table{ | ||
Name: "github_repository_content", | ||
Description: "List the content in a repository (list directory, or get file content", | ||
List: &plugin.ListConfig{ | ||
Hydrate: tableGitHubRepositoryContentList, | ||
ShouldIgnoreError: isNotFoundError([]string{"404"}), | ||
KeyColumns: []*plugin.KeyColumn{ | ||
{Name: "repository_full_name", Require: plugin.Required}, | ||
{Name: "repository_content_path", Require: plugin.Optional, CacheMatch: "exact"}, | ||
}, | ||
}, | ||
Columns: []*plugin.Column{ | ||
{Name: "repository_full_name", Description: "The full name of the repository (login/repo-name).", Type: proto.ColumnType_STRING, Transform: transform.FromQual("repository_full_name")}, | ||
{Name: "type", Description: "The file type (directory or file).", Type: proto.ColumnType_STRING}, | ||
{Name: "name", Description: "The file name.", Type: proto.ColumnType_STRING}, | ||
{Name: "oid", Description: "The Git object ID.", Type: proto.ColumnType_STRING}, | ||
{Name: "abbreviated_oid", Description: "An abbreviated version of the Git object ID.", Type: proto.ColumnType_STRING}, | ||
{Name: "repository_content_path", Description: "The requested path in repository search.", Type: proto.ColumnType_STRING, Transform: transform.FromQual("repository_content_path")}, | ||
{Name: "path", Description: "The path of the file.", Type: proto.ColumnType_STRING}, | ||
{Name: "path_raw", Description: "A Base64-encoded representation of the file's path.", Type: proto.ColumnType_STRING}, | ||
{Name: "mode", Description: "The mode of the file.", Type: proto.ColumnType_INT}, | ||
{Name: "size", Description: "The size of the file (in KB).", Type: proto.ColumnType_INT}, | ||
{Name: "line_count", Description: "The number of lines available in the file.", Type: proto.ColumnType_INT}, | ||
{Name: "content", Description: "The decoded file content (if the element is a file).", Type: proto.ColumnType_STRING}, | ||
{Name: "is_generated", Description: "Whether or not this tree entry is generated.", Type: proto.ColumnType_BOOL}, | ||
{Name: "is_binary", Description: "Indicates whether the Blob is binary or text.", Type: proto.ColumnType_BOOL}, | ||
{Name: "commit_url", Description: "Git URL (with SHA) of the file.", Type: proto.ColumnType_STRING}, | ||
}, | ||
} | ||
} | ||
|
||
// ContentInfo is the row type streamed for the github_repository_content
// table. Each value describes one non-tree entry of a repository tree,
// combining fields of the tree entry with fields of the blob it points to.
type ContentInfo struct {
	// Fields copied from the tree entry itself.
	Name        string
	Path        string
	PathRaw     string
	Type        string
	Mode        int
	Size        int
	LineCount   int
	IsGenerated bool

	// Fields copied from the blob object behind the entry.
	Oid            string
	AbbreviatedOid string
	Content        string
	IsBinary       bool
	CommitUrl      string
}
|
||
//// LIST FUNCTION | ||
|
||
func tableGitHubRepositoryContentList(ctx context.Context, d *plugin.QueryData, h *plugin.HydrateData) (interface{}, error) { | ||
owner, repo := parseRepoFullName(d.EqualsQualString("repository_full_name")) | ||
var filterPath string | ||
if d.EqualsQualString("repository_content_path") != "" { | ||
filterPath = d.EqualsQualString("repository_content_path") | ||
} | ||
|
||
err := getFileContents(ctx, d, h, owner, repo, filterPath) | ||
if err != nil { | ||
plugin.Logger(ctx).Error("github_repository_content.tableGitHubRepositoryContentList", "error", err) | ||
return nil, err | ||
} | ||
|
||
return nil, nil | ||
} | ||
|
||
func getFileContents(ctx context.Context, d *plugin.QueryData, h *plugin.HydrateData, owner string, repo string, filterPath string) error { | ||
var query struct { | ||
RateLimit models.RateLimit | ||
Repository struct { | ||
Object struct { | ||
Tree struct { | ||
Oid githubv4.String | ||
AbbreviatedOid githubv4.String | ||
Entries []struct { | ||
Name githubv4.String | ||
Path githubv4.String | ||
Size githubv4.Int | ||
LineCount githubv4.Int | ||
Mode githubv4.Int | ||
PathRaw githubv4.String | ||
IsGenerated githubv4.Boolean | ||
Type githubv4.String | ||
Object struct { | ||
Blob struct { | ||
Oid githubv4.String | ||
AbbreviatedOid githubv4.String | ||
Text githubv4.String | ||
IsBinary githubv4.Boolean | ||
CommitUrl githubv4.String | ||
} `graphql:"... on Blob"` | ||
} | ||
} | ||
} `graphql:"... on Tree"` | ||
} `graphql:"object(expression: $expression)"` | ||
} `graphql:"repository(owner: $owner, name: $repo)"` | ||
} | ||
|
||
variables := map[string]interface{}{ | ||
"owner": githubv4.String(owner), | ||
"repo": githubv4.String(repo), | ||
"expression": githubv4.String("HEAD:" + filterPath), | ||
} | ||
|
||
client := connectV4(ctx, d) | ||
listPage := func(ctx context.Context, d *plugin.QueryData, h *plugin.HydrateData) (interface{}, error) { | ||
return nil, client.Query(ctx, &query, variables) | ||
} | ||
|
||
_, err := plugin.RetryHydrate(ctx, d, h, listPage, retryConfig()) | ||
if err != nil { | ||
plugin.Logger(ctx).Error("github_repository_content.getFileContents", "api_error", err, "repository", repo) | ||
return err | ||
} | ||
|
||
for _, data := range query.Repository.Object.Tree.Entries { | ||
if string(data.Type) != "tree" { | ||
c := ContentInfo{ | ||
Oid: string(data.Object.Blob.Oid), | ||
AbbreviatedOid: string(data.Object.Blob.AbbreviatedOid), | ||
Name: string(data.Name), | ||
Mode: int(data.Mode), | ||
PathRaw: string(data.PathRaw), | ||
IsGenerated: bool(data.IsGenerated), | ||
Path: string(data.Path), | ||
Size: int(data.Size), | ||
LineCount: int(data.LineCount), | ||
Type: string(data.Type), | ||
Content: string(data.Object.Blob.Text), | ||
IsBinary: bool(data.Object.Blob.IsBinary), | ||
CommitUrl: string(data.Object.Blob.CommitUrl), | ||
} | ||
d.StreamListItem(ctx, c) | ||
|
||
// Context can be cancelled due to manual cancellation or the limit has been hit | ||
if d.RowsRemaining(ctx) == 0 { | ||
return nil | ||
} | ||
} | ||
if string(data.Type) == "tree" { | ||
err := getFileContents(ctx, d, h, owner, repo, string(data.Path)) | ||
if err != nil { | ||
plugin.Logger(ctx).Error("github_repository_content.getFileContents", "recurssive_api_error", err) | ||
return err | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} |