In [1]:
import pandas as pd

import lakefs_client
from lakefs_client import models
from lakefs_client.client import LakeFSClient

## lakeFS Python API

In [2]:
# lakeFS credentials and endpoint
configuration = lakefs_client.Configuration()
configuration.username = 'AKIA****************'
configuration.password = 'GJXW******************************'
configuration.host = 'http://127.0.0.1:8000'

client = LakeFSClient(configuration)

In [19]:
# create repo
repo = models.RepositoryCreation(name='movie-repo', storage_namespace='s3://movie-s3bucket', default_branch='main')
client.repositories.create_repository(repo)

In [5]:
# list branches
client.branches.list_branches('movie-repo')

{'pagination': {'has_more': False,
                'max_per_page': 1000,
                'next_offset': '',
                'results': 2},
 'results': [{'commit_id': 'd9ad30aa37927f17d7702f7058833e1e348ca559c411148b8d86b5222ff81fd5',
              'id': 'dev'},
             {'commit_id': 'd9ad30aa37927f17d7702f7058833e1e348ca559c411148b8d86b5222ff81fd5',
              'id': 'main'}]}

In [None]:
# create a new branch
client.branches.create_branch(repository='movie-repo', 
                              branch_creation=models.BranchCreation(name='dev', source='main'))


## Movie Use Case

In [7]:
# new branch
client.branches.create_branch(repository='movie-repo', 
                              branch_creation=models.BranchCreation(name='dev', source='main'))


'b7eed68d29a04bc9d3a78892213de59ffda95f4907789a94085dff06b00c91aa'

In [9]:
# read data
raw_info = client.objects.get_object(repository='movie-repo', 
                                 ref='dev', 
                                 path='movie_info_origin.csv')
df = pd.read_csv(raw_info)

In [11]:
movies_new = df[["movie_id", "title", "budget", "genres", "original_language", "popularity",
                     "release_date", "revenue", "runtime"]]

movies_new["genre"] = movies_new["genres"].apply(lambda x: x.split("|")[0].split(":")[1])

In [12]:
import io
stream = io.StringIO()
movies_new[["movie_id", "title", "original_language", "release_date", "runtime", "genre"]].to_csv(stream, index=False)

In [13]:
stream.seek(0)
client.objects.upload_object(repository='movie-repo', 
                         branch='dev', 
                         path='movie_feature.csv', 
                         content=stream)

{'checksum': '3ea40165be6cb9c18d06461282241b6a',
 'content_type': 'application/octet-stream',
 'mtime': 1680191269,
 'path': 'movie_feature.csv',
 'path_type': 'object',
 'physical_address': 's3://movie-s3bucket/data/gokl7n5n47dc73cfshvg/cgiqu95n47dc73cfsi00',
 'size_bytes': 1731335}

In [14]:
# commit
client.commits.commit(
    repository='movie-repo',
    branch='dev',
    commit_creation=models.CommitCreation(message='Update the movie feature!', metadata={'using': 'python_api'}))

{'committer': 'movie_admin',
 'creation_date': 1680191347,
 'id': '55f4281666e592ae61daa6de91faa69f7ecd91af17d7c61377afe92777dbba4a',
 'message': 'Update the movie feature!',
 'meta_range_id': '',
 'metadata': {'using': 'python_api'},
 'parents': ['b7eed68d29a04bc9d3a78892213de59ffda95f4907789a94085dff06b00c91aa']}

In [15]:
# diff from main
client.refs.diff_refs(repository='movie-repo', left_ref='main', right_ref='dev').results

[{'path': 'movie_feature.csv',
  'path_type': 'object',
  'size_bytes': 1731335,
  'type': 'changed'}]

In [16]:
# merge
client.refs.merge_into_branch(repository='movie-repo', 
                              source_ref='dev', 
                              destination_branch='main')

{'reference': '9045a98e7317237b36e9e9ccd3ae284db49c1e0516ce6f2e901a4131ebab1fd4',
 'summary': {'added': 0, 'changed': 0, 'conflict': 0, 'removed': 0}}

In [17]:
# delete branch
client.branches.delete_branch(repository='movie-repo', branch="dev")

In [18]:
# list branches
client.branches.list_branches('movie-repo')

{'pagination': {'has_more': False,
                'max_per_page': 1000,
                'next_offset': '',
                'results': 1},
 'results': [{'commit_id': '9045a98e7317237b36e9e9ccd3ae284db49c1e0516ce6f2e901a4131ebab1fd4',
              'id': 'main'}]}