Skip to content

Commit

Permalink
Merge pull request #1 from vespa-engine/tgm/move-code
Browse files Browse the repository at this point in the history
move pyvespa code from vespa-engine/vespa/python/vespa
  • Loading branch information
Thiago G. Martins committed Sep 8, 2020
2 parents 5148ae0 + fa9a2a7 commit 677d355
Show file tree
Hide file tree
Showing 33 changed files with 7,205 additions and 0 deletions.
142 changes: 142 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
*.bak
.gitattributes
.last_checked
.gitconfig
*.bak
*.log
*~
~*
_tmp*
tmp*
tags

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

.vscode
*.swp

# osx generated files
.DS_Store
.DS_Store?
.Trashes
ehthumbs.db
Thumbs.db
.idea

# pytest
.pytest_cache

# tools/trust-doc-nbs
docs_src/.last_checked

# symlinks to fastai
docs_src/fastai
tools/fastai

# link checker
checklink/cookies.txt

# .gitconfig is now autogenerated
.gitconfig

Pipfile
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include README.md
recursive-exclude * __pycache__
20 changes: 20 additions & 0 deletions docs/sphinx/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to sphinx-build's "make mode" (-M), so the set of
# valid targets is whatever sphinx-build itself reports via `make help`.

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
# NOTE: recipe lines must start with a literal TAB character; without it
# make stops with "missing separator".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony so the catch-all rule below never tries to
# rebuild this file itself.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
139 changes: 139 additions & 0 deletions docs/sphinx/source/application-package.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create Vespa application packages\n",
"\n",
"> Python API to create application packages"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Our goal is to create the following `msmarco` schema using our Python API, based on our [text search tutorial](https://docs.vespa.ai/documentation/tutorials/text-search.html)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"schema msmarco {\n",
" document msmarco {\n",
" field id type string {\n",
" indexing: attribute | summary\n",
" }\n",
" field title type string {\n",
" indexing: index | summary\n",
" index: enable-bm25\n",
" }\n",
" field body type string {\n",
" indexing: index | summary\n",
" index: enable-bm25\n",
" }\n",
" }\n",
"\n",
" fieldset default {\n",
" fields: title, body\n",
" }\n",
"\n",
" rank-profile default {\n",
" first-phase {\n",
" expression: nativeRank(title, body)\n",
" }\n",
" }\n",
"\n",
" rank-profile bm25 inherits default {\n",
" first-phase {\n",
" expression: bm25(title) + bm25(body)\n",
" }\n",
" }\n",
"\n",
"}\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Schema API"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from vespa.package import Document, Field, Schema, FieldSet, RankProfile, ApplicationPackage\n",
"\n",
"# Document with three string fields; `title` and `body` are indexed with BM25 enabled.\n",
"document = Document(\n",
"    fields=[\n",
"        Field(\n",
"            name=\"id\",\n",
"            type=\"string\",\n",
"            indexing=[\"attribute\", \"summary\"],\n",
"        ),\n",
"        Field(\n",
"            name=\"title\",\n",
"            type=\"string\",\n",
"            indexing=[\"index\", \"summary\"],\n",
"            index=\"enable-bm25\",\n",
"        ),\n",
"        Field(\n",
"            name=\"body\",\n",
"            type=\"string\",\n",
"            indexing=[\"index\", \"summary\"],\n",
"            index=\"enable-bm25\",\n",
"        ),\n",
"    ]\n",
")\n",
"\n",
"# Schema with a `default` fieldset over title/body and a `default` rank profile.\n",
"msmarco_schema = Schema(\n",
"    name=\"msmarco\",\n",
"    document=document,\n",
"    fieldsets=[\n",
"        FieldSet(name=\"default\", fields=[\"title\", \"body\"]),\n",
"    ],\n",
"    rank_profiles=[\n",
"        RankProfile(name=\"default\", first_phase=\"nativeRank(title, body)\"),\n",
"    ],\n",
")\n",
"\n",
"# Wrap the schema in an application package.\n",
"app_package = ApplicationPackage(name=\"msmarco\", schema=msmarco_schema)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Modify the application package"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can add a new rank profile:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Register a `bm25` rank profile that inherits from `default`.\n",
"app_package.schema.add_rank_profile(\n",
"    RankProfile(\n",
"        name=\"bm25\",\n",
"        inherits=\"default\",\n",
"        first_phase=\"bm25(title) + bm25(body)\",\n",
"    )\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 677d355

Please sign in to comment.