Merged

28 commits
b79f854  backend: make crawlconfigs mutable! (#656)  (ikreymer, Mar 1, 2023)
a6692bb  reformat  (ikreymer, Mar 1, 2023)
d9b2ce0  fix setting scale, format  (ikreymer, Mar 1, 2023)
a67921b  add revision tracking to crawlconfig:  (ikreymer, Mar 2, 2023)
c7b7140  lint fix  (ikreymer, Mar 2, 2023)
63203f7  crawlconfigs: only add revision history if crawl data changed, not me…  (ikreymer, Mar 2, 2023)
54dfaef  backend: ensure crawl job 'config' is updated when exclusions are add…  (ikreymer, Mar 2, 2023)
eeaabf6  Add migration for mutable crawl configs  (tw4l, Mar 2, 2023)
bdb0334  Add crawlTimeout to UpdateCrawlConfig and add test  (tw4l, Mar 3, 2023)
b0101b8  Use new crawl config update API (#672)  (SuaYoo, Mar 6, 2023)
7bcb0eb  WIP: Rename userid fields to createdBy and modifiedBy  (tw4l, Mar 6, 2023)
d1348bf  Linting fixups  (tw4l, Mar 6, 2023)
2175175  migration: skip invalid crawls (missing config), make createdBy optio…  (ikreymer, Mar 7, 2023)
42eeade  lint fix  (ikreymer, Mar 7, 2023)
cf60226  more userid -> modifiedBy renamings  (ikreymer, Mar 7, 2023)
4bee248  Update crawl config keys with new API (#681)  (SuaYoo, Mar 7, 2023)
a506342  Remove oldId  (tw4l, Mar 7, 2023)
7896db0  Remove new_id from response  (tw4l, Mar 7, 2023)
b32824c  Add missing return to fix /run response  (tw4l, Mar 7, 2023)
66b8645  update config object  (SuaYoo, Mar 8, 2023)
140a83b  update crawlTimeout type  (SuaYoo, Mar 8, 2023)
26a0c79  update crawlTimeout in initial crawl config  (SuaYoo, Mar 8, 2023)
1c30cb8  backend:  (ikreymer, Mar 8, 2023)
2e68ed1  don't force migration  (ikreymer, Mar 8, 2023)
d3178b5  add revision if profileid is changed  (ikreymer, Mar 8, 2023)
96f6168  replace config  (SuaYoo, Mar 8, 2023)
2c46ee1  fix updating endpoint:  (ikreymer, Mar 8, 2023)
d48de62  ensure profileid == "" and null are equivalent when checking if changed  (ikreymer, Mar 8, 2023)
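Taken together, these commits make crawl configs mutable while keeping a revision history: an update bumps the config's `rev` and archives the previous version, but only when a crawl-affecting field actually changes (metadata-only edits are excluded, and `profileid == ""` is treated the same as null). A minimal sketch of that update rule follows; names like `CRAWL_FIELDS` and `needs_new_revision` are illustrative, not the actual btrixcloud implementation.

```python
from typing import Optional

# Fields that affect crawl output; metadata-only edits (e.g. name, tags)
# should not create a new revision (illustrative field set).
CRAWL_FIELDS = ("config", "profileid", "schedule", "scale", "crawlTimeout")


def normalize_profileid(value: Optional[str]) -> Optional[str]:
    """Treat profileid == "" and profileid == None as equivalent."""
    return value or None


def needs_new_revision(old: dict, update: dict) -> bool:
    """Return True only if a crawl-affecting field actually changed."""
    for field in CRAWL_FIELDS:
        if field not in update:
            continue
        old_val, new_val = old.get(field), update[field]
        if field == "profileid":
            old_val = normalize_profileid(old_val)
            new_val = normalize_profileid(new_val)
        if new_val != old_val:
            return True
    return False


def apply_update(old: dict, update: dict) -> dict:
    """Apply an update, bumping rev and keeping history only when needed."""
    new = {**old, **update}
    if needs_new_revision(old, update):
        # the previous version would be written to a revision-history
        # collection here, before bumping the revision counter
        new["rev"] = old.get("rev", 0) + 1
    return new
```

For example, `apply_update({"rev": 1, "profileid": None}, {"profileid": ""})` leaves `rev` at 1, since an empty and a null profile are considered the same.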
55 changes: 30 additions & 25 deletions backend/btrixcloud/crawl_job.py
@@ -17,6 +17,7 @@

from .db import init_db
from .crawls import Crawl, CrawlFile, CrawlCompleteIn, dt_now
from .crawlconfigs import CrawlConfig


# Seconds before allowing another shutdown attempt
@@ -46,6 +47,8 @@ def __init__(self):
self.cid = uuid.UUID(os.environ["CRAWL_CONFIG_ID"])
self.userid = uuid.UUID(os.environ["USER_ID"])

self.rev = int(os.environ["REV"])

self.is_manual = os.environ.get("RUN_MANUAL") == "1"
self.tags = os.environ.get("TAGS", "").split(",")

@@ -79,15 +82,23 @@ def __init__(self):
async def async_init(self, template, params):
"""async init for k8s job"""
crawl = await self._get_crawl()
crawlconfig = None

self.scale = await self.load_initial_scale(crawl)
try:
result = await self.crawl_configs.find_one({"_id": self.cid})
crawlconfig = CrawlConfig.from_dict(result)
self.scale = self._get_crawl_scale(crawl) or crawlconfig.scale

# pylint: disable=broad-except
except Exception as exc:
print(exc)

# if crawl doesn't exist, create it, using scale from config
if not crawl:
params["scale"] = self.scale
await self.init_job_objects(template, params)

await self.init_crawl()
await self.init_crawl(crawlconfig)
prev_start_time = None

retry = 3
@@ -249,10 +260,10 @@ async def update_crawl(self, **kwargs):
"""update crawl state, and optionally mark as finished"""
await self.crawls.find_one_and_update({"_id": self.job_id}, {"$set": kwargs})

async def init_crawl(self):
async def init_crawl(self, crawlconfig):
"""create crawl, doesn't exist, mark as starting"""
try:
crawl = self._make_crawl("starting", self.scale)
crawl = self._make_crawl("starting", self.scale, crawlconfig)
await self.crawls.insert_one(crawl.to_dict())
except pymongo.errors.DuplicateKeyError:
await self.update_crawl(state="starting", scale=self.scale)
@@ -321,27 +332,17 @@ async def cancel(self):

return {"success": True}

# pylint: disable=unused-argument
async def load_initial_scale(self, crawl=None):
"""load scale from config or crawl object if not set"""
if self.scale:
return self.scale

try:
result = await self.crawl_configs.find_one(
{"_id": self.cid}, {"scale": True}
)
return result["scale"]
# pylint: disable=broad-except
except Exception as exc:
print(exc)
return 1

def _make_crawl(self, state, scale):
def _make_crawl(self, state, scale, crawlconfig):
"""Create crawl object for partial or fully complete crawl"""
return Crawl(
id=self.job_id,
state=state,
config=crawlconfig.config,
jobType=crawlconfig.jobType,
profileid=crawlconfig.profileid,
cid_rev=crawlconfig.rev,
schedule=crawlconfig.schedule,
crawlTimeout=crawlconfig.crawlTimeout,
userid=self.userid,
oid=self.oid,
cid=self.cid,
@@ -383,9 +384,9 @@ async def cancel():
async def healthz():
return {}

@app.post("/change_config/{cid}")
async def change_config(cid: str):
return await self._change_crawl_config(cid)
@app.post("/rollover")
async def restart():
return await self._rollover_restart()

@abstractmethod
async def init_job_objects(self, template, params):
@@ -399,6 +400,10 @@ async def delete_job_objects(self, job_id):
async def _get_crawl(self):
"""get runnable object representing this crawl"""

@abstractmethod
def _get_crawl_scale(self, crawl):
"""get scale from crawl, if any"""

@abstractmethod
async def _do_scale(self, new_scale):
"""set number of replicas"""
@@ -408,7 +413,7 @@ async def _send_shutdown_signal(self, signame):
"""gracefully shutdown crawl"""

@abstractmethod
async def _change_crawl_config(self, cid):
async def _rollover_restart(self):
"""change crawl config for this crawl"""

@property
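With configs now mutable, the crawl job no longer looks up scale on its own (the removed `load_initial_scale`); instead `_make_crawl` snapshots the config fields the crawl actually ran with, including the config revision as `cid_rev`, so a later edit (and rev bump) cannot retroactively change a finished crawl's record. A rough sketch of that snapshot pattern, using an illustrative pydantic model rather than the real `Crawl` model:

```python
from typing import Optional

from pydantic import BaseModel


class CrawlConfigSnapshot(BaseModel):
    """Illustrative subset of CrawlConfig copied onto each Crawl at start."""

    config: dict  # assumed to be a plain dict here; the real field is a model
    jobType: Optional[str] = None
    profileid: Optional[str] = None
    cid_rev: int = 0  # revision of the config this crawl actually ran with
    schedule: Optional[str] = None
    crawlTimeout: Optional[int] = None


def snapshot(crawlconfig) -> CrawlConfigSnapshot:
    # mirrors the new _make_crawl(): copy everything from the config doc
    # so later edits to the (now mutable) config leave this crawl untouched
    return CrawlConfigSnapshot(
        config=crawlconfig.config,
        jobType=crawlconfig.jobType,
        profileid=crawlconfig.profileid,
        cid_rev=crawlconfig.rev,
        schedule=crawlconfig.schedule,
        crawlTimeout=crawlconfig.crawlTimeout,
    )
```

The companion change renames the job's `/change_config/{cid}` endpoint to `/rollover`: since the config id no longer changes on edit, restarting the crawler pods to pick up the latest revision needs no config argument at all.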