Skip to content

Commit

Permalink
Add partial support for setitem with fancy indexing (mars-project#2453)
Browse files (browse the repository at this point in the history)
hekaisheng authored and wjsi committed Oct 23, 2021
1 parent bc5f004 commit cf409e7
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 46 deletions.
7 changes: 6 additions & 1 deletion mars/services/scheduling/worker/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ async def _get_global_slot_ref(self):

async def _prepare_input_data(self, subtask: Subtask, band_name: str):
queries = []
shuffle_queries = []
storage_api = await StorageAPI.create(
subtask.session_id, address=self.address, band_name=band_name)
pure_dep_keys = set()
Expand All @@ -106,10 +107,14 @@ async def _prepare_input_data(self, subtask: Subtask, band_name: str):
queries.append(storage_api.fetch.delay(chunk.key, band_name=to_fetch_band))
elif isinstance(chunk.op, FetchShuffle):
for key in chunk_key_to_data_keys[chunk.key]:
queries.append(storage_api.fetch.delay(
shuffle_queries.append(storage_api.fetch.delay(
key, band_name=to_fetch_band, error='ignore'))
if queries:
await storage_api.fetch.batch(*queries)
if shuffle_queries:
# TODO(hks): The batch method doesn't accept different error arguments,
# combine them when it can.
await storage_api.fetch.batch(*shuffle_queries)

async def _collect_input_sizes(self,
subtask: Subtask,
Expand Down
22 changes: 16 additions & 6 deletions mars/services/subtask/worker/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,10 @@ async def _load_input_data(self):
accept_nones.append(True)
elif isinstance(chunk.op, FetchShuffle):
for key in self._chunk_key_to_data_keys[chunk.key]:
keys.append(key)
gets.append(self._storage_api.get.delay(key, error='ignore'))
accept_nones.append(False)
if key not in keys:
keys.append(key)
gets.append(self._storage_api.get.delay(key, error='ignore'))
accept_nones.append(False)
if keys:
logger.debug('Start getting input data, keys: %s, '
'subtask id: %s', keys, self.subtask.subtask_id)
Expand Down Expand Up @@ -207,19 +208,28 @@ def cb(fut):
if ref_counts[inp.key] == 0:
# ref count reaches 0, remove it
for key in self._chunk_key_to_data_keys[inp.key]:
del self._datastore[key]
if key in self._datastore:
del self._datastore[key]

async def _unpin_data(self, data_keys):
# unpin input keys
unpins = []
shuffle_unpins = []
for key in data_keys:
if isinstance(key, tuple):
# a tuple key means it's a shuffle key,
# some shuffle data is None and not stored in storage
unpins.append(self._storage_api.unpin.delay(key, error='ignore'))
shuffle_unpins.append(
self._storage_api.unpin.delay(key, error='ignore')
)
else:
unpins.append(self._storage_api.unpin.delay(key))
await self._storage_api.unpin.batch(*unpins)
if unpins:
await self._storage_api.unpin.batch(*unpins)
if shuffle_unpins:
# TODO(hks): The batch method doesn't accept different error arguments,
# combine them when it can.
await self._storage_api.unpin.batch(*shuffle_unpins)

async def _store_data(self, chunk_graph: ChunkGraph):
# skip virtual operands for result chunks
Expand Down
[Diff for the fourth changed file did not render on this page — see the full commit cf409e7 for its contents.]

0 comments on commit cf409e7

Please sign in to comment.