
Commit

[Reclaim buffer] [Mellanox] Db migrator support reclaiming reserved buffer for unused ports (sonic-net#1822)

Signed-off-by: Stephen Sun stephens@nvidia.com

What I did
Add db_migrator support for reclaiming the buffer reserved for unused ports.

How I did it
For admin-down ports, if the buffer object configuration aligns with the default configuration, set the buffer object configuration as follows (illustrated by the sketch below):

Dynamic model: the normal buffer objects are configured on admin-down ports; the buffer manager then applies zero profiles to admin-down ports.
Static model: zero buffer objects are configured on admin-down ports.
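
For illustration, here is a rough sketch of what the migrator writes for a hypothetical admin-down port Ethernet8 (the port name is made up; the table keys and profile names are taken from the migrator code in this commit):

Dynamic model (normal objects; the buffer manager applies zero profiles at runtime):
    BUFFER_PG|Ethernet8|0      -> {'profile': 'ingress_lossy_profile'}
    BUFFER_PG|Ethernet8|3-4    -> {'profile': 'NULL'}
    BUFFER_QUEUE|Ethernet8|0-2 -> {'profile': 'q_lossy_profile'}
    BUFFER_QUEUE|Ethernet8|3-4 -> {'profile': 'egress_lossless_profile'}
    BUFFER_QUEUE|Ethernet8|5-6 -> {'profile': 'q_lossy_profile'}

Static model (dedicated zero pools and profiles are created and referenced):
    BUFFER_POOL|ingress_zero_pool -> {'size': '0', 'mode': 'static', 'type': 'ingress'}
    BUFFER_PG|Ethernet8|0         -> {'profile': 'ingress_lossy_pg_zero_profile'}
    BUFFER_QUEUE|Ethernet8|0-2    -> {'profile': 'egress_lossy_zero_profile'}
    BUFFER_QUEUE|Ethernet8|3-4    -> {'profile': 'egress_lossless_zero_profile'}
    BUFFER_QUEUE|Ethernet8|5-6    -> {'profile': 'egress_lossy_zero_profile'}

In both models the port's BUFFER_PORT_INGRESS_PROFILE_LIST and BUFFER_PORT_EGRESS_PROFILE_LIST entries are also filled in if not already present, using the regular profiles in the dynamic model and the zero profiles in the static model.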

How to verify it
Unit test.
Manual test.
stephenxs committed Nov 29, 2021
1 parent 30e4654 commit 67a267b
Showing 194 changed files with 16,815 additions and 4,293 deletions.
16 changes: 13 additions & 3 deletions scripts/db_migrator.py
@@ -44,7 +44,7 @@ def __init__(self, namespace, socket=None):
non-zero values.
build: sequentially increase within a minor version domain.
"""
self.CURRENT_VERSION = 'version_2_0_3'
self.CURRENT_VERSION = 'version_2_0_4'

self.TABLE_NAME = 'VERSIONS'
self.TABLE_KEY = 'DATABASE'
@@ -76,7 +76,7 @@ def __init__(self, namespace, socket=None):

if asic_type == "mellanox":
from mellanox_buffer_migrator import MellanoxBufferMigrator
self.mellanox_buffer_migrator = MellanoxBufferMigrator(self.configDB)
self.mellanox_buffer_migrator = MellanoxBufferMigrator(self.configDB, self.appDB, self.stateDB)

def migrate_pfc_wd_table(self):
'''
@@ -617,9 +617,19 @@ def version_2_0_2(self):

def version_2_0_3(self):
"""
Current latest version. Nothing to do here.
Version 2_0_3
"""
log.log_info('Handling version_2_0_3')
if self.asic_type == "mellanox":
self.mellanox_buffer_migrator.mlnx_reclaiming_unused_buffer()
self.set_version('version_2_0_4')
return 'version_2_0_4'

def version_2_0_4(self):
"""
Current latest version. Nothing to do here.
"""
log.log_info('Handling version_2_0_4')
return None

def get_version(self):
261 changes: 260 additions & 1 deletion scripts/mellanox_buffer_migrator.py
@@ -79,15 +79,18 @@
Not providing it means no buffer profile migration required.
"""
from sonic_py_common import logger
import re

SYSLOG_IDENTIFIER = 'mellanox_buffer_migrator'

# Global logger instance
log = logger.Logger(SYSLOG_IDENTIFIER)

class MellanoxBufferMigrator():
def __init__(self, configDB):
def __init__(self, configDB, appDB, stateDB):
self.configDB = configDB
self.appDB = appDB
self.stateDB = stateDB

self.platform = None
self.sku = None
@@ -834,3 +837,259 @@ def mlnx_flush_new_buffer_configuration(self):

def mlnx_is_buffer_model_dynamic(self):
return self.is_buffer_config_default and not self.is_msft_sku

def mlnx_reorganize_buffer_tables(self, buffer_table, name):
"""
Reorganize the BUFFER_PG and BUFFER_QUEUE tables from a single-tier index to a two-tier index.
Originally, the index is <port>|<ids>. However, we need to check all the items belonging to a port,
which requires a two-tier index: <port> first and then <ids>.
E.g.
Before reorganizing:
{
"Ethernet0|0": {"profile" : "ingress_lossy_profile"},
"Ethernet0|3-4": {"profile": "pg_lossless_100000_5m_profile"},
"Ethernet4|0": {"profile" : "ingress_lossy_profile"},
"Ethernet4|3-4": {"profile": "pg_lossless_50000_5m_profile"}
}
After reorganizing:
{
"Ethernet0": {
"0": {"profile" : "ingress_lossy_profile"},
"3-4": {"profile": "pg_lossless_100000_5m_profile"}
},
"Ethernet4": {
"0": {"profile" : "ingress_lossy_profile"},
"3-4": {"profile": "pg_lossless_50000_5m_profile"}
}
}
"""
result = {}
for key, item in buffer_table.items():
if len(key) != 2:
log.log_error('Table {} contains invalid key {}, skip this item'.format(name, key))
continue
port, ids = key
if port not in result:
result[port] = {}
result[port][ids] = item

return result

def mlnx_reclaiming_unused_buffer(self):
cable_length_key = self.configDB.get_keys('CABLE_LENGTH')
if not cable_length_key:
log.log_notice("No cable length table defined, do not migrate buffer objects for reclaiming buffer")
return

log.log_info("Migrate buffer objects for reclaiming buffer based on 'CABLE_LENGTH|{}'".format(cable_length_key[0]))

device_metadata = self.configDB.get_entry('DEVICE_METADATA', 'localhost')
is_dynamic = (device_metadata.get('buffer_model') == 'dynamic')

port_table = self.configDB.get_table('PORT')
buffer_pool_table = self.configDB.get_table('BUFFER_POOL')
buffer_profile_table = self.configDB.get_table('BUFFER_PROFILE')
buffer_pg_table = self.configDB.get_table('BUFFER_PG')
buffer_queue_table = self.configDB.get_table('BUFFER_QUEUE')
buffer_ingress_profile_list_table = self.configDB.get_table('BUFFER_PORT_INGRESS_PROFILE_LIST')
buffer_egress_profile_list_table = self.configDB.get_table('BUFFER_PORT_EGRESS_PROFILE_LIST')
cable_length_entries = self.configDB.get_entry('CABLE_LENGTH', cable_length_key[0])

buffer_pg_items = self.mlnx_reorganize_buffer_tables(buffer_pg_table, 'BUFFER_PG')
buffer_queue_items = self.mlnx_reorganize_buffer_tables(buffer_queue_table, 'BUFFER_QUEUE')

single_pool = True
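# Two ingress pools are in use only if ingress_lossy_profile is mapped to a
# dedicated ingress_lossy_pool; otherwise the device uses a single ingress pool.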
if 'ingress_lossy_pool' in buffer_pool_table:
ingress_lossy_profile = buffer_profile_table.get('ingress_lossy_profile')
if ingress_lossy_profile:
if 'ingress_lossy_pool' == ingress_lossy_profile.get('pool'):
single_pool = False

# Construct buffer items to be applied to admin down ports
if is_dynamic:
# For dynamic model, we just need to add the default buffer objects to admin down ports
# Buffer manager will apply zero profiles automatically when a port is shutdown
lossy_pg_item = {'profile': 'ingress_lossy_profile'} if 'ingress_lossy_profile' in buffer_profile_table else None
lossy_queue_item = {'profile': 'q_lossy_profile'} if 'q_lossy_profile' in buffer_profile_table else None
lossless_queue_item = {'profile': 'egress_lossless_profile'} if 'egress_lossless_profile' in buffer_profile_table else None

queue_items_to_apply = {'0-2': lossy_queue_item,
'3-4': lossless_queue_item,
'5-6': lossy_queue_item}

if single_pool:
if 'ingress_lossless_profile' in buffer_profile_table:
ingress_profile_list_item = {'profile_list': 'ingress_lossless_profile'}
else:
ingress_profile_list_item = None
else:
if 'ingress_lossless_profile' in buffer_profile_table and 'ingress_lossy_profile' in buffer_profile_table:
ingress_profile_list_item = {'profile_list': 'ingress_lossless_profile,ingress_lossy_profile'}
else:
ingress_profile_list_item = None

if 'egress_lossless_profile' in buffer_profile_table and 'egress_lossy_profile' in buffer_profile_table:
egress_profile_list_item = {'profile_list': 'egress_lossless_profile,egress_lossy_profile'}
else:
egress_profile_list_item = None

pools_to_insert = None
profiles_to_insert = None

else:
# For static model, we need more.
# Define zero buffer pools and profiles
ingress_zero_pool = {'size': '0', 'mode': 'static', 'type': 'ingress'}
ingress_lossy_pg_zero_profile = {
"pool":"ingress_zero_pool",
"size":"0",
"static_th":"0"
}
lossy_pg_item = {'profile': 'ingress_lossy_pg_zero_profile'}

ingress_lossless_zero_profile = {
"pool":"ingress_lossless_pool",
"size":"0",
"dynamic_th":"-8"
}

if single_pool:
ingress_profile_list_item = {'profile_list': 'ingress_lossless_zero_profile'}
else:
ingress_lossy_zero_profile = {
"pool":"ingress_lossy_pool",
"size":"0",
"dynamic_th":"-8"
}
ingress_profile_list_item = {'profile_list': 'ingress_lossless_zero_profile,ingress_lossy_zero_profile'}

egress_lossless_zero_profile = {
"pool":"egress_lossless_pool",
"size":"0",
"dynamic_th":"-8"
}
lossless_queue_item = {'profile': 'egress_lossless_zero_profile'}

egress_lossy_zero_profile = {
"pool":"egress_lossy_pool",
"size":"0",
"dynamic_th":"-8"
}
lossy_queue_item = {'profile': 'egress_lossy_zero_profile'}
egress_profile_list_item = {'profile_list': 'egress_lossless_zero_profile,egress_lossy_zero_profile'}

queue_items_to_apply = {'0-2': lossy_queue_item,
'3-4': lossless_queue_item,
'5-6': lossy_queue_item}

pools_to_insert = {'ingress_zero_pool': ingress_zero_pool}
profiles_to_insert = {'ingress_lossy_pg_zero_profile': ingress_lossy_pg_zero_profile,
'ingress_lossless_zero_profile': ingress_lossless_zero_profile,
'egress_lossless_zero_profile': egress_lossless_zero_profile,
'egress_lossy_zero_profile': egress_lossy_zero_profile}
if not single_pool:
profiles_to_insert['ingress_lossy_zero_profile'] = ingress_lossy_zero_profile

lossless_profile_pattern = 'pg_lossless_([1-9][0-9]*000)_([1-9][0-9]*m)_profile'
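# Default lossless PG profiles are named pg_lossless_<speed>_<cable_length>_profile;
# the two capture groups extract speed and cable length so they can be compared with
# the port's configured speed and its CABLE_LENGTH entry.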
zero_item_count = 0
reclaimed_ports = set()
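# zero_item_count counts how many zero/default items are written; the zero pools and
# profiles are inserted only if at least one item was applied. reclaimed_ports records
# the ports (dynamic model) whose only BUFFER_PG entry is the default lossless PG, so
# their stale BUFFER_PG entries can later be removed from APPL_DB on warm reboot.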
for port_name, port_info in port_table.items():
if port_info.get('admin_status') == 'up':
# Skip admin-up ports; only admin-down ports are handled
continue

# If the BUFFER_PG items to be applied to admin-down ports have been generated,
# check whether the port's existing BUFFER_PG items align with the default ones,
# and if so insert the generated items into BUFFER_PG.
# The same logic applies to BUFFER_QUEUE, BUFFER_PORT_INGRESS_PROFILE_LIST and BUFFER_PORT_EGRESS_PROFILE_LIST
if lossy_pg_item:
port_pgs = buffer_pg_items.get(port_name)
is_default = False
if not port_pgs:
is_default = True
else:
if set(port_pgs.keys()) == set(['3-4']):
if is_dynamic:
reclaimed_ports.add(port_name)
if port_pgs['3-4']['profile'] == 'NULL':
is_default = True
else:
match = re.search(lossless_profile_pattern, port_pgs['3-4']['profile'])
if match:
speed = match.group(1)
cable_length = match.group(2)
if speed == port_info.get('speed') and cable_length == cable_length_entries.get(port_name):
is_default = True

if is_default:
lossy_pg_key = '{}|0'.format(port_name)
lossless_pg_key = '{}|3-4'.format(port_name)
self.configDB.set_entry('BUFFER_PG', lossy_pg_key, lossy_pg_item)
if is_dynamic:
self.configDB.set_entry('BUFFER_PG', lossless_pg_key, {'profile': 'NULL'})
# For the traditional (static) model, we must NOT remove the default lossless PG,
# because it has already been propagated to APPL_DB by db_migrator.
# Leaving it untouched in CONFIG_DB enables the traditional buffer manager to
# remove it from CONFIG_DB as well as from APPL_DB.
# Removing it from CONFIG_DB here would leave a stale entry in APPL_DB.
zero_item_count += 1

if lossy_queue_item and lossless_queue_item:
port_queues = buffer_queue_items.get(port_name)
if not port_queues:
for ids, item in queue_items_to_apply.items():
self.configDB.set_entry('BUFFER_QUEUE', port_name + '|' + ids, item)
zero_item_count += 1

if ingress_profile_list_item:
port_ingress_profile_list = buffer_ingress_profile_list_table.get(port_name)
if not port_ingress_profile_list:
self.configDB.set_entry('BUFFER_PORT_INGRESS_PROFILE_LIST', port_name, ingress_profile_list_item)
zero_item_count += 1

if egress_profile_list_item:
port_egress_profile_list = buffer_egress_profile_list_table.get(port_name)
if not port_egress_profile_list:
self.configDB.set_entry('BUFFER_PORT_EGRESS_PROFILE_LIST', port_name, egress_profile_list_item)
zero_item_count += 1

if zero_item_count > 0:
if pools_to_insert:
for name, pool in pools_to_insert.items():
self.configDB.set_entry('BUFFER_POOL', name, pool)

if profiles_to_insert:
for name, profile in profiles_to_insert.items():
self.configDB.set_entry('BUFFER_PROFILE', name, profile)

# We need to remove the BUFFER_PG table items for admin-down ports from APPL_DB
# and then remove the buffer profiles that are no longer referenced.
# We do it here because:
# - The buffer profiles were copied from CONFIG_DB to APPL_DB by db_migrator when the database was migrated from 1.0.6 to 2.0.0
# - In this migrator the buffer priority-groups of admin-down ports have been removed from the CONFIG_DB BUFFER_PG table
# - The dynamic buffer manager will not generate buffer profiles from those removed buffer PG items
# So if a buffer profile was referenced only by admin-down ports, the dynamic buffer manager will not recreate it after it starts,
# and such profiles would otherwise be left in APPL_DB with no way to remove them.
if not is_dynamic:
return

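# Stale APPL_DB entries only matter across a warm reboot; on a cold boot APPL_DB is
# rebuilt from scratch, so the cleanup below is limited to the warm-reboot case.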
warmreboot_state = self.stateDB.get(self.stateDB.STATE_DB, 'WARM_RESTART_ENABLE_TABLE|system', 'enable')
if warmreboot_state == 'true':
referenced_profiles = set()
keys = self.appDB.keys(self.appDB.APPL_DB, "BUFFER_PG_TABLE:*")
if keys is None:
return
for buffer_pg_key in keys:
port, pg = buffer_pg_key.split(':')[1:]
if port in reclaimed_ports:
self.appDB.delete(self.appDB.APPL_DB, buffer_pg_key)
else:
buffer_pg_items = self.appDB.get_all(self.appDB.APPL_DB, buffer_pg_key)
profile = buffer_pg_items.get('profile')
if profile:
referenced_profiles.add(profile)
keys = self.appDB.keys(self.appDB.APPL_DB, "BUFFER_PROFILE_TABLE:*")
for buffer_profile_key in keys:
profile = buffer_profile_key.split(':')[1]
if profile not in referenced_profiles and profile not in buffer_profile_table.keys():
self.appDB.delete(self.appDB.APPL_DB, buffer_profile_key)
@@ -712,13 +712,6 @@
"pool": "[BUFFER_POOL_TABLE:ingress_lossless_pool]",
"size": "124928"
},
"BUFFER_PROFILE_TABLE:pg_lossless_400000_300m_profile": {
"xon": "37888",
"dynamic_th": "0",
"xoff": "373760",
"pool": "[BUFFER_POOL_TABLE:ingress_lossless_pool]",
"size": "420864"
},
"BUFFER_PROFILE_TABLE:q_lossy_profile": {
"dynamic_th": "3",
"pool": "[BUFFER_POOL_TABLE:egress_lossy_pool]",
@@ -712,13 +712,6 @@
"pool": "ingress_lossless_pool",
"size": "124928"
},
"BUFFER_PROFILE_TABLE:pg_lossless_400000_300m_profile": {
"xon": "37888",
"dynamic_th": "0",
"xoff": "373760",
"pool": "ingress_lossless_pool",
"size": "420864"
},
"BUFFER_PROFILE_TABLE:q_lossy_profile": {
"dynamic_th": "3",
"pool": "egress_lossy_pool",
