Skip to content

Commit

Permalink
i915: Add support for mmapped OA buffer
Browse files Browse the repository at this point in the history
The OA buffer can be mmapped, and reports can be recorded from the mmapped
buffer. This creates a new type of OA record that has a different
format. Add support for the new record type with the command line option -M.

Example:
    i915-perf-recorder -m RenderBasic -s 8000 -k mono -M

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
  • Loading branch information
unerlige committed Aug 3, 2021
1 parent 692030a commit 1c19c13
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/gpuvis.cpp
Expand Up @@ -2269,7 +2269,7 @@ void TraceEvents::add_i915_perf_frequency( const trace_event_t &event, int64_t t

void TraceEvents::init_i915_perf_event( trace_event_t &event )
{
if ( !event.has_duration() )
if ( !event.has_duration() && !m_i915.perf_locs.empty())
event.id_start = m_i915.perf_locs.back();
else
{
Expand Down
3 changes: 2 additions & 1 deletion src/gpuvis_graph.cpp
Expand Up @@ -2556,7 +2556,8 @@ uint32_t TraceWin::graph_render_i915_perf_events( graph_info_t &gi )
{
imgui_drawrect( x0, y, x1 - x0, row_h, s_clrs().get( col_Graph_BarSelRect ) );

gi.set_selected_i915_ringctxseq( *process.event );
if (process.event != NULL)
gi.set_selected_i915_ringctxseq( *process.event );

gi.i915_perf_bars.push_back( event.id );

Expand Down
38 changes: 35 additions & 3 deletions src/gpuvis_i915_perfcounters.cpp
Expand Up @@ -122,6 +122,31 @@ void I915PerfCounters::init( TraceEvents &trace_events )
}
}

/*
 * Count the reports covered by a timeline item that spans the records
 * from `start` to `end` (both inclusive).
 *
 * tl          - timeline item; report_start_offset is the byte offset into
 *               the payload of `start` where the item begins, and
 *               report_end_offset is the byte offset into the payload of
 *               `end` where it stops.
 * start, end  - first and last record headers of the item.
 * report_size - size in bytes of a single report.
 *
 * Returns the number of whole reports in that range, or 0 when report_size
 * is 0 or the offsets are inconsistent.
 *
 * NOTE(review): walking from `start` to `end` assumes both records live in
 * one buffer where each record is immediately followed by the next one
 * (header + payload, `size` bytes in total) -- confirm against the reader.
 */
static uint32_t num_reports(const struct intel_perf_timeline_item *tl,
                            const struct drm_i915_perf_record_header *start,
                            const struct drm_i915_perf_record_header *end,
                            uint32_t report_size)
{
    const struct drm_i915_perf_record_header *hdr = start;
    uint32_t n_reports = 0;

    /* Avoid dividing by zero on a bogus metric set. */
    if (report_size == 0)
        return 0;

    if (start == end) {
        /* Guard the unsigned subtraction against inverted offsets. */
        if (tl->report_end_offset < tl->report_start_offset)
            return 0;

        return (tl->report_end_offset - tl->report_start_offset) / report_size;
    }

    while (hdr <= end) {
        if (hdr == end) {
            /* Only the leading part of the last record belongs to the item. */
            n_reports += tl->report_end_offset / report_size;
            break;
        }

        /*
         * A record smaller than its own header is corrupt; stop instead of
         * underflowing the payload length or looping forever.
         */
        if (hdr->size < sizeof(*hdr))
            break;

        const size_t payload = hdr->size - sizeof(*hdr);

        if (hdr == start) {
            /* Only the trailing part of the first record belongs to the item. */
            if (payload > tl->report_start_offset)
                n_reports += (payload - tl->report_start_offset) / report_size;
        } else {
            n_reports += payload / report_size;
        }

        /*
         * Records are variable-length: step by the record's own size, not by
         * sizeof(*hdr), which would land inside the payload.
         */
        hdr = (const struct drm_i915_perf_record_header *)
              ((const uint8_t *)hdr + hdr->size);
    }

    return n_reports;
}

void I915PerfCounters::set_event( const trace_event_t &event )
{
if ( m_event_id == event.id || event.id == INVALID_ID )
Expand All @@ -139,12 +164,19 @@ void I915PerfCounters::set_event( const trace_event_t &event )
m_trace_events->i915_perf_reader->records[timeline_item->record_start];
const struct drm_i915_perf_record_header *record_end =
m_trace_events->i915_perf_reader->records[timeline_item->record_end];
const uint32_t report_size = metric_set->perf_raw_size;

struct intel_perf_accumulator accu;
intel_perf_accumulate_reports( &accu, metric_set->perf_oa_format,
record_start, record_end );

m_n_reports = timeline_item->record_end - timeline_item->record_start;
record_start, record_end,
timeline_item->report_start_offset,
timeline_item->report_end_offset );

if (record_start->type == DRM_I915_PERF_RECORD_SAMPLE)
m_n_reports = timeline_item->record_end - timeline_item->record_start;
else if (record_start->type == INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE)
m_n_reports = num_reports(timeline_item, record_start, record_end,
report_size);

for ( uint32_t c = 0; c < metric_set->n_counters; c++ )
{
Expand Down
60 changes: 48 additions & 12 deletions src/i915-perf/i915-perf-read.cpp
Expand Up @@ -113,13 +113,41 @@ int read_i915_perf_file( const char *file, StrPool &strpool, trace_info_t &trace
return 0;
}

#if USE_I915_PERF
static uint32_t record_timestamp( const struct drm_i915_perf_record_header *record )
/*
 * Records can now be of two types: DRM_I915_PERF_RECORD_SAMPLE and
 * INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE. The former carries exactly one
 * report per record, while the latter may carry many reports per record.
 * Records are only used here to fetch the next report in the sequence, so
 * both types can be handled by the same logic below using an iterator.
 */
/* Cursor identifying one report: which record it is in, and where inside it. */
struct perf_report_iter {
    /* Index of the current record in the reader's records array. */
    uint32_t record_idx;

    /* Byte offset of the current report from the start of that record's payload. */
    uint32_t report_offset;
};

/*
 * Advance `iter` by one report within the timeline item `tl` and return a
 * pointer to report data, or NULL once the iterator has moved past
 * tl->report_end_offset in the last record of the item.
 *
 * reader - data reader providing the records array and the metric set whose
 *          perf_raw_size is used as the size of one report.
 * tl     - timeline item bounding the iteration (record_end,
 *          report_end_offset).
 * iter   - in/out cursor; updated in place.
 */
static uint32_t *next_report(const struct intel_perf_data_reader *reader,
const struct intel_perf_timeline_item *tl,
struct perf_report_iter *iter)
{
/*
 * NOTE(review): the next two lines reference `record`, which is not declared
 * in this function, and return a uint32_t from a function returning a
 * pointer. They look like leftover lines from the removed record_timestamp()
 * helper -- confirm and drop.
 */
const uint32_t *data = ( const uint32_t * )( record + 1 );
return data[ 1 ];
const uint32_t report_size = reader->metric_set->perf_raw_size;
/* The record is looked up before the iterator is advanced below. */
const struct drm_i915_perf_record_header *curr =
reader->records[iter->record_idx];

if (iter->record_idx == tl->record_end) {
/* Last record of the item: stop once past the item's end offset. */
iter->report_offset += report_size;
if (iter->report_offset > tl->report_end_offset)
return NULL;
} else if (iter->record_idx < tl->record_end) {
/* Payload length of the current record (record size minus its header). */
const uint32_t len = curr->size - sizeof(*curr);
iter->report_offset += report_size;
/* Ran off the end of this record: continue at the start of the next one. */
if (iter->report_offset >= len) {
iter->report_offset = 0;
iter->record_idx++;
}
}
/*
 * NOTE(review): when record_idx is already greater than tl->record_end
 * neither branch runs, so the iterator is not advanced and a non-NULL
 * pointer is still returned -- confirm callers cannot reach that state.
 */

/*
 * NOTE(review): `curr` was fetched before a possible record_idx++ above, so
 * after a rollover this points into the previous record's payload rather
 * than the new record's -- confirm whether that is intended.
 */
return (uint32_t *)((uint8_t *)(curr + 1) + iter->report_offset);
}
#endif

void load_i915_perf_counter_values( struct intel_perf_data_reader *reader,
struct intel_perf_logical_counter *counter,
Expand All @@ -129,17 +157,25 @@ void load_i915_perf_counter_values( struct intel_perf_data_reader *reader,
assert( event.i915_perf_timeline < reader->n_timelines );

const struct intel_perf_timeline_item *item = &reader->timelines[ event.i915_perf_timeline ];
const struct drm_i915_perf_record_header *first_record = reader->records[ item->record_start ];
for ( uint32_t j = item->record_start; j < item->record_end; j++ )
uint64_t factor = ( item->cpu_ts_end - item->cpu_ts_start ) /
( item->ts_end - item->ts_start );
struct perf_report_iter curr = {
item->record_start,
item->report_start_offset
}, prev = curr;
uint32_t *report32;

while ((report32 = next_report(reader, item, &curr)) != NULL)
{
const struct drm_i915_perf_record_header *record = reader->records[j];
int64_t ts = item->cpu_ts_start +
( record_timestamp( record ) - record_timestamp( first_record ) ) *
( item->cpu_ts_end - item->cpu_ts_start ) / ( item->ts_end - item->ts_start );
int64_t ts = item->cpu_ts_start + ( report32[ 1 ] - item->ts_start ) * factor;
struct intel_perf_accumulator acc;

intel_perf_accumulate_reports( &acc, reader->metric_set->perf_oa_format,
reader->records[j], reader->records[j + 1] );
reader->records[prev.record_idx],
reader->records[curr.record_idx],
prev.report_offset, curr.report_offset );

prev = curr;

float value;
if ( counter->storage == INTEL_PERF_LOGICAL_COUNTER_STORAGE_DOUBLE ||
Expand Down

0 comments on commit 1c19c13

Please sign in to comment.