diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 9cc3d6dcb849f397972f221525c96267142ef45e..7a73c94f39df7560a1b379992ba9f87cbe0835d7 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -25,6 +25,8 @@
 #include "../../../util/arm-spe.h"
 #include <tools/libc_compat.h> // reallocarray
 
+#define ARM_SPE_CPU_MAGIC		0x1010101010101010ULL
+
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
 
@@ -36,11 +38,118 @@ struct arm_spe_recording {
 	bool			*wrapped;
 };
 
+static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
+{
+	struct perf_cpu_map *cpus = NULL;
+	FILE *onlnf;
+
+	onlnf = fopen("/sys/devices/system/cpu/online", "r");
+	if (onlnf) {
+		cpus = perf_cpu_map__read(onlnf);
+		fclose(onlnf);
+	}
+	return cpus;
+}
+
+static struct perf_cpu_map *perf_cpu_map__new_online_cpus(void)
+{
+	struct perf_cpu_map *cpus = cpu_map__new_sysfs_online();
+
+	if (cpus)
+		return cpus;
+
+	return perf_cpu_map__default_new();
+}
+
+/*
+ * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke
+ * perf_cpu_map__put() to release the map after use.
+ */
+static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist)
+{
+	struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
+	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
+	struct perf_cpu_map *intersect_cpus;
+
+	/* cpu map is not "any" CPU , we have specific CPUs to work with */
+	if (!perf_cpu_map__has_any_cpu(event_cpus)) {
+		intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
+		perf_cpu_map__put(online_cpus);
+	/* Event can be "any" CPU so count all CPUs. */
+	} else {
+		intersect_cpus = online_cpus;
+	}
+
+	return intersect_cpus;
+}
+
 static size_t
 arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
-		       struct evlist *evlist __maybe_unused)
+		       struct evlist *evlist)
+{
+	struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist);
+	size_t size;
+
+	if (!cpu_map)
+		return 0;
+
+	size = ARM_SPE_AUXTRACE_PRIV_MAX +
+	       ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map);
+	size *= sizeof(u64);
+
+	perf_cpu_map__put(cpu_map);
+	return size;
+}
+
+static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
+				   struct perf_cpu cpu, __u64 data[])
 {
-	return ARM_SPE_AUXTRACE_PRIV_SIZE;
+	struct arm_spe_recording *sper =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct perf_pmu *pmu = NULL;
+	struct perf_pmu tmp_pmu;
+	char cpu_id_str[16];
+	char *cpuid = NULL;
+	u64 val;
+
+	snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu);
+	tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str);
+	if (!tmp_pmu.cpus)
+		return -ENOMEM;
+
+	/* Read CPU MIDR */
+	cpuid = perf_pmu__getcpuid(&tmp_pmu);
+
+	/* The CPU map will not be used anymore, release it */
+	perf_cpu_map__put(tmp_pmu.cpus);
+
+	if (!cpuid)
+		return -ENOMEM;
+	val = strtol(cpuid, NULL, 16);
+
+	data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
+	data[ARM_SPE_CPU] = cpu.cpu;
+	data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
+	data[ARM_SPE_CPU_MIDR] = val;
+
+	/* Find the associate Arm SPE PMU for the CPU */
+	if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
+		pmu = sper->arm_spe_pmu;
+
+	if (!pmu) {
+		/* No Arm SPE PMU is found */
+		data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
+		data[ARM_SPE_CAP_MIN_IVAL] = 0;
+	} else {
+		data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;
+
+		if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
+			val = 0;
+		data[ARM_SPE_CAP_MIN_IVAL] = val;
+	}
+
+	free(cpuid);
+	return ARM_SPE_CPU_PRIV_MAX;
 }
 
 static int arm_spe_info_fill(struct auxtrace_record *itr,
@@ -48,20 +157,46 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
 			     struct perf_record_auxtrace_info *auxtrace_info,
 			     size_t priv_size)
 {
+	int i, ret;
+	size_t offset;
 	struct arm_spe_recording *sper =
 			container_of(itr, struct arm_spe_recording, itr);
 	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+	struct perf_cpu_map *cpu_map;
+	struct perf_cpu cpu;
+	__u64 *data;
 
-	if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
+	if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
 		return -EINVAL;
 
 	if (!session->evlist->core.nr_mmaps)
 		return -EINVAL;
 
+	cpu_map = arm_spe_find_cpus(session->evlist);
+	if (!cpu_map)
+		return -EINVAL;
+
 	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
-	auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
+	auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
+	auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
+		ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
+	auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type;
+	auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);
+
+	offset = ARM_SPE_AUXTRACE_PRIV_MAX;
+	perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
+		assert(offset < priv_size);
+		data = &auxtrace_info->priv[offset];
+		ret = arm_spe_save_cpu_header(itr, cpu, data);
+		if (ret < 0)
+			goto out;
+		offset += ret;
+	}
 
-	return 0;
+	ret = 0;
+out:
+	perf_cpu_map__put(cpu_map);
+	return ret;
 }
 
 static void
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 83836eb43fdd11632cd1807556806b7d84f9dd02..33d832a80e78c0d1313dac89bce048ed7a0b4657 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -56,32 +56,32 @@ enum arm_spe_op_type {
 	ARM_SPE_OP_BR_INDIRECT	= 1 << 17,
 };
 
-enum arm_spe_neoverse_data_source {
-	ARM_SPE_NV_L1D		= 0x0,
-	ARM_SPE_NV_L2		= 0x8,
-	ARM_SPE_NV_PEER_CORE	= 0x9,
-	ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
-	ARM_SPE_NV_SYS_CACHE	= 0xb,
-	ARM_SPE_NV_PEER_CLUSTER	= 0xc,
-	ARM_SPE_NV_REMOTE	= 0xd,
-	ARM_SPE_NV_DRAM		= 0xe,
+enum arm_spe_common_data_source {
+	ARM_SPE_COMMON_DS_L1D		= 0x0,
+	ARM_SPE_COMMON_DS_L2		= 0x8,
+	ARM_SPE_COMMON_DS_PEER_CORE	= 0x9,
+	ARM_SPE_COMMON_DS_LOCAL_CLUSTER	= 0xa,
+	ARM_SPE_COMMON_DS_SYS_CACHE	= 0xb,
+	ARM_SPE_COMMON_DS_PEER_CLUSTER	= 0xc,
+	ARM_SPE_COMMON_DS_REMOTE	= 0xd,
+	ARM_SPE_COMMON_DS_DRAM		= 0xe,
 };
 
 enum arm_spe_hisi_hip_data_source {
-	ARM_SPE_HISI_HIP_PEER_CPU = 0,
-	ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
-	ARM_SPE_HISI_HIP_L3 = 2,
-	ARM_SPE_HISI_HIP_L3_HITM = 3,
-	ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
-	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
-	ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
-	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
-	ARM_SPE_HISI_HIP_LOCAL_MEM = 8,
-	ARM_SPE_HISI_HIP_REMOTE_MEM = 9,
-	ARM_SPE_HISI_HIP_NC_DEV = 13,
-	ARM_SPE_HISI_HIP_L2 = 16,
-	ARM_SPE_HISI_HIP_L2_HITM = 17,
-	ARM_SPE_HISI_HIP_L1 = 18,
+	ARM_SPE_HISI_HIP_PEER_CPU		= 0,
+	ARM_SPE_HISI_HIP_PEER_CPU_HITM		= 1,
+	ARM_SPE_HISI_HIP_L3			= 2,
+	ARM_SPE_HISI_HIP_L3_HITM		= 3,
+	ARM_SPE_HISI_HIP_PEER_CLUSTER		= 4,
+	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM	= 5,
+	ARM_SPE_HISI_HIP_REMOTE_SOCKET		= 6,
+	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM	= 7,
+	ARM_SPE_HISI_HIP_LOCAL_MEM		= 8,
+	ARM_SPE_HISI_HIP_REMOTE_MEM		= 9,
+	ARM_SPE_HISI_HIP_NC_DEV			= 13,
+	ARM_SPE_HISI_HIP_L2			= 16,
+	ARM_SPE_HISI_HIP_L2_HITM		= 17,
+	ARM_SPE_HISI_HIP_L1			= 18,
 };
 
 struct arm_spe_record {
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 12007f9bdad8946a0b521feb412b80faba1e4415..c4f621eb7f59315c6893163f1bfe4d0c68b7c1c7 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -48,7 +48,6 @@ struct arm_spe {
 	struct perf_session		*session;
 	struct machine			*machine;
 	u32				pmu_type;
-	u64				midr;
 
 	struct perf_tsc_conversion	tc;
 
@@ -80,6 +79,11 @@ struct arm_spe {
 
 	unsigned long			num_events;
 	u8				use_ctx_pkt_for_pid;
+
+	u64				**metadata;
+	u64				metadata_ver;
+	u64				metadata_nr_cpu;
+	bool				is_homogeneous;
 };
 
 struct arm_spe_queue {
@@ -100,6 +104,18 @@ struct arm_spe_queue {
 	u64				period_instructions;
 };
 
+struct data_source_handle {
+	const struct midr_range *midr_ranges;
+	void (*ds_synth)(const struct arm_spe_record *record,
+			 union perf_mem_data_src *data_src);
+};
+
+#define DS(range, func)					\
+	{						\
+		.midr_ranges = range,			\
+		.ds_synth = arm_spe__synth_##func,	\
+	}
+
 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 			 unsigned char *buf, size_t len)
 {
@@ -120,7 +136,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 		else
 			pkt_len = 1;
 		printf(".");
-		color_fprintf(stdout, color, "  %08x: ", pos);
+		color_fprintf(stdout, color, "  %08zx: ", pos);
 		for (i = 0; i < pkt_len; i++)
 			color_fprintf(stdout, color, " %02x", buf[i]);
 		for (; i < 16; i++)
@@ -275,6 +291,20 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
 	return 0;
 }
 
+static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
+{
+	u64 i;
+
+	if (!spe->metadata)
+		return NULL;
+
+	for (i = 0; i < spe->metadata_nr_cpu; i++)
+		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
+			return spe->metadata[i];
+
+	return NULL;
+}
+
 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
 {
 	struct simd_flags simd_flags = {};
@@ -402,7 +432,7 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
 
 	sample.id = spe_events_id;
 	sample.stream_id = spe_events_id;
-	sample.addr = record->virt_addr;
+	sample.addr = record->to_ip;
 	sample.phys_addr = record->phys_addr;
 	sample.data_src = data_src;
 	sample.period = spe->instructions_sample_period;
@@ -411,10 +441,16 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }
 
-static const struct midr_range neoverse_spe[] = {
+static const struct midr_range common_ds_encoding_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
 	{},
 };
 
@@ -423,8 +459,8 @@ static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
 	{},
 };
 
-static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
-						union perf_mem_data_src *data_src)
+static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
+					      union perf_mem_data_src *data_src)
 {
 	/*
 	 * Even though four levels of cache hierarchy are possible, no known
@@ -446,17 +482,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_rec
 	}
 
 	switch (record->source) {
-	case ARM_SPE_NV_L1D:
+	case ARM_SPE_COMMON_DS_L1D:
 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 		break;
-	case ARM_SPE_NV_L2:
+	case ARM_SPE_COMMON_DS_L2:
 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 		break;
-	case ARM_SPE_NV_PEER_CORE:
+	case ARM_SPE_COMMON_DS_PEER_CORE:
 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
@@ -465,8 +501,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_rec
 	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
 	 * transfer, so set SNOOPX_PEER
 	 */
-	case ARM_SPE_NV_LOCAL_CLUSTER:
-	case ARM_SPE_NV_PEER_CLUSTER:
+	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
+	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
@@ -474,7 +510,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_rec
 	/*
 	 * System cache is assumed to be L3
 	 */
-	case ARM_SPE_NV_SYS_CACHE:
+	case ARM_SPE_COMMON_DS_SYS_CACHE:
 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
@@ -483,13 +519,13 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_rec
 	 * We don't know what level it hit in, except it came from the other
 	 * socket
 	 */
-	case ARM_SPE_NV_REMOTE:
+	case ARM_SPE_COMMON_DS_REMOTE:
 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 		break;
-	case ARM_SPE_NV_DRAM:
+	case ARM_SPE_COMMON_DS_DRAM:
 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
@@ -499,35 +535,12 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_rec
 	}
 }
 
-static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
-					       union perf_mem_data_src *data_src)
-{
-	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
-		data_src->mem_lvl = PERF_MEM_LVL_L3;
-
-		if (record->type & ARM_SPE_LLC_MISS)
-			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
-		else
-			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
-	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
-		data_src->mem_lvl = PERF_MEM_LVL_L1;
-
-		if (record->type & ARM_SPE_L1D_MISS)
-			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
-		else
-			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
-	}
-
-	if (record->type & ARM_SPE_REMOTE_ACCESS)
-		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
-}
-
 static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
 						union perf_mem_data_src *data_src)
 {
 	/* Use common synthesis method to handle store operations */
 	if (record->op & ARM_SPE_OP_ST) {
-		arm_spe__synth_data_source_generic(record, data_src);
+		arm_spe__synth_data_source_common(record, data_src);
 		return;
 	}
 
@@ -612,11 +625,87 @@ static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_rec
 	}
 }
 
-static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
+static const struct data_source_handle data_source_handles[] = {
+	DS(common_ds_encoding_cpus, data_source_common),
+	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
+};
+
+static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
+					union perf_mem_data_src *data_src)
+{
+	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
+		data_src->mem_lvl = PERF_MEM_LVL_L3;
+
+		if (record->type & ARM_SPE_LLC_MISS)
+			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
+		else
+			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
+		data_src->mem_lvl = PERF_MEM_LVL_L1;
+
+		if (record->type & ARM_SPE_L1D_MISS)
+			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
+		else
+			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+	}
+
+	if (record->type & ARM_SPE_REMOTE_ACCESS)
+		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+}
+
+static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
+			      const struct arm_spe_record *record,
+			      union perf_mem_data_src *data_src)
+{
+	struct arm_spe *spe = speq->spe;
+	u64 *metadata = NULL;
+	u64 midr;
+	unsigned int i;
+
+	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
+	if (spe->metadata_ver == 1) {
+		const char *cpuid;
+
+		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
+		cpuid = perf_env__cpuid(spe->session->evlist->env);
+		midr = strtol(cpuid, NULL, 16);
+	} else {
+		/* CPU ID is -1 for per-thread mode */
+		if (speq->cpu < 0) {
+			/*
+			 * On the heterogeneous system, due to CPU ID is -1,
+			 * cannot confirm the data source packet is supported.
+			 */
+			if (!spe->is_homogeneous)
+				return false;
+
+			/* In homogeneous system, simply use CPU0's metadata */
+			if (spe->metadata)
+				metadata = spe->metadata[0];
+		} else {
+			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
+		}
+
+		if (!metadata)
+			return false;
+
+		midr = metadata[ARM_SPE_CPU_MIDR];
+	}
+
+	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
+		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
+			data_source_handles[i].ds_synth(record, data_src);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
+				      const struct arm_spe_record *record)
 {
 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
-	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
-	bool is_hisilicon = is_midr_in_range_list(midr, hisi_hip_ds_encoding_cpus);
 
 	/* Only synthesize data source for LDST operations */
 	if (!is_ldst_op(record->op))
@@ -629,12 +718,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
 	else
 		return 0;
 
-	if (is_neoverse)
-		arm_spe__synth_data_source_neoverse(record, &data_src);
-	else if (is_hisilicon)
-		arm_spe__synth_data_source_hisi_hip(record, &data_src);
-	else
-		arm_spe__synth_data_source_generic(record, &data_src);
+	if (!arm_spe__synth_ds(speq, record, &data_src))
+		arm_spe__synth_memory_level(record, &data_src);
 
 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
@@ -655,7 +740,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 	u64 data_src;
 	int err;
 
-	data_src = arm_spe__synth_data_source(record, spe->midr);
+	data_src = arm_spe__synth_data_source(speq, record);
 
 	if (spe->sample_flc) {
 		if (record->type & ARM_SPE_L1D_MISS) {
@@ -1120,6 +1205,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused,
 	return 0;
 }
 
+static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
+{
+	u64 *metadata;
+
+	metadata = zalloc(per_cpu_size);
+	if (!metadata)
+		return NULL;
+
+	memcpy(metadata, buf, per_cpu_size);
+	return metadata;
+}
+
+static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
+{
+	int i;
+
+	for (i = 0; i < nr_cpu; i++)
+		zfree(&metadata[i]);
+	free(metadata);
+}
+
+static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
+				     u64 *ver, int *nr_cpu)
+{
+	u64 *ptr = (u64 *)info->priv;
+	u64 metadata_size;
+	u64 **metadata = NULL;
+	int hdr_sz, per_cpu_sz, i;
+
+	metadata_size = info->header.size -
+		sizeof(struct perf_record_auxtrace_info);
+
+	/* Metadata version 1 */
+	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
+		*ver = 1;
+		*nr_cpu = 0;
+		/* No per CPU metadata */
+		return NULL;
+	}
+
+	*ver = ptr[ARM_SPE_HEADER_VERSION];
+	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
+	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
+
+	metadata = calloc(*nr_cpu, sizeof(*metadata));
+	if (!metadata)
+		return NULL;
+
+	/* Locate the start address of per CPU metadata */
+	ptr += hdr_sz;
+	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
+
+	for (i = 0; i < *nr_cpu; i++) {
+		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
+		if (!metadata[i])
+			goto err_per_cpu_metadata;
+
+		ptr += per_cpu_sz / sizeof(u64);
+	}
+
+	return metadata;
+
+err_per_cpu_metadata:
+	arm_spe__free_metadata(metadata, *nr_cpu);
+	return NULL;
+}
+
 static void arm_spe_free_queue(void *priv)
 {
 	struct arm_spe_queue *speq = priv;
@@ -1154,6 +1306,7 @@ static void arm_spe_free(struct perf_session *session)
 	auxtrace_heap__free(&spe->heap);
 	arm_spe_free_events(session);
 	session->auxtrace = NULL;
+	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
 	free(spe);
 }
 
@@ -1165,16 +1318,60 @@ static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
 	return evsel->core.attr.type == spe->pmu_type;
 }
 
-static const char * const arm_spe_info_fmts[] = {
-	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
+static const char * const metadata_hdr_v1_fmts[] = {
+	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
+	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
+};
+
+static const char * const metadata_hdr_fmts[] = {
+	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
+	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
+	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
+	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
 };
 
-static void arm_spe_print_info(__u64 *arr)
+static const char * const metadata_per_cpu_fmts[] = {
+	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
+	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
+	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
+	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
+	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
+	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
+};
+
+static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
 {
+	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
+	const char * const *hdr_fmts;
+
 	if (!dump_trace)
 		return;
 
-	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
+	if (spe->metadata_ver == 1) {
+		cpu_num = 0;
+		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
+		hdr_fmts = metadata_hdr_v1_fmts;
+	} else {
+		cpu_num = arr[ARM_SPE_CPUS_NUM];
+		hdr_size = arr[ARM_SPE_HEADER_SIZE];
+		hdr_fmts = metadata_hdr_fmts;
+	}
+
+	for (i = 0; i < hdr_size; i++)
+		fprintf(stdout, hdr_fmts[i], arr[i]);
+
+	arr += hdr_size;
+	for (cpu = 0; cpu < cpu_num; cpu++) {
+		/*
+		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
+		 * are fixed. The sequential parameter size is decided by the
+		 * field 'ARM_SPE_CPU_NR_PARAMS'.
+		 */
+		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
+		for (i = 0; i < cpu_size; i++)
+			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
+		arr += cpu_size;
+	}
 }
 
 struct arm_spe_synth {
@@ -1391,24 +1588,57 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 	return 0;
 }
 
+static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
+{
+	u64 midr;
+	int i;
+
+	if (!nr_cpu)
+		return false;
+
+	for (i = 0; i < nr_cpu; i++) {
+		if (!metadata[i])
+			return false;
+
+		if (i == 0) {
+			midr = metadata[i][ARM_SPE_CPU_MIDR];
+			continue;
+		}
+
+		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
+			return false;
+	}
+
+	return true;
+}
+
 int arm_spe_process_auxtrace_info(union perf_event *event,
 				  struct perf_session *session)
 {
 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
-	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
 	struct perf_record_time_conv *tc = &session->time_conv;
-	const char *cpuid = perf_env__cpuid(session->evlist->env);
-	u64 midr = strtol(cpuid, NULL, 16);
 	struct arm_spe *spe;
-	int err;
+	u64 **metadata = NULL;
+	u64 metadata_ver;
+	int nr_cpu, err;
 
 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
 					min_sz)
 		return -EINVAL;
 
+	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
+					   &nr_cpu);
+	if (!metadata && metadata_ver != 1) {
+		pr_err("Failed to parse Arm SPE metadata.\n");
+		return -EINVAL;
+	}
+
 	spe = zalloc(sizeof(struct arm_spe));
-	if (!spe)
-		return -ENOMEM;
+	if (!spe) {
+		err = -ENOMEM;
+		goto err_free_metadata;
+	}
 
 	err = auxtrace_queues__init(&spe->queues);
 	if (err)
@@ -1417,8 +1647,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 	spe->session = session;
 	spe->machine = &session->machines.host; /* No kvm support */
 	spe->auxtrace_type = auxtrace_info->type;
-	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
-	spe->midr = midr;
+	if (metadata_ver == 1)
+		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+	else
+		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
+	spe->metadata = metadata;
+	spe->metadata_ver = metadata_ver;
+	spe->metadata_nr_cpu = nr_cpu;
+	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
 
 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
 
@@ -1451,7 +1687,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
 	session->auxtrace = &spe->auxtrace;
 
-	arm_spe_print_info(&auxtrace_info->priv[0]);
+	arm_spe_print_info(spe, &auxtrace_info->priv[0]);
 
 	if (dump_trace)
 		return 0;
@@ -1479,5 +1715,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 	session->auxtrace = NULL;
 err_free:
 	free(spe);
+err_free_metadata:
+	arm_spe__free_metadata(metadata, nr_cpu);
 	return err;
 }
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
index 98d3235781c3c0ffbfb5714eae259297d19ecbcb..c68c04cae5b5a32d287ba79edee1dc0c71441b79 100644
--- a/tools/perf/util/arm-spe.h
+++ b/tools/perf/util/arm-spe.h
@@ -12,10 +12,46 @@
 enum {
 	ARM_SPE_PMU_TYPE,
 	ARM_SPE_PER_CPU_MMAPS,
+	ARM_SPE_AUXTRACE_V1_PRIV_MAX,
+};
+
+#define ARM_SPE_AUXTRACE_V1_PRIV_SIZE	\
+	(ARM_SPE_AUXTRACE_V1_PRIV_MAX * sizeof(u64))
+
+enum {
+	/*
+	 * The old metadata format (defined above) does not include a
+	 * field for version number. Version 1 is reserved and starts
+	 * from version 2.
+	 */
+	ARM_SPE_HEADER_VERSION,
+	/* Number of sizeof(u64) */
+	ARM_SPE_HEADER_SIZE,
+	/* PMU type shared by CPUs */
+	ARM_SPE_PMU_TYPE_V2,
+	/* Number of CPUs */
+	ARM_SPE_CPUS_NUM,
 	ARM_SPE_AUXTRACE_PRIV_MAX,
 };
 
-#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))
+enum {
+	/* Magic number */
+	ARM_SPE_MAGIC,
+	/* CPU logical number in system */
+	ARM_SPE_CPU,
+	/* Number of parameters */
+	ARM_SPE_CPU_NR_PARAMS,
+	/* CPU MIDR */
+	ARM_SPE_CPU_MIDR,
+	/* Associated PMU type */
+	ARM_SPE_CPU_PMU_TYPE,
+	/* Minimal interval */
+	ARM_SPE_CAP_MIN_IVAL,
+	ARM_SPE_CPU_PRIV_MAX,
+};
+
+#define ARM_SPE_HEADER_CURRENT_VERSION	2
+
 union perf_event;
 struct perf_session;