SPRAC21A June   2016  – June 2019 OMAP-L132 , OMAP-L138 , TDA2E , TDA2EG-17 , TDA2HF , TDA2HG , TDA2HV , TDA2LF , TDA2P-ABZ , TDA2P-ACD , TDA2SA , TDA2SG , TDA2SX , TDA3LA , TDA3LX , TDA3MA , TDA3MD , TDA3MV

 

  1.   TDA2xx and TDA2ex Performance
    1.     Trademarks
    2. SoC Overview
      1. 1.1 Introduction
      2. 1.2 Acronyms and Definitions
      3. 1.3 TDA2xx and TDA2ex System Interconnect
      4. 1.4 Traffic Regulation Within the Interconnect
        1. 1.4.1 Bandwidth Regulators
        2. 1.4.2 Bandwidth Limiters
        3. 1.4.3 Initiator Priority
      5. 1.5 TDA2xx and TDA2ex Memory Subsystem
        1. 1.5.1 Controller/PHY Timing Parameters
        2. 1.5.2 Class of Service
        3. 1.5.3 Prioritization Between DMM/SYS PORT or MPU Port to EMIF
      6. 1.6 TDA2xx and TDA2ex Measurement Operating Frequencies
      7. 1.7 System Instrumentation and Measurement Methodology
        1. 1.7.1 GP Timers
        2. 1.7.2 L3 Statistic Collectors
    3. Cortex-A15
      1. 2.1 Level1 and Level2 Cache
      2. 2.2 MMU
      3. 2.3 Performance Control Mechanisms
        1. 2.3.1 Cortex-A15 Knobs
        2. 2.3.2 MMU Page Table Knobs
      4. 2.4 Cortex-A15 CPU Read and Write Performance
        1. 2.4.1 Cortex-A15 Functions
        2. 2.4.2 Setup Limitations
        3. 2.4.3 System Performance
          1. 2.4.3.1 Cortex-A15 Stand-Alone Memory Read, Write, Copy
          2. 2.4.3.2 Results
    4. System Enhanced Direct Memory Access (System EDMA)
      1. 3.1 System EDMA Performance
        1. 3.1.1 System EDMA Read and Write
        2. 3.1.2 System EDMA Results
      2. 3.2 System EDMA Observations
    5. DSP Subsystem EDMA
      1. 4.1 DSP Subsystem EDMA Performance
        1. 4.1.1 DSP Subsystem EDMA Read and Write
        2. 4.1.2 DSP Subsystem EDMA Results
      2. 4.2 DSP Subsystem EDMA Observations
    6. Embedded Vision Engine (EVE) Subsystem EDMA
      1. 5.1 EVE EDMA Performance
        1. 5.1.1 EVE EDMA Read and Write
        2. 5.1.2 EVE EDMA Results
      2. 5.2 EVE EDMA Observations
    7. DSP CPU
      1. 6.1 DSP CPU Performance
        1. 6.1.1 DSP CPU Read and Write
        2. 6.1.2 Code Setup
          1. 6.1.2.1 Pipeline Copy
          2. 6.1.2.2 Pipeline Read
          3. 6.1.2.3 Pipeline Write
          4. 6.1.2.4 L2 Stride-Jmp Copy
          5. 6.1.2.5 L2 Stride-Jmp Read
          6. 6.1.2.6 L2 Stride-Jmp Write
      2. 6.2 DSP CPU Observations
      3. 6.3 Summary
    8. Cortex-M4 (IPU)
      1. 7.1 Cortex-M4 CPU Performance
        1. 7.1.1 Cortex-M4 CPU Read and Write
        2. 7.1.2 Code Setup
        3. 7.1.3 Cortex-M4 Functions
        4. 7.1.4 Setup Limitations
      2. 7.2 Cortex-M4 CPU Observations
        1. 7.2.1 Cache Disable
        2. 7.2.2 Cache Enable
      3. 7.3 Summary
    9. USB IP
      1. 8.1 Overview
      2. 8.2 USB IP Performance
        1. 8.2.1 Test Setup
        2. 8.2.2 Results and Observations
        3. 8.2.3 Summary
    10. PCIe IP
      1. 9.1 Overview
      2. 9.2 PCIe IP Performance
        1. 9.2.1 Test Setup
        2. 9.2.2 Results and Observations
    11. 10 IVA-HD IP
      1. 10.1 Overview
      2. 10.2 H.264 Decoder
        1. 10.2.1 Description
        2. 10.2.2 Test Setup
        3. 10.2.3 Test Results
      3. 10.3 MJPEG Decoder
        1. 10.3.1 Description
        2. 10.3.2 Test Setup
        3. 10.3.3 Test Results
    12. 11 MMC IP
      1. 11.1 MMC Read and Write Performance
        1. 11.1.1 Test Description
        2. 11.1.2 Test Results
      2. 11.2 Summary
    13. 12 SATA IP
      1. 12.1 SATA Read and Write Performance
        1. 12.1.1 Test Setup
        2. 12.1.2 Observations
          1. 12.1.2.1 RAW Performance
          2. 12.1.2.2 SDK Performance
      2. 12.2 Summary
    14. 13 GMAC IP
      1. 13.1 GMAC Receive/Transmit Performance
        1. 13.1.1 Test Setup
        2. 13.1.2 Test Description
          1. 13.1.2.1 CPPI Buffer Descriptors
        3. 13.1.3 Test Results
          1. 13.1.3.1 Receive/Transmit Mode
          2. 13.1.3.2 Receive Only Mode
          3. 13.1.3.3 Transmit Only Mode
      2. 13.2 Summary
    15. 14 GPMC IP
      1. 14.1 GPMC Read and Write Performance
        1. 14.1.1 Test Setup
          1. 14.1.1.1 NAND Flash
          2. 14.1.1.2 NOR Flash
        2. 14.1.2 Test Description
          1. 14.1.2.1 Asynchronous NAND Flash Read/Write Using CPU Prefetch Mode
          2. 14.1.2.2 Asynchronous NOR Flash Single Read
          3. 14.1.2.3 Asynchronous NOR Flash Page Read
          4. 14.1.2.4 Asynchronous NOR Flash Single Write
        3. 14.1.3 Test Results
      2. 14.2 Summary
    16. 15 QSPI IP
      1. 15.1 QSPI Read and Write Performance
        1. 15.1.1 Test Setup
        2. 15.1.2 Test Results
        3. 15.1.3 Analysis
          1. 15.1.3.1 Theoretical Calculations
          2. 15.1.3.2 % Efficiency
      2. 15.2 QSPI XIP Code Execution Performance
      3. 15.3 Summary
    17. 16 Standard Benchmarks
      1. 16.1 Dhrystone
        1. 16.1.1 Cortex-A15 Tests and Results
        2. 16.1.2 Cortex-M4 Tests and Results
      2. 16.2 LMbench
        1. 16.2.1 LMbench Bandwidth
          1. 16.2.1.1 TDA2xx and TDA2ex Cortex-A15 LMbench Bandwidth Results
          2. 16.2.1.2 TDA2xx and TDA2ex Cortex-M4 LMBench Bandwidth Results
          3. 16.2.1.3 Analysis
        2. 16.2.2 LMbench Latency
          1. 16.2.2.1 TDA2xx and TDA2ex Cortex-A15 LMbench Latency Results
          2. 16.2.2.2 TDA2xx and TDA2ex Cortex-M4 LMbench Latency Results
          3. 16.2.2.3 Analysis
      3. 16.3 STREAM
        1. 16.3.1 TDA2xx and TDA2ex Cortex-A15 STREAM Benchmark Results
        2. 16.3.2 TDA2xx and TDA2ex Cortex-M4 STREAM Benchmark Results
    18. 17 Error Checking and Correction (ECC)
      1. 17.1 OCMC ECC Programming
      2. 17.2 EMIF ECC Programming
      3. 17.3 EMIF ECC Programming to Starterware Code Mapping
      4. 17.4 Careabouts of Using EMIF ECC
        1. 17.4.1 Restrictions Due to Non-Availability of Read Modify Write ECC Support in EMIF
          1. 17.4.1.1 Un-Cached CPU Access of EMIF
          2. 17.4.1.2 Cached CPU Access of EMIF
          3. 17.4.1.3 Non CPU Access of EMIF Memory
          4. 17.4.1.4 Debugger Access of EMIF via the Memory Browser/Watch Window
          5. 17.4.1.5 Software Breakpoints While Debugging
        2. 17.4.2 Compiler Optimization
        3. 17.4.3 Restrictions Due to i882 Errata
        4. 17.4.4 How to Find Who Caused the Unaligned Quanta Writes After the Interrupt
      5. 17.5 Impact of ECC on Performance
    19. 18 DDR3 Interleaved vs Non-Interleaved
      1. 18.1 Interleaved versus Non-Interleaved Setup
      2. 18.2 Impact of Interleaved vs Non-Interleaved DDR3 for a Single Initiator
      3. 18.3 Impact of Interleaved vs Non-Interleaved DDR3 for Multiple Initiators
    20. 19 DDR3 vs DDR2 Performance
      1. 19.1 Impact of DDR2 vs DDR3 for a Single Initiator
      2. 19.2 Impact of DDR2 vs DDR3 for Multiple Initiators
    21. 20 Boot Time Profile
      1. 20.1 ROM Boot Time Profile
      2. 20.2 System Boot Time Profile
    22. 21 L3 Statistics Collector Programming Model
    23. 22 Reference
  2.   Revision History

L3 Statistics Collector Programming Model

The following APIs are used to configure the statistics collector, set up a timer, and read the statistics at a regular interval of 100 µs.

Initialize Statcoll: statCollectorInit(); void statCollectorInit() { gStatColState.stat0_filter_cnt = 0; gStatColState.stat1_filter_cnt = 0; gStatColState.stat2_filter_cnt = 0; gStatColState.stat3_filter_cnt = 0; } Enable Statcoll and Read Statcoll registers: /** \brief statCollectorControl * Description: API to enable statcoll. Same API can be used to read the * statcoll register values as well. * Inputs: * inst_name : Statcoll Instance Name. eg: STATCOL_EMIF_SYS, * STATCOL_DSP1_MDMA etc. defined in STATCOL_ID enumeration. * cur_stat_filter_cnt : This value is ignored when calling this function * to enable the statcoll. When trying to read the statcoll * this value is used to determine the filter number used. * mode: Used to indicate whether the function is being called for reading * or enabling the statcoll as defined by : * #define ENABLE_MODE 0x0 * #define READ_STATUS_MODE 0x1 * Return : In the enable mode the function returns the filter number * assigned. In the read mode the function * returns the value * read (BW/Latency etc) from the statcoll registers. */ UInt32 statCollectorControl(UInt32 inst_name, UInt32 cur_stat_filter_cnt, UInt32 mode) { switch (inst_name) { case STATCOL_EMIF_SYS: cur_base_address = stat_coll0_base_address; cur_event_mux_req = 0; cur_event_mux_resp = 1; if(mode == ENABLE_MODE) {gStatColState.stat0_filter_cnt = gStatColState.stat0_filter_cnt + 1;} if(mode == ENABLE_MODE) {cur_stat_filter_cnt = gStatColState.stat0_filter_cnt;} break; case <NEXT_STATCOLL> : ... 
} if(mode == ENABLE_MODE) { if ( cur_stat_filter_cnt > 4 ) { printf("We have exhausted filters/counters.....\n"); } // Global Enable Stat Collector wr_stat_reg(cur_base_address+0x8,0x1); // Soft Enable Stat Collector wr_stat_reg(cur_base_address+0xC,0x1); wr_stat_reg(cur_base_address+0x18,0x5); // Operation of Stat Collector / RespEvt => Packet wr_stat_reg(cur_base_address+0x1C,0x5); // Event Sel wr_stat_reg(cur_base_address+0x20+4*(cur_stat_filter_cnt-1),cur_event_mux_req); // Op is EventInfo wr_stat_reg(cur_base_address+0x1FC+(0x158*(cur_stat_filter_cnt-1)),2); // Event Info Sel Op -> packet length wr_stat_reg(cur_base_address+0x1F8+(0x158*(cur_stat_filter_cnt-1)),0); // Filter Global Enable wr_stat_reg(cur_base_address+0xAC+(0x158*(cur_stat_filter_cnt-1)),0x1); // Filter Enable wr_stat_reg(cur_base_address+0xBC+(0x158*(cur_stat_filter_cnt-1)),0x1); // Manual dump wr_stat_reg(cur_base_address+0x54,0x1); // use send register to reset counters } else { wr_stat_reg(cur_base_address+0xC,0x0); cur_stat_filter_cnt = rd_stat_reg(cur_base_address+0x8C+((cur_stat_filter_cnt-1)*4)); wr_stat_reg(cur_base_address+0xC,0x1); } return cur_stat_filter_cnt; } Usage (Dummy code): void main() { statCollectorInit(); counterIdISSNTR1 = statCollectorControl(STATCOL_ISS_NRT1, 0, ENABLE_MODE); DMTIMER_prcmenable(TIMER_NUM); DMTIMER_Start(TIMER_NUM); // Dummy Read statCollectorControl(STATCOL_ISS_NRT1, counterIdISSNTR2, READ_STATUS_MODE); while(statCountIdx < TOTAL_COUNT) { while (DMTIMER_Read(TIMER_NUM) <= SYS_CLK_FREQ/10000) // for 100 us {;} statCountISSNRT1[statCountIdx++] = statCollectorControl(STATCOL_ISS_NRT1, counterIdISSNTR1, READ_STATUS_MODE); DMTIMER_Stop(TIMER_NUM); DMTIMER_Start(TIMER_NUM); } } Timer APIs: ReturnCode_t DMTIMER_Start (UWORD8 timer_num) { ReturnCode_t checkReturn = RET_OK; /* Counter clear and auto reload enable */ if (timer_num < 1 || timer_num > 16) return RET_FAIL; switch(timer_num) { case 1: /* Clear the counter value */ WR_REG_32(TIMER1, DMTIMER__TCRR, 
0x0); /* Triggering the timer load */ WR_REG_32(TIMER1, DMTIMER__TTGR, 0x1); /* Start timer and reload enable: bit[0] start, bit[1] autoreload enable */ WR_REG_32(TIMER1, DMTIMER__TCLR, 0x1); break; case 2: ... default: checkReturn = RET_FAIL; break; } return checkReturn; } ReturnCode_t DMTIMER_Stop(UWORD8 timer_num) { ReturnCode_t checkReturn = RET_OK; /* Counter clear and auto reload enable */ if (timer_num < 1 || timer_num > 16) return RET_FAIL; switch(timer_num) { case 1: /* Bit[0]: 0, counter is frozen */ WR_REG_32(TIMER1, DMTIMER__TCLR, 0x0); break; case 2: ... default: checkReturn = RET_FAIL; break; } return checkReturn; } UWORD32 DMTIMER_Read(UWORD8 timer_num) { volatile UWORD32 read_value = 0; if (timer_num < 1 || timer_num > 16) return RET_FAIL; switch(timer_num) { case 1: read_value = RD_REG_32(TIMER1, DMTIMER__TCRR); break; case 2: ... default: read_value = 0; break; } return read_value; } ReturnCode_t DMTIMER_prcmenable(UWORD8 timer_num) { ReturnCode_t checkReturn = RET_OK; if (timer_num < 1 || timer_num > 16) return RET_FAIL; switch(timer_num) { case 1: checkReturn = (ReturnCode_t )prcm_enable_module(prcm_timer1); break; case 2: ... default: checkReturn = RET_FAIL; break; } return checkReturn; }