LAB/GEM5

GEM5 v21.2.1.1, L2 private cache, L3 shared cache

RyoTTa 2022. 5. 12. 13:20
반응형

1. configs/common/Options.py 파일에 l3 cache option 추가

# Opt-in boolean flag for the three-level hierarchy; config_cache() reads it
# back as options.l3cache.
parser.add_argument("--l3cache", action="store_true")

 

2. configs/common/Caches.py 파일에 l3 cache class 추가

class L3Cache(Cache):
    """Default parameter set for the L3 cache.

    config_cache() instantiates this class once as ``system.l3`` when the
    --l3cache option is set, so a single instance sits behind every CPU.
    """
    assoc = 16
    # Tag, data and response latencies are deliberately set to the same value.
    # NOTE(review): presumably expressed in cycles, as for the other cache
    # classes in Caches.py -- confirm against the Cache parameter definitions.
    tag_latency = 32
    data_latency = 32
    response_latency = 32
    mshrs = 32
    tgts_per_mshr = 24
    write_buffers = 16

 

3. src/mem/XBar.py 파일에 L3XBar class 추가

class L3XBar(CoherentXBar):
    """Coherent crossbar that sits in front of the L3 cache.

    config_cache() connects the mem_side of every per-CPU L2 cache to this
    bus's cpu_side_ports and the L3 cache's cpu_side to its mem_side_ports.
    """
    width = 32
    frontend_latency = 1
    forward_latency = 0
    response_latency = 1
    snoop_response_latency = 1
    # The bus gets its own snoop filter, configured with a zero lookup latency.
    snoop_filter = SnoopFilter(lookup_latency = 0)
    # NOTE(review): these values look copied from the stock L2XBar; confirm
    # that marking this bus as the point of unification is still intended now
    # that private L2 buses sit above it.
    point_of_unification = True

 

4. src/cpu/BaseCPU.py 파일에 L3XBar import

from m5.objects.XBar import L3XBar

 

5. src/cpu/BaseCPU.py 파일에 addThreeLevelCacheHierarchy() 함수 정의

    def addThreeLevelCacheHierarchy(self, ic, dc, l3c, iwc = None, dwc = None,
                                    xbar = None):
        """Attach private split L1 caches and an L3 cache behind a bus.

        Args:
            ic: Instruction cache passed to addPrivateSplitL1Caches().
            dc: Data cache passed to addPrivateSplitL1Caches().
            l3c: Cache object stored as ``self.l3cache`` and connected to the
                memory side of the bus.
            iwc: Optional instruction walker cache (forwarded unchanged).
            dwc: Optional data walker cache (forwarded unchanged).
            xbar: Optional bus to use between the L1 caches and ``l3c``. When
                omitted, a fresh L3XBar is created. The original body read
                ``xbar`` without declaring it, which raised NameError on
                every call.
        """
        self.addPrivateSplitL1Caches(ic, dc, iwc, dwc)
        self.toL3Bus = xbar if xbar else L3XBar()
        self.connectCachedPorts(self.toL3Bus.cpu_side_ports)
        self.l3cache = l3c
        self.toL3Bus.mem_side_ports = self.l3cache.cpu_side
        # From here on, the only cached port this CPU exposes is the L3's
        # memory side.
        self._cached_ports = ['l3cache.mem_side']

 

6. configs/common/CacheConfig.py 에 L3Cache 및 l3_cache_class 정의

   def config_cache(options, system) 를 아래와 같이 수정한다. --l3cache 옵션이 True 일 때만 L2 Cache는 Private으로 작동하게 된다. (private L2는 L1 cache와 함께 생성되므로 --l3cache 는 --caches 옵션과 함께 사용해야 한다.)

def config_cache(options, system):
    """Build the cache hierarchy selected by ``options`` and attach it to ``system``.

    Three internal layouts are supported:

    * ``--caches`` only: private split L1 caches connected to the memory bus.
    * ``--caches --l2cache``: private L1 caches behind one shared ``system.l2``.
    * ``--caches --l3cache``: private L1 caches, one private L2 per CPU
      (``system.cpu[i].l2``) and one shared ``system.l3``.  When both
      ``--l2cache`` and ``--l3cache`` are given, the L3 layout wins.

    Alternatively ``options.external_memory_system`` hands the caches to an
    external model; this cannot be combined with any internal cache option.

    Args:
        options: Parsed command-line options.  ``l3cache`` is read with a
            default of False so callers whose parser does not define the
            flag keep working.
        system: The System object to populate; it must already provide
            ``cpu``, ``membus`` and ``cpu_clk_domain``.

    Returns:
        The same ``system`` object, after the caches have been attached.

    Exits with status 1 on an inconsistent combination of cache options or
    when the requested ARM core model cannot be imported.
    """
    use_l3 = getattr(options, "l3cache", False)

    if options.external_memory_system and \
            (options.caches or options.l2cache or use_l3):
        print("External caches and internal caches are exclusive options.\n")
        sys.exit(1)

    # The private L2 caches and their buses are created together with the L1
    # caches below, so an L3 hierarchy without --caches would reference a
    # per-CPU bus that was never built.
    if use_l3 and not options.caches:
        print("--l3cache requires --caches.\n")
        sys.exit(1)

    if options.external_memory_system:
        ExternalCache = ExternalCacheFactory(options.external_memory_system)

    # The ARM core models below do not ship an L3 class of their own, so the
    # generic L3Cache is used for every CPU type.
    l3_cache_class = L3Cache

    if options.cpu_type == "O3_ARM_v7a_3":
        try:
            import cores.arm.O3_ARM_v7a as core
        # Any failure while importing means the model is unusable, but
        # SystemExit / KeyboardInterrupt must still propagate.
        except Exception:
            print("O3_ARM_v7a_3 is unavailable. Did you compile the O3 model?")
            sys.exit(1)

        dcache_class, icache_class, l2_cache_class, walk_cache_class = \
            core.O3_ARM_v7a_DCache, core.O3_ARM_v7a_ICache, \
            core.O3_ARM_v7aL2, \
            None
    elif options.cpu_type == "HPI":
        try:
            import cores.arm.HPI as core
        except Exception:
            print("HPI is unavailable.")
            sys.exit(1)

        dcache_class, icache_class, l2_cache_class, walk_cache_class = \
            core.HPI_DCache, core.HPI_ICache, core.HPI_L2, None
    else:
        dcache_class, icache_class, l2_cache_class, walk_cache_class = \
            L1_DCache, L1_ICache, L2Cache, None

        if buildEnv['TARGET_ISA'] in ['x86', 'riscv']:
            walk_cache_class = PageTableWalkerCache

    # Set the cache line size of the system
    system.cache_line_size = options.cacheline_size

    # If elastic trace generation is enabled, make sure the memory system is
    # minimal so that compute delays do not include memory access latencies.
    # Configure the compulsory L1 caches for the O3CPU, do not configure
    # any more caches.
    if (options.l2cache or use_l3) and options.elastic_trace_en:
        fatal("When elastic trace is enabled, do not configure L2 or L3 "
              "caches.")

    if use_l3:
        # Shared L3 and the bus in front of it.  The private L2 caches are
        # attached to this bus inside the per-CPU loop below.
        system.l3 = l3_cache_class(clk_domain=system.cpu_clk_domain,
                                   **_get_cache_opts('l3', options))
        system.tol3bus = L3XBar(clk_domain=system.cpu_clk_domain)
        system.l3.cpu_side = system.tol3bus.mem_side_ports
        system.l3.mem_side = system.membus.cpu_side_ports

    elif options.l2cache:
        # Provide a clock for the L2 and the L1-to-L2 bus here as they
        # are not connected using addTwoLevelCacheHierarchy. Use the
        # same clock as the CPUs.
        system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain,
                                   **_get_cache_opts('l2', options))

        system.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)
        system.l2.cpu_side = system.tol2bus.mem_side_ports
        system.l2.mem_side = system.membus.cpu_side_ports

    if options.memchecker:
        system.memchecker = MemChecker()

    for i in range(options.num_cpus):
        cpu = system.cpu[i]

        if options.caches:
            icache = icache_class(**_get_cache_opts('l1i', options))
            dcache = dcache_class(**_get_cache_opts('l1d', options))

            # If we have a walker cache specified, instantiate two
            # instances here
            if walk_cache_class:
                iwalkcache = walk_cache_class()
                dwalkcache = walk_cache_class()
            else:
                iwalkcache = None
                dwalkcache = None

            if options.memchecker:
                dcache_mon = MemCheckerMonitor(warn_only=True)
                dcache_real = dcache

                # Do not pass the memchecker into the constructor of
                # MemCheckerMonitor, as it would create a copy; we require
                # exactly one MemChecker instance.
                dcache_mon.memchecker = system.memchecker

                # Connect monitor
                dcache_mon.mem_side = dcache.cpu_side

                # Let CPU connect to monitors
                dcache = dcache_mon

            # When connecting the caches, the clock is also inherited
            # from the CPU in question
            cpu.addPrivateSplitL1Caches(icache, dcache,
                                        iwalkcache, dwalkcache)

            if options.memchecker:
                # The mem_side ports of the caches haven't been connected yet.
                # Make sure connectAllPorts connects the right objects.
                cpu.dcache = dcache_real
                cpu.dcache_mon = dcache_mon

            if use_l3:
                # Private L2 for this CPU: its own L1-to-L2 bus on the CPU
                # side, the shared L3 bus on the memory side.
                cpu.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain,
                                        **_get_cache_opts('l2', options))
                cpu.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)
                cpu.l2.cpu_side = cpu.tol2bus.mem_side_ports
                cpu.l2.mem_side = system.tol3bus.cpu_side_ports

        elif options.external_memory_system:
            # These port names are presented to whatever 'external' system
            # gem5 is connecting to.  Its configuration will likely depend
            # on these names.  For simplicity, we would advise configuring
            # it to use this naming scheme; if this isn't possible, change
            # the names below.
            if buildEnv['TARGET_ISA'] in ['x86', 'arm', 'riscv']:
                cpu.addPrivateSplitL1Caches(
                        ExternalCache("cpu%d.icache" % i),
                        ExternalCache("cpu%d.dcache" % i),
                        ExternalCache("cpu%d.itb_walker_cache" % i),
                        ExternalCache("cpu%d.dtb_walker_cache" % i))
            else:
                cpu.addPrivateSplitL1Caches(
                        ExternalCache("cpu%d.icache" % i),
                        ExternalCache("cpu%d.dcache" % i))

        cpu.createInterruptController()
        if use_l3:
            # Cached ports go to this CPU's private L1-to-L2 bus.
            cpu.connectAllPorts(
                cpu.tol2bus.cpu_side_ports,
                system.membus.cpu_side_ports, system.membus.mem_side_ports)
        elif options.l2cache:
            cpu.connectAllPorts(
                system.tol2bus.cpu_side_ports,
                system.membus.cpu_side_ports, system.membus.mem_side_ports)
        elif options.external_memory_system:
            cpu.connectUncachedPorts(
                system.membus.cpu_side_ports, system.membus.mem_side_ports)
        else:
            cpu.connectBus(system.membus)

    return system

 

7. Build

scons ./build/X86/gem5.opt

 

잘 동작한당

반응형