package MesaFlexLpm;

// ----------------------------------------------------------------
//
// The LPM module is responsible for taking 32-bit IP addresses, looking up
// the destination (32-bit data) for each IP address in a table in an SRAM,
// and returning the destinations.
//
// ----------------------------------------------------------------
//
// ILpm is the interface to the LPM module, containing two
// sub-interfaces: mif and ram.  It is defined in the interface definition
// package, MesaIDefs, imported above.
//
// ----------------------------------------------------------------
//
// The LPM module sits between two other blocks:
//   * the MIF (Media Interface)
//   * the SRAM
//
// The LPM receives requests from the MIF module by the method
//     mif.put (LuRequest, LuTag)
// and returns results (some cycles later) of the form (luResponse, luTag) by
// the method
//     mif.get.
//
// The LPM sends addresses to the RAM by calling
//     ram.put(sramAddr)
// and receives result data (some cycles later) from the RAM by the method
//     ram.get
//
// ----------------------------------------------------------------
//
// The longest prefix match traverses a tree representation in  memory. The
// first step is a table lookup, and after that it iterates if necessary until
// a leaf is reached.
//
// ----------------------------------------------------------------
//
// The module is pipelined, i.e., IP addresses stream in, and results stream
// out (after some latency).  Results are returned in the same order as
// requests.
//
// The SRAM is also pipelined: addresses stream in, and data stream out
// (after some latency).  It can accept addresses and deliver data on every
// cycle.
//
// Performance metric: as long as IP lookup requests are available, the LPM
// module must keep the SRAM 100% utilized, i.e., it should issue a request
// into the SRAM on every cycle.
//
// ----------------------------------------------------------------
//
// This version uses a "flexible" linear pipeline.  Each stage has its own
// port to the SRAM, produced by a server replicator, which it uses if
// necessary; otherwise it passes the result, completed at an earlier stage,
// along the pipe with no further processing.
//
// ----------------------------------------------------------------

// Required library packages:
import RAM::*;
import ClientServer::*;
import GetPut::*;
import FIFO::*;
import RegFile::*;
import List::*;

// Other Mesa packages:
import Replicator::*;
import MesaDefs::*;
import MesaIDefs::*;

// The type of results from the table lookup:
typedef union tagged {
    Bit#(21) Pointer;
    Bit#(31) Leaf;
} TableType deriving (Eq, Bits);

(* synthesize *)
module mkMesaLpm(ILpm);
   // max is the size of each FIFO in the pipeline.  If it is too low, there
   // may be bottlenecks when items mount up at a particular stage; if it is
   // too high the FIFOs will be unnecessarily large.
   let max =  8;
   // latency is the latency of the SRAM.  If this number is too low, the
   // memory will not be fully utilized; if too high, the replicator will
   // consume unnecessary amounts of storage.
   let latency = 8;

   // The ROM replicator:
   ClientServer#(RAMreq#(SramAddr,SramData), SramData) cs();
   mkRequestResponseBuffer the_cs(cs);
   match {.c,.s} = cs;
   Tuple3#(Server#(RAMreq#(SramAddr,SramData), SramData),
	   Server#(RAMreq#(SramAddr,SramData), SramData),
	   Server#(RAMreq#(SramAddr,SramData), SramData)) srams();
   mk3Servers#(latency) the_srams(s, srams);
   match {.sram0, .sram1, .sram2} = srams;

   // The FIFO for incoming requests:
   FIFO#(Tuple2#(LuRequest, LuTag)) ififo();
   mkSizedFIFO#(4) the_ififo(ififo);

   // The intermediate FIFOs:
   FIFO#(Tuple3#(Bit#(8), Bit#(8), LuTag)) fifo0();
   mkSizedFIFO#(max) the_fifo0(fifo0);

   FIFO#(Tuple3#(Maybe#(LuResponse), Bit#(8), LuTag)) fifo1();
   mkSizedFIFO#(max) the_fifo1(fifo1);

   FIFO#(Tuple2#(Maybe#(LuResponse), LuTag)) fifo2();
   mkSizedFIFO#(max) the_fifo2(fifo2);

   // The output FIFO:
   FIFO#(Tuple2#(LuResponse, LuTag)) ofifo();
   mkSizedFIFO#(4) the_ofifo(ofifo);

   // All processing starts with a table lookup. We send a request to the RAM,
   // and also enqueue (for the next stage) the remainder of the address and
   // the lookup-tag.
   rule stage0;
      let x = ififo.first;
      match {.ireq, .tag} = x;
      ififo.deq;

      sram0.request.put(Read(zeroExtend(ireq[31:16])));
      (fifo0.enq)(tuple3(ireq[15:8], ireq[7:0], tag));
   endrule: stage0

   // This rule handles the result of the first lookup.
   (* descending_urgency = "stage1,stage0" *)
   (* descending_urgency = "stage2_RAM,stage1" *)
   rule stage1;
      let x <- sram0.response.get();
      match {.a2,.a3,.tag} = fifo0.first;
      fifo0.deq;

      case (unpack(x)) matches
	 tagged Leaf .v:
	    // A leaf has been reached, so no further lookups are needed.  We
	    // send the result, with the tag, along the pipe.
	    fifo1.enq(tuple3(Valid (zeroExtend(v)), a3, tag));
	 tagged Pointer .a:
	    // No leaf, so another lookup is required.  We enqueue the
	    // relevant address, and send the remainder (with the tag) down
	    // the pipe.
	    begin
	       sram1.request.put(Read(a + zeroExtend(a2)));
	       fifo1.enq(tuple3(Invalid, a3, tag));
	    end
      endcase
   endrule: stage1

   // This rule for the next stage handles the case where a further lookup was
   // not required, so there is no result from the RAM to await.  We just pass
   // the result on.
   rule stage2_NoRAM (fifo1.first matches {tagged Valid .v, .a3, .tag});
      fifo1.deq;
      fifo2.enq(tuple2(Valid(v), tag));
   endrule: stage2_NoRAM

   // This rule handles the case where a second lookup was required.  It deals
   // with the result from the RAM in a way similar to stage 1.
   rule stage2_RAM (fifo1.first matches {tagged Invalid, .a3, .tag});
      let x <- sram1.response.get();
      fifo1.deq;

      case (unpack(x)) matches
	 tagged Leaf .v:
	    fifo2.enq(tuple2(Valid (zeroExtend(v)), tag));
	 tagged Pointer .a:
	    begin
	       sram2.request.put(Read(a + zeroExtend(a3)));
	       fifo2.enq(tuple2(Invalid, tag));
	    end
      endcase
   endrule: stage2_RAM

   // The next two rules handle the third stage in the same way.
   rule stage3_NoRAM (fifo2.first matches {tagged Valid .v, .tag});
      fifo2.deq;
      ofifo.enq(tuple2(v, tag));
   endrule: stage3_NoRAM

   rule stage3_RAM (fifo2.first matches {tagged Invalid, .tag});
      let x <- sram2.response.get();
      fifo2.deq;

      case (unpack(x)) matches
	 tagged Leaf .v:
	    ofifo.enq(tuple2(zeroExtend(v), tag));
	 tagged Pointer .a:
	    // This case should never happen (it indicates an error in the
	    // lookup table); we treat the pointer as though it was a leaf.
	    ofifo.enq(tuple2(zeroExtend(a), tag));
      endcase
   endrule: stage3_RAM

   // The MIF interface is formed from the input and output FIFOs:
   interface Server mif;
      interface Put request = fifoToPut(ififo);
      interface Get response = fifoToGet(ofifo);
   endinterface: mif

   // The RAM interface is the client corresponding to the replicated server
   // above:
   interface Client ram = c;
endmodule

endpackage: MesaFlexLpm