package AlignedFIFOs;

// The AlignedFIFOs package contains a parameterized FIFO module
// intended for creating synchronizing FIFOs between clock domains
// with aligned edges.  Either every edge in the source domain implies
// the existence of a simultaneous edge in the destination domain (a
// slow-to-fast crossing), or every edge in the destination domain
// implies the existence of a simultaneous edge in the source domain
// (a fast-to-slow crossing).
//
// The FIFO is parameterized on the type of store used to hold the
// FIFO data, which is itself parameterized on the index type, value
// type and read latency.  Modules to construct stores based on a
// single register, a vector of registers and a BRAM are provided, and
// the user can supply their own store implementation as well.
//
// The FIFO allows the user to control whether or not outputs are held
// stable during the full slow clock cycle or allowed to transition
// mid-cycle.  Holding the outputs stable is the safest option but it
// slightly increases the minimum latency through the FIFO.
//
// A primary design goal of this FIFO is to provide an efficient
// and flexible family of synchronizing FIFOs between aligned clock
// domains which are written in BSV and are fully compatible with
// Bluesim.  These FIFOs (particularly ones using vectors of registers)
// may not be the best choice for ASIC synthesis due to the muxing
// to select the head value in the first() method.

import Clocks::*;
import Vector::*;
import BRAMCore::*;
import GetPut::*;

// Abstraction of indexed storage, with write and read in separate
// clock domains.  Type parameters are:
//   i - index type
//   a - value type
//   n - read latency (should be 0 or 1)
// When read latency is 0, prefetch is not used and the read method's
// index argument determines the returned value.  When latency is 1,
// prefetch must be used, the read method's index argument is ignored,
// and the read method returns the value at the previously fetched
// index.
interface Store#(type i, type a, numeric type n);
   // Store 'value' in the slot selected by 'idx'.
   method Action write(i idx, a value);
   // Request the slot 'idx' so that a later read() can return it
   // (only meaningful for stores with read latency 1).
   method Action prefetch(i idx);
   // Return a stored value: the slot 'idx' for latency-0 stores, or
   // the previously prefetched slot for latency-1 stores.
   method a read(i idx);
endinterface: Store

// Make a register which can be read and written in
// different clock domains, with no safety checks.
(* always_ready *)
interface RawReg#(numeric type n);
   method Action write(Bit#(n) x);
   method Bit#(n) read();
endinterface: RawReg

// Import of the Verilog primitive RegUN.  The write method is clocked
// by the module's default clock (sclk) and the read method by the
// separately supplied dClock; neither method is associated with a reset.
import "BVI" RegUN =
   module vMkRegU(Clock dClock, RawReg#(n) ifc);

      default_clock sclk(CLK);
      no_reset;

      input_clock dclk () = dClock;

      parameter width = valueOf(n);

      method       write(D_IN) enable(EN) clocked_by(sclk) reset_by(no_reset);
      method Q_OUT read()                 clocked_by(dclk) reset_by(no_reset);

      schedule read  CF read;
      schedule write C  write;
   endmodule: vMkRegU

// Implementation of a single-element store
//
// The index type is fixed to UInt#(0) by the interface, so the method
// arguments are declared with that same type (the index is ignored:
// there is only one slot).  Writes happen in the sClock domain and
// reads in the dClock domain.
module mkRegStore(Clock sClock, Clock dClock, Store#(UInt#(0),a,0) ifc)
   provisos(Bits#(a,a_sz));

   RawReg#(a_sz) _r <- vMkRegU(dClock, clocked_by sClock);

   method Action write(UInt#(0) idx, a value);
      _r.write(pack(value));
   endmethod

   // Read latency is 0, so prefetch has no function here; calling it
   // only reports an error message.
   method Action prefetch(UInt#(0) idx);
      $display("ERROR: Do not prefetch a RegStore (read latency is 0)!");
   endmethod

   method a read(UInt#(0) idx);
      return unpack(_r.read());
   endmethod

endmodule

// Implementation of a vector-of-registers store
//
// Holds 2^w raw registers, each written in the sClock domain and read
// in the dClock domain; the index selects the register.
module mkRegVectorStore(Clock sClock, Clock dClock, Store#(UInt#(w),a,0) ifc)
   provisos( Bits#(a,a_sz) );

   Vector#(TExp#(w),RawReg#(a_sz)) _v <- replicateM(vMkRegU(dClock, clocked_by sClock));

   method Action write(UInt#(w) idx, a value);
      _v[idx].write(pack(value));
   endmethod

   // Read latency is 0, so prefetch has no function here; calling it
   // only reports an error message.
   method Action prefetch(UInt#(w) idx);
      $display("ERROR: Do not prefetch a RegVectorStore (read latency is 0)!");
   endmethod

   method a read(UInt#(w) idx);
      return unpack(_v[idx].read());
   endmethod

endmodule

// Implementations of a BRAM-based store

module mkBRAMStore2W1R(Clock sClock, Reset sReset, Clock dClock, Reset dReset, Store#(i,a,1) ifc)
   provisos( Bits#(a,a_sz), Bits#(i,w), Eq#(i) );

   // this model assumes the read clock is a Nx divided version
   // of the write clock, so it requires a 2-deep write bypass
   // buffer

   Integer memSize = 2 ** valueOf( w );

   // Port a is the write port (sClock), port b the read port (dClock).
   BRAM_DUAL_PORT#(i,a) bram <- mkSyncBRAMCore2 ( memSize, False, sClock, noReset, dClock, noReset);

   // read() is only enabled in the cycle after prefetch() was called.
   Reg#(Bool) prefetch_ok     <- mkReg(False, clocked_by dClock, reset_by dReset);
   PulseWire  prefetch_called <- mkPulseWire(clocked_by dClock, reset_by dReset);

   // Most recently prefetched address, also visible in the sClock domain.
   CrossingReg#(i) rd_addr <- mkNullCrossingRegU(sClock, clocked_by dClock);
   i crossed_rd_addr = rd_addr.crossed();

   // The write of the current cycle and the two bypass buffer entries
   // (wr_buf0 is the newer entry, wr_buf1 the older one).
   Wire#(Tuple2#(i,a)) wr_record <- mkWire(clocked_by sClock, reset_by sReset);
   CrossingReg#(Maybe#(Tuple2#(i,a))) wr_buf0 <- mkNullCrossingReg(dClock, tagged Invalid, clocked_by sClock, reset_by sReset);
   Maybe#(Tuple2#(i,a)) crossed_wr_buf0 = wr_buf0.crossed();
   CrossingReg#(Maybe#(Tuple2#(i,a))) wr_buf1 <- mkNullCrossingReg(dClock, tagged Invalid, clocked_by sClock, reset_by sReset);
   Maybe#(Tuple2#(i,a)) crossed_wr_buf1 = wr_buf1.crossed();

   // Decide whether a new write shifts wr_buf0 into wr_buf1.  The shift
   // is suppressed only when wr_buf1 (and not wr_buf0) holds a write to
   // the address currently being read, so that entry is not overwritten.
   Bool two_deep;
   if (wr_buf0 matches tagged Valid {.wr_idx,.wr_value} &&& (wr_idx == crossed_rd_addr))
      two_deep = True;
   else if (wr_buf1 matches tagged Valid {.wr_idx,.wr_value} &&& (wr_idx == crossed_rd_addr))
      two_deep = False;
   else
      two_deep = True;
   // Defining ALIGNED_ORIG forces the shift to happen on every write.
`ifdef ALIGNED_ORIG
   two_deep = True;
`endif

   rule record_prefetch;
      prefetch_ok <= prefetch_called;
   endrule

   // Fires only in cycles where write() sets wr_record.
   rule record_write;
      if (two_deep) wr_buf1 <= wr_buf0;
      wr_buf0 <= tagged Valid wr_record;
   endrule

   method Action write(i idx, a value);
      wr_record <= tuple2(idx,value);
      bram.a.put(True, idx, value);
   endmethod

   method Action prefetch(i idx);
      rd_addr <= idx;
      bram.b.put(False, idx, ?);
      prefetch_called.send();
   endmethod

   // The idx argument is ignored; the value for the prefetched address
   // is returned.  A matching bypass entry (newest first) takes priority
   // over the BRAM read port output.
   method a read(i idx) if (prefetch_ok);
      a result;
      if (crossed_wr_buf0 matches tagged Valid {.wr_idx,.wr_value} &&& (wr_idx == rd_addr))
         result = wr_value;
      else if (crossed_wr_buf1 matches tagged Valid {.wr_idx,.wr_value} &&& (wr_idx == rd_addr))
         result = wr_value;
      else
         result = bram.b.read();
      return result;
   endmethod

endmodule

module mkBRAMStore1W2R(Clock sClock, Reset sReset, Clock dClock, Reset dReset, Store#(i,a,1) ifc)
   provisos( Bits#(a,a_sz), Bits#(i,w), Eq#(i) );

   // this model assumes the write clock is a Nx divided version
   // of the read clock, so it requires a 1-deep write bypass
   // buffer

   Integer memSize = 2 ** valueOf (w);

   // Port a is the write port (sClock), port b the read port (dClock).
   BRAM_DUAL_PORT#(i,a) bram <- mkSyncBRAMCore2 ( memSize, False, sClock, noReset, dClock, noReset);

   // read() is only enabled in the cycle after prefetch() was called.
   Reg#(Bool) prefetch_ok     <- mkReg(False, clocked_by dClock, reset_by dReset);
   PulseWire  prefetch_called <- mkPulseWire(clocked_by dClock, reset_by dReset);

   // Most recently prefetched address.
   Reg#(i) rd_addr <- mkRegU(clocked_by dClock);

   // The write of the current cycle (if any) and the single bypass entry.
   RWire#(Tuple2#(i,a)) wr_record <- mkRWire(clocked_by sClock, reset_by sReset);
   CrossingReg#(Maybe#(Tuple2#(i,a))) wr_buf0 <- mkNullCrossingReg(dClock, tagged Invalid, clocked_by sClock, reset_by sReset);
   Maybe#(Tuple2#(i,a)) crossed_wr_buf0 = wr_buf0.crossed();

   rule record_prefetch;
      prefetch_ok <= prefetch_called;
   endrule

   // Runs every sClock cycle: the bypass entry becomes Invalid again in
   // any cycle without a write.
   rule record_write;
      wr_buf0 <= wr_record.wget();
   endrule

   method Action write(i idx, a value);
      wr_record.wset(tuple2(idx,value));
      bram.a.put(True, idx, value);
   endmethod

   method Action prefetch(i idx);
      rd_addr <= idx;
      bram.b.put(False, idx, ?);
      prefetch_called.send();
   endmethod

   // The idx argument is ignored; the value for the prefetched address
   // is returned.  A matching bypass entry takes priority over the BRAM
   // read port output.
   method a read(i idx) if (prefetch_ok);
      a result;
      if (crossed_wr_buf0 matches tagged Valid {.wr_idx,.wr_value} &&& (wr_idx == rd_addr))
         result = wr_value;
      else
         result = bram.b.read();
      return result;
   endmethod

endmodule

// Interface for synchronizing FIFO between clocks with
// aligned edges.
interface AlignedFIFO#(type a);
   method Action enq(a x);
   method a first();
   method Action deq();
   method Bool dNotFull();
   method Bool dNotEmpty();
   method Bool sNotFull();
   method Bool sNotEmpty();
   method Action dClear();
   method Action sClear();
endinterface: AlignedFIFO

// Make a synchronizing FIFO for aligned clocks, based on the
// given backing store.  The store is assumed to have 2^w slots
// addressed from 0 to (2^w)-1.  The store will be written in
// the source clock domain and read in the destination clock domain.
//
// The enq() and deq() methods will only be callable when the allow_enq
// and allow_deq inputs are high.  For a slow-to-fast crossing, use:
//   allow_enq = constant True and allow_deq = pre-edge signal
// For a fast-to-slow crossing, use:
//   allow_enq = pre-edge signal and allow_deq = constant True
// These settings ensure that the outputs in the slow clock
// domain are stable for the entire cycle.  Setting both inputs
// to constant True reduces the minimum latency through the FIFO,
// but allows outputs in the slow domain to transition mid-cycle.
// This is less safe and can interact badly with $displays in a
// Verilog simulation.
//
// It is not advisable to call both dClear and sClear simultaneously.
//
// Parameters:
//   ugenq     - when True, the enq() guard is always satisfied
//               (unguarded enqueue)
//   ugdeq     - when True, the first() and deq() guards are always
//               satisfied (unguarded dequeue)
//   sClock    - source (enqueue side) clock
//   sReset    - source domain reset
//   dClock    - destination (dequeue side) clock
//   dReset    - destination domain reset
//   store     - backing store; must have read latency 0 or 1
//   allow_enq - enables enq() and sClear() when high
//   allow_deq - enables deq() and dClear() when high
(* no_default_clock, no_default_reset *)
module mkUGAlignedFIFO( Bool ugenq
                      , Bool ugdeq
                      , Clock sClock
                      , Reset sReset
                      , Clock dClock
                      , Reset dReset
                      , Store#(i,a,n) store
                      , Bool allow_enq
                      , Bool allow_deq
                      , AlignedFIFO#(a) ifc
                      )
   provisos( Bits#(a,sz_a), Bits#(i,w), Eq#(i), Arith#(i) );

   // Check the latency of the store
   Integer latency = valueOf(n);
   if ((latency < 0) || (latency > 1))
      errorM("mkUGAlignedFIFO expects a store with either 0 or 1 cycles of latency");

   // Combine the sReset and dReset into identical resets in both domains
   Reset sCrosseddReset <- mkAsyncReset(0,dReset,sClock);
   Reset dCrossedsReset <- mkAsyncReset(0,sReset,dClock);
   Reset sCombinedReset <- mkResetEither(sReset, sCrosseddReset, clocked_by sClock);
   Reset dCombinedReset <- mkResetEither(dReset, dCrossedsReset, clocked_by dClock);

   // Test when resets are asserted
   ReadOnly#(Bool) sInReset <- isResetAssertedDirect(clocked_by sClock, reset_by sCombinedReset);
   ReadOnly#(Bool) dInReset <- isResetAssertedDirect(clocked_by dClock, reset_by dCombinedReset);

   // Location of next deq slot (in dest. domain)
   CrossingReg#(i) head <- mkNullCrossingReg(sClock, 0, clocked_by dClock, reset_by dReset);

   // Location of next enq slot (in src. domain)
   CrossingReg#(i) tail <- mkNullCrossingReg(dClock, 0, clocked_by sClock, reset_by sReset);

   // Pulses recording which interface methods were called this cycle
   PulseWire enq_pw    <- mkPulseWire(clocked_by sClock, reset_by sReset);
   PulseWire deq_pw    <- mkPulseWire(clocked_by dClock, reset_by dReset);
   PulseWire sClear_pw <- mkPulseWire(clocked_by sClock, reset_by sReset);
   PulseWire dClear_pw <- mkPulseWire(clocked_by dClock, reset_by dReset);

   // We track the "wrap state" as the parity of the number of
   // times that a value has wrapped around to 0.  Since the tail
   // can never overtake the head, we only need one bit of each
   // to distinguish between the empty and full states
   // (when head == tail).
   CrossingReg#(Bool) head_wrapped <- mkNullCrossingReg(sClock, False, clocked_by dClock, reset_by dReset);
   CrossingReg#(Bool) tail_wrapped <- mkNullCrossingReg(dClock, False, clocked_by sClock, reset_by sReset);

   // Cross head and tail info into alternate domains.
   // This is designed so that we only cross registered values.
   i    sCrossedHead        = head.crossed();
   Bool sCrossedHeadWrapped = head_wrapped.crossed();
   i    dCrossedTail        = tail.crossed();
   Bool dCrossedTailWrapped = tail_wrapped.crossed();

   // Make empty/full info available in both domains.
   // The FIFO is empty when head == tail in the same
   // wrap state.  The FIFO is full when head == tail
   // in opposite wrap states.
   Bool dIsEmpty = (head == dCrossedTail) && (head_wrapped == dCrossedTailWrapped);
   Bool sIsEmpty = (sCrossedHead == tail) && (sCrossedHeadWrapped == tail_wrapped);
   Bool dIsFull  = (head == dCrossedTail) && (head_wrapped != dCrossedTailWrapped);
   Bool sIsFull  = (sCrossedHead == tail) && (sCrossedHeadWrapped != tail_wrapped);

   // Next head and tail values
   // (with a zero-width index there is a single slot, so the
   // pointers are left unchanged instead of being incremented)
   i next_tail;
   i next_head;
   if (valueOf(w) == 0) begin
      next_tail = tail;
      next_head = head;
   end
   else begin
      next_tail = tail + 1;
      next_head = head + 1;
   end

   // For a store with latency, we need to prefetch the value
   // to satisfy first() in the next cycle.  When the deq()
   // method is called we use the next head value, otherwise we
   // re-fetch the current head value.
   PulseWire deq_happened <- mkPulseWire(clocked_by dClock, reset_by dReset);
   if (latency != 0) begin
      Wire#(i) old_head <- mkBypassWire(clocked_by dClock, reset_by dReset);

      // In dClock domain (before deq())
      rule save_old_head;
         old_head <= head;
      endrule

      // In dClock domain (after deq())
      rule do_fetch;
         store.prefetch(deq_happened ? (old_head + 1) : old_head);
      endrule
   end

   // When either side goes into reset, both sInReset and dInReset
   // will become True.  If only 1 reset is actually asserted, we have
   // a rule here which will force the other to assume its reset value
   // too.  This ensures that the FIFO resets correctly even if only
   // 1 of its 2 resets is asserted.

   // In sClock domain
   // Advance the tail after an enq (a simultaneous sClear takes
   // precedence); toggle the wrap bit when the tail returns to 0.
   rule enq_update_tail (!sInReset && enq_pw && !sClear_pw);
      tail <= next_tail;
      if (next_tail == 0) tail_wrapped <= !tail_wrapped;
   endrule

   // In sClock domain
   // Clear from the source side: copy the crossed head into the tail,
   // which makes the empty condition (same index, same wrap state) hold.
   rule sClear_update_tail (!sInReset && sClear_pw);
      tail <= sCrossedHead;
      tail_wrapped <= sCrossedHeadWrapped;
   endrule

   // In sClock domain
   rule reset_tail if (sInReset);
      tail <= 0;
      tail_wrapped <= False;
   endrule

   // In dClock domain
   // Advance the head after a deq (a simultaneous dClear takes
   // precedence); toggle the wrap bit when the head returns to 0.
   rule deq_update_head (!dInReset && deq_pw && !dClear_pw);
      head <= next_head;
      if (next_head == 0) head_wrapped <= !head_wrapped;
   endrule

   // In dClock domain
   // Clear from the destination side: copy the crossed tail into the
   // head, which makes the empty condition hold.
   rule dClear_update_head (!dInReset && dClear_pw);
      head <= dCrossedTail;
      head_wrapped <= dCrossedTailWrapped;
   endrule

   // In dClock domain
   rule reset_head if (dInReset);
      head <= 0;
      head_wrapped <= False;
   endrule

   // Interface methods

   // In sClock domain
   // Enabled when not full, not in reset and allow_enq is high,
   // or unconditionally when ugenq is True.
   method Action enq(a x) if ((!sIsFull && !sInReset && allow_enq) || ugenq);
      enq_pw.send;
      store.write(tail,x);
   endmethod: enq

   // In dClock domain
   // Enabled when not empty and not in reset, or unconditionally when
   // ugdeq is True.  A latency-0 store is read at the head index; for a
   // latency-1 store the index passed to read() is a don't-care.
   method a first if ((!dIsEmpty && !dInReset) || ugdeq);
      let result = ?;
      if (latency == 0)
         result = store.read(head);
      else
         result = store.read(?);
      return result;
   endmethod: first

   // In dClock domain
   // Enabled when not empty, not in reset and allow_deq is high,
   // or unconditionally when ugdeq is True.
   method Action deq() if ((!dIsEmpty && !dInReset && allow_deq) || ugdeq);
      deq_pw.send;
      if (latency != 0) deq_happened.send();
   endmethod: deq

   // Full/Empty methods
   method Bool dNotFull  = !dIsFull;
   method Bool dNotEmpty = !dIsEmpty;
   method Bool sNotFull  = !sIsFull;
   method Bool sNotEmpty = !sIsEmpty;

   // Clear FIFO from destination domain
   method Action dClear() if (allow_deq && !dInReset);
      dClear_pw.send;
   endmethod: dClear

   // Clear FIFO from source domain
   method Action sClear() if (allow_enq && !sInReset);
      sClear_pw.send;
   endmethod: sClear

endmodule: mkUGAlignedFIFO

// Guarded version of the aligned FIFO: identical to mkUGAlignedFIFO
// with both ugenq and ugdeq set to False, so enq() is blocked when the
// FIFO is full and first()/deq() are blocked when it is empty.
(* no_default_clock, no_default_reset *)
module mkAlignedFIFO( Clock sClock
                    , Reset sReset
                    , Clock dClock
                    , Reset dReset
                    , Store#(i,a,n) store
                    , Bool allow_enq
                    , Bool allow_deq
                    , AlignedFIFO#(a) ifc
                    )
   provisos( Bits#(a,sz_a), Bits#(i,w), Eq#(i), Arith#(i) );

   // Check the latency of the store
   // (repeated here so the error message names this module)
   Integer latency = valueOf(n);
   if ((latency < 0) || (latency > 1))
      errorM("mkAlignedFIFO expects a store with either 0 or 1 cycles of latency");

   // Just use mkUGAlignedFIFO with both sides guarded
   let _fifo <- mkUGAlignedFIFO(False,False,sClock,sReset,dClock,dReset,store,allow_enq,allow_deq);
   return _fifo;

endmodule: mkAlignedFIFO

// Typeclass instances

// View the dequeue side of an AlignedFIFO as a Get: get() dequeues
// the FIFO and returns its first element.
instance ToGet#(AlignedFIFO#(t), t);
   function Get#(t) toGet(AlignedFIFO#(t) fifo);
      return (interface Get;
                 method ActionValue#(t) get();
                    fifo.deq();
                    return fifo.first();
                 endmethod
              endinterface);
   endfunction
endinstance

// View the enqueue side of an AlignedFIFO as a Put: put() enqueues
// the given value.
instance ToPut#(AlignedFIFO#(t), t);
   function Put#(t) toPut(AlignedFIFO#(t) fifo);
      return (interface Put;
                 method Action put(t val) = fifo.enq(val);
              endinterface);
   endfunction
endinstance

endpackage: AlignedFIFOs