// FIR filters
//
// ROM init file:	REQUIRED, with 256 or 512 coefficients.  See below.
// Number of taps:	NTAPS.
// Input bits:		18 fixed.
// Output bits:		OBITS, default 24.
// Adder bits:		ABITS, default 24.


module firX8R8 (		// FIR filter decimate by 8
// Polyphase decimate-by-8 FIR filter built from eight fir256 sub-filters
// (instances A through H).  Each incoming sample is written to the next
// sub-filter in turn; as each sample arrives the result of the matching
// sub-filter is folded into Racc/Iacc.  After the eighth sample the sum is
// presented on y_real/y_imag and y_avail is asserted for one clock.
//
// This requires eight MifFiles (coefL8A.mif through coefL8H.mif).
// Maximum NTAPS is 8 * (previous and current decimation) less overhead.
// Maximum NTAPS is 2048 (or less).
//
// NOTE: x_avail is not examined in states sWriteH and sAddrH, so a sample
// presented during those two clocks after the eighth sample is ignored.
	input clock,
	input x_avail,							// new sample is available
	input signed [MBITS-1:0] x_real,		// x is the sample input
	input signed [MBITS-1:0] x_imag,
	output reg y_avail,						// new output is available
	output wire signed [OBITS-1:0] y_real,	// y is the filtered output
	output wire signed [OBITS-1:0] y_imag);
	
	localparam ADDRBITS	= 8;		// Address bits for 18/36 X 256 rom/ram blocks
	localparam MBITS	= 18;		// multiplier bits == input bits
	
	parameter ABITS		= 24;		// adder bits
	parameter OBITS		= 24;		// output bits; the top OBITS of the adder are output
	parameter NTAPS		= 976;		// number of filter taps, even by 8
	
	reg [4:0] wstate;	// state machine for write samples
	parameter sWaitA		= 0;	// waiting for sample 1 of 8
	parameter sWaitB		= 2;
	parameter sWaitC		= 4;
	parameter sWaitD		= 6;
	parameter sWaitE		= 8;
	parameter sWaitF		= 10;
	parameter sWaitG		= 12;
	parameter sWaitH		= 14;	// waiting for sample 8 of 8
	parameter sWriteH		= 15;	// sample 8 is being written; assert y_avail
	parameter sAddrH		= 16;	// advance the shared write address
	
	reg  [ADDRBITS-1:0] waddr;		// write sample memory address, shared by all sub-filters
	reg weA, weB, weC, weD, weE, weF, weG, weH;	// per sub-filter write enables
	reg  signed [ABITS-1:0] Racc, Iacc;			// running sum of the sub-filter results
	wire signed [ABITS-1:0] RaccA, RaccB, RaccC, RaccD, RaccE, RaccF, RaccG, RaccH;
	wire signed [ABITS-1:0] IaccA, IaccB, IaccC, IaccD, IaccE, IaccF, IaccG, IaccH;

	// Both sides of these assignments are OBITS wide, so no sign extension occurs.
	assign y_real = Racc[ABITS-1 -: OBITS];
	assign y_imag = Iacc[ABITS-1 -: OBITS];

	initial
	begin
		wstate = sWaitA;
		waddr = 0;
		weA = 0;
		weB = 0;
		weC = 0;
		weD = 0;
		weE = 0;
		weF = 0;
		weG = 0;
		weH = 0;
		// Give the outputs a defined value before the first result is ready.
		y_avail = 0;
		Racc = 0;
		Iacc = 0;
	end
	
	always @(posedge clock)
	begin
		case (wstate)
			sWaitA:
			begin		// wait for the first x input
				if (x_avail)
				begin
					wstate <= sWaitB;
					weA <= 1'd1;		// write new sample to memory
					Racc <= RaccA;		// start a new sum; captured before A restarts
					Iacc <= IaccA;
				end
			end
			sWaitB:
			begin		// wait for the next x input
				weA <= 1'd0;
				if (x_avail)
				begin
					wstate <= sWaitC;
					weB <= 1'd1;
					Racc <= Racc + RaccB;		// add accumulators
					Iacc <= Iacc + IaccB;
				end
			end
			sWaitC:
			begin
				weB <= 1'd0;
				if (x_avail)
				begin
					wstate <= sWaitD;
					weC <= 1'd1;
					Racc <= Racc + RaccC;		// add accumulators
					Iacc <= Iacc + IaccC;
				end
			end
			sWaitD:
			begin
				weC <= 1'd0;
				if (x_avail)
				begin
					wstate <= sWaitE;
					weD <= 1'd1;
					Racc <= Racc + RaccD;		// add accumulators
					Iacc <= Iacc + IaccD;
				end
			end
			sWaitE:
			begin
				weD <= 1'd0;
				if (x_avail)
				begin
					wstate <= sWaitF;
					weE <= 1'd1;
					Racc <= Racc + RaccE;		// add accumulators
					Iacc <= Iacc + IaccE;
				end
			end
			sWaitF:
			begin
				weE <= 1'd0;
				if (x_avail)
				begin
					wstate <= sWaitG;
					weF <= 1'd1;
					Racc <= Racc + RaccF;		// add accumulators
					Iacc <= Iacc + IaccF;
				end
			end
			sWaitG:
			begin
				weF <= 1'd0;
				if (x_avail)
				begin
					wstate <= sWaitH;
					weG <= 1'd1;
					Racc <= Racc + RaccG;		// add accumulators
					Iacc <= Iacc + IaccG;
				end
			end
			sWaitH:
			begin		// wait for the last x input
				weG <= 1'd0;
				if (x_avail)
				begin
					wstate <= sWriteH;
					weH <= 1'd1;		// write new sample to memory
					Racc <= Racc + RaccH;
					Iacc <= Iacc + IaccH;
				end
			end
			sWriteH:
			begin
				wstate <= sAddrH;
				weH <= 1'd0;
				y_avail <= 1'd1;		// output a sample
			end
			sAddrH:
			begin
				wstate <= sWaitA;
				y_avail <= 1'd0;
				waddr <= waddr + 1'd1;	// all eight slots at this address are now written
			end
			default:
			begin		// illegal state encoding: recover instead of hanging
				wstate <= sWaitA;
				y_avail <= 1'd0;
				weA <= 1'd0;
				weB <= 1'd0;
				weC <= 1'd0;
				weD <= 1'd0;
				weE <= 1'd0;
				weF <= 1'd0;
				weG <= 1'd0;
				weH <= 1'd0;
			end
		endcase
	end
	
	parameter TAPS = NTAPS / 8;		// taps per sub-filter; NTAPS must be even by 8
	
	// Eight sub-filters.  They share clock, write address and sample input;
	// each has its own coefficient file, write enable and accumulator outputs.
	fir256 #(.MifFile("coefL8A.mif"), .ABITS(ABITS), .NTAPS(TAPS)) A
		(.clock(clock), .waddr(waddr), .we(weA), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccA), .Iaccum(IaccA));
	fir256 #(.MifFile("coefL8B.mif"), .ABITS(ABITS), .NTAPS(TAPS)) B
		(.clock(clock), .waddr(waddr), .we(weB), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccB), .Iaccum(IaccB));
	fir256 #(.MifFile("coefL8C.mif"), .ABITS(ABITS), .NTAPS(TAPS)) C
		(.clock(clock), .waddr(waddr), .we(weC), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccC), .Iaccum(IaccC));
	fir256 #(.MifFile("coefL8D.mif"), .ABITS(ABITS), .NTAPS(TAPS)) D
		(.clock(clock), .waddr(waddr), .we(weD), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccD), .Iaccum(IaccD));
	fir256 #(.MifFile("coefL8E.mif"), .ABITS(ABITS), .NTAPS(TAPS)) E
		(.clock(clock), .waddr(waddr), .we(weE), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccE), .Iaccum(IaccE));
	fir256 #(.MifFile("coefL8F.mif"), .ABITS(ABITS), .NTAPS(TAPS)) F
		(.clock(clock), .waddr(waddr), .we(weF), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccF), .Iaccum(IaccF));
	fir256 #(.MifFile("coefL8G.mif"), .ABITS(ABITS), .NTAPS(TAPS)) G
		(.clock(clock), .waddr(waddr), .we(weG), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccG), .Iaccum(IaccG));
	fir256 #(.MifFile("coefL8H.mif"), .ABITS(ABITS), .NTAPS(TAPS)) H
		(.clock(clock), .waddr(waddr), .we(weH), .x_real(x_real), .x_imag(x_imag), .Raccum(RaccH), .Iaccum(IaccH));
endmodule

module fir256(		// FIR filter; up to 256 taps
	input clock,
	input [ADDRBITS-1:0] waddr,			// memory write address
	input we,							// memory write enable
	input signed [MBITS-1:0] x_real,	// sample to write
	input signed [MBITS-1:0] x_imag,
	output reg signed [ABITS-1:0] Raccum,	// accumulator output
	output reg signed [ABITS-1:0] Iaccum
);
// This filter waits until a new sample is written to memory at waddr.  Then
// it starts by multiplying that sample by coef[0], the next prior sample
// by coef[1], (etc.) and accumulating.  For R=8 decimation, coef[1] is the
// coefficient 8 prior to coef[0].
//
// Raccum/Iaccum are cleared when a run starts (we seen in rWait) and hold
// the finished sum once the state machine is back in rWait.
	localparam ADDRBITS	= 8;		// Address bits for 18/36 X 256 rom/ram blocks
	localparam MBITS	= 18;		// multiplier bits == input bits
	
	// These must be defined above
	parameter MifFile	= "xx.mif";	// ROM coefficients
	parameter ABITS		= 0;		// adder bits
	parameter NTAPS		= 0;		// number of filter taps, max 2**ADDRBITS

	reg [ADDRBITS-1:0] raddr, caddr;			// read address for sample and coef
	wire [MBITS*2-1:0] q;						// I/Q sample read from memory
	reg  [MBITS*2-1:0] reg_q;					// registered copy of q
	wire signed [MBITS-1:0] q_real, q_imag;		// I/Q sample read from memory
	wire signed [MBITS-1:0] coef;				// coefficient read from memory
	reg  signed [MBITS-1:0] reg_coef;			// registered copy of coef
	reg signed [MBITS*2-1:0] Rmult, Imult;		// multiplier result
	reg [ADDRBITS:0] counter;					// count NTAPS samples + latency

	// The sample word is packed {x_real, x_imag}: real in the upper half.
	assign q_real = reg_q[MBITS*2-1:MBITS];
	assign q_imag = reg_q[MBITS-1:0];

	reg [2:0] rstate;				// state machine
	parameter rWait		= 0;		// idle until a sample is written
	parameter rAddr		= 1;		// rAddr, rAddrA, rAddrB: prime the pipeline
	parameter rAddrA	= 2;
	parameter rAddrB	= 3;
	parameter rRun		= 4;		// multiply and accumulate
	parameter rEnd		= 5;		// declared but never entered by this state machine
	
	firromH #(.MifFile(MifFile)) rom (caddr, clock, coef);	// coefficient ROM 18 X 256
	// sample RAM 36 X 256;  36 bit == 18 bits I and 18 bits Q
	firram36 ram (clock, {x_real, x_imag}, raddr, waddr, we, q);
	
	initial
	begin
		rstate = rWait;
		// Defined outputs before the first run, so the parent does not sum X values.
		Raccum = 0;
		Iaccum = 0;
	end
	
	// Increment address and register the next sample.
	// The begin/end is required by Verilog-2001/2005, where a task body is a
	// single statement; only SystemVerilog allows a bare statement list.
	task next_addr;
		begin
			raddr <= raddr - 1'd1;		// move to prior sample
			caddr <= caddr + 1'd1;		// move to next coefficient
			reg_q <= q;
			reg_coef <= coef;
		end
	endtask
	
	always @(posedge clock)
	begin
		case (rstate)
			rWait:
			begin
				if (we)		// Wait until a new sample is written to memory
				begin
					rstate <= rAddr;
					counter <= NTAPS[ADDRBITS:0] + 1'd1;	// count samples and pipeline latency
					raddr <= waddr;		// read address -> newest sample
					caddr <= 1'd0;		// start at coefficient zero
					Raccum <= 1'd0;
					Iaccum <= 1'd0;
					Rmult <= 1'd0;
					Imult <= 1'd0;
				end
			end
			rAddr:	// prime the memory pipeline
			begin
				rstate <= rAddrA;
				next_addr;
			end
			rAddrA:
			begin
				rstate <= rAddrB;
				next_addr;
			end
			rAddrB:
			begin
				rstate <= rRun;
				next_addr;
			end
			rRun:
			begin		// main pipeline here
				next_addr;
				Rmult <= q_real * reg_coef;
				Imult <= q_imag * reg_coef;
				// Only the top ABITS of each product are accumulated.
				Raccum <= Raccum + Rmult[MBITS*2-1 -: ABITS];
				Iaccum <= Iaccum + Imult[MBITS*2-1 -: ABITS];
				counter <= counter - 1'd1;
				if (counter == 0)
					rstate <= rWait;
			end
			default:	// rEnd and the unused encodings: return to idle instead of hanging
			begin
				rstate <= rWait;
			end
		endcase
	end
endmodule
