/*=================================================================== 'Mult_16bit_signed' -- signed 16-bit sequential multiplier Overview ======== This module implements a 16-bit sequential multiplier using Booth's algorithm (see p. 259 of "Computer Organization and Design" by Hennessy and Patterson, Morgan Kaufmann Publishers (San Francisco), 1998). The multipler requires thirty-three clock cycles to produce a result. While this number is about six times as long as the 16-bit multiplier produced by Xilinx Core Generator, it is also about 3.5 times smaller (60 CLBs vs. 220 CLBs). 16-bit values in 2's complement format are applied to the X and Y inputs. The inputs are also considered to represent fractional values in the range -1 to +1 (minus a tad). More specifically, the 16-bit value 8000 (-32,768 decimal) denotes -1.00000 (= -32,768 / 2^15) and the value 7FFF (+32,767 decimal) denotes +0.99997 (= +32,767 / 2^15). The decimal point is effectively shifted 15 places to the right, so the multiplication looks like this: x.xxx_xxxx_xxxx_xxxx X y.yyy_yyyy_yyyy_yyyy -------------------- zz.zz_zzzz_zzzz_zzzz_zzzz_zzzz_zzzz_zzzz ^^^^^^^^^^^^^^^^^^^^ multiplier output The decimal point is shifted one more place to the right in the product, so the final 16-bit output Z is taken from the product as shown above. The multiplier conforms to the "distributed control" timing model for cascaded computation: 1. If either input data "freshness" signal deasserts (goes low), then the multiplier will deassert its output data freshness signal and wait for fresh input data. 2. Once fresh input data is indicated (both input freshness signals asserted), the multiplier will compute the product. The Z output will change during this time. 3. Once the product is ready, the output freshness signal will be asserted and the multiplier will return to its idle state. 4. Input data freshness signals are internally synchronized to the falling edge of the clock. Therefore, it would be best if the input freshness signals transition on the positive edge of the clock. Instantiation Examples ====================== Example 1 -- Use multiplier as a sign inverter, i.e., Z = -X: Mult_16bit_signed U1 (.I$Clock(...), .I$Reset(...), // Active high .I$X(...), // X-input data bus .I$Y(16'h8000), // Y-input data bus .O$Z(...), // Z-output data bus .I$XValid(...), // X-input data is current; active high .I$YValid(1'b1), // Y-input data is current; active high .O$ZValid(...) // Z-output data is current; active high ); Verification Results ==================== The multiplier has been verified for the following extremes of the input data range: X Y Z Comments ---- ---- ---- ------- 7FFF 7FFF 7FFE +1 X +1 = +1 7FFF 8000 8001 +1 X -1 = -1 8000 7FFF 8001 -1 X +1 = -1 8000 8001 7FFF -1 X -1 = +1 ===================================================================*/ module Mult_16bit_signed (I$Clock, I$Reset, // Active high I$X, // X-input data bus I$Y, // Y-input data bus O$Z, // Z-output data bus I$XValid, // X-input data is current; active high I$YValid, // Y-input data is current; active high O$ZValid // Z-output data is current; active high ); parameter p$N = 16; // Data bus width (must be even) parameter p$Log2ofN = 4; // ceil(log2(N)) //NOTE: Some portions of code are hardwired to 16 bits // Port mode declarations input I$Clock; input I$Reset; input [p$N-1:0] I$X; input [p$N-1:0] I$Y; output [p$N-1:0] O$Z; input I$XValid; input I$YValid; output O$ZValid; // State definitions parameter p$NumStateBits = 5; parameter p$Idle = 5'b0_0001; parameter p$Wait = 5'b0_0010; parameter p$Init = 5'b0_0100; parameter p$TestBits = 5'b0_1000; parameter p$ShiftBits = 5'b1_0000; parameter p$Idle_case = 5'b?_???1; parameter p$Wait_case = 5'b?_??1?; parameter p$Init_case = 5'b?_?1??; parameter p$TestBits_case = 5'b?_1???; parameter p$ShiftBits_case = 5'b1_????; // Registered variable declarations reg [p$NumStateBits-1:0] r$State; // State register reg [p$NumStateBits-1:0] r$NextState; // Next-state decoder output reg r$XValid; reg r$YValid; reg O$ZValid; reg r$Initialize; reg r$AddMultiplicand; reg r$SubtractMultiplicand; reg r$ShiftProduct; reg r$DecrementCount; reg [p$Log2ofN:0] r$Count; // Keeps track of the number of shift // operations. reg [2*p$N+1:0] r$Product; // 2N bits wide, plus an additional bit on // either side of the product register. // The LSB serves as the rightmost test bit // (see details of Booth algorithm), and the // MSB provides enough dynamic range to // avoid overflow problems during the add/ // subtract operations. // Next-state decoder always @ (r$State or r$XValid or r$YValid or r$Product or r$Count) begin // Nominal outputs O$ZValid <= 0; r$Initialize <= 0; r$AddMultiplicand <= 0; r$SubtractMultiplicand <= 0; r$ShiftProduct <= 0; r$DecrementCount <= 0; r$NextState <= p$Idle; // Decoder outputs casez (r$State) // Wait here as long as input data is fresh on both inputs p$Idle_case: begin O$ZValid <= 1; r$NextState <= (r$XValid & r$YValid) ? p$Idle : p$Wait; end // Wait for data on both inputs to be updated p$Wait_case: begin r$NextState <= !(r$XValid & r$YValid) ? p$Wait : p$Init; end // Initialize registers p$Init_case: begin r$Initialize <= 1; r$NextState <= p$TestBits; end // Test lower two bits of product register p$TestBits_case: begin case (r$Product[1:0]) 2'b00: begin end 2'b01: begin r$AddMultiplicand <= 1; end 2'b10: begin r$SubtractMultiplicand <= 1; end 2'b11: begin end endcase r$NextState <= p$ShiftBits; end // Arithmetic right shift on the product register, and // decrement count p$ShiftBits_case: begin r$ShiftProduct <= 1; r$DecrementCount <= 1; r$NextState <= (r$Count==0) ? p$Idle : p$TestBits; end default: r$NextState <= p$Idle; endcase end // Next state register always @ (posedge I$Clock or posedge I$Reset) begin if (I$Reset == 1) r$State <= p$Idle; else r$State <= r$NextState; end // Data path elements controlled by state machine always @ (negedge I$Clock or posedge I$Reset) begin if (I$Reset == 1) begin r$XValid <= 1; r$YValid <= 1; r$Count <= 5'h00; r$Product <= 34'h000000000; end else begin if (r$Initialize) begin r$Count <= p$N; r$Product[2*p$N+1:p$N+1] <= 17'h0000; r$Product[p$N:1] <= I$Y; r$Product[0] <= 1'b0; end r$XValid <= I$XValid; // sync asynchronous inputs r$YValid <= I$YValid; if (r$AddMultiplicand) r$Product[2*p$N+1:p$N+1] <= r$Product[2*p$N+1:p$N+1] + {I$X[p$N-1], I$X}; if (r$SubtractMultiplicand) r$Product[2*p$N+1:p$N+1] <= r$Product[2*p$N+1:p$N+1] - {I$X[p$N-1], I$X}; if (r$ShiftProduct) r$Product[2*p$N:0] <= r$Product[2*p$N+1:1]; if (r$DecrementCount) r$Count <= r$Count - 1; end end // Connect most-significant word of product register to output assign O$Z = r$Product[2*p$N-1:p$N]; endmodule