// SPDX-License-Identifier: GPL-2.0
/****************************************************************************
 * Driver for Xilinx network controllers and boards
 * Copyright 2021 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/highmem.h>
#include "efct_tx.h"
#include "efct_reg.h"
#include "efct_io.h"
#include "efct_ptp.h"
#ifdef CONFIG_XILINX_EFCT_TRACING
#include <trace/events/xilinx_efct.h>
#endif

/* Transmit header size in bytes.
 * NOTE(review): assumes ESE_HZ_XN_CTPIO_HDR_STRUCT_SIZE is expressed in bits,
 * hence the division by 8 - confirm against efct_reg.h.
 */
#define EFCT_TX_HEADER_BYTES (ESE_HZ_XN_CTPIO_HDR_STRUCT_SIZE / 8)

/* Tx packet alignment: header plus payload is rounded up to a multiple of
 * this many bytes for FIFO accounting and for zero padding in the PIO buffer.
 */
#define EFCT_TX_PACKET_ALIGN 64

/* Packet data is written to the PIO buffer in units of this many bytes
 * (one 64-bit write per unit).
 */
#define EFCT_PKT_DATA_WRITE_ALIGN 8

/* Minimum frame alignment: packets shorter than this are reported with this
 * length in the transmit header.
 */
#define EFCT_MIN_FRAME_ALIGN 32

/* Number of FIFO bytes that may still be claimed by new packets.
 *
 * At least EFCT_TX_PACKET_ALIGN bytes are always held back from the
 * reported figure so that the FIFO is never filled to the brim.
 */
static size_t txq_available(struct efct_tx_queue *txq)
{
	int in_use = atomic_read(&txq->inuse_fifo_bytes);

	return (txq->fifo_size - EFCT_TX_PACKET_ALIGN) - in_use;
}

static void txq_may_restart(struct efct_tx_queue *txq)
{
	if ((txq_available(txq) >
	     ALIGN((txq->efct->mtu + ETH_HLEN + VLAN_HLEN + EFCT_TX_HEADER_BYTES),
		   EFCT_TX_PACKET_ALIGN)) &&
	    atomic_read(&txq->inflight_pkts) < (MAX_TX_BUFFERS - 1))
		netif_tx_wake_queue(txq->core_txq);
}

static void txq_may_stop(struct efct_tx_queue *txq)
{
	if ((likely(txq_available(txq) >
		ALIGN((txq->efct->mtu + ETH_HLEN + VLAN_HLEN + EFCT_TX_HEADER_BYTES),
		      EFCT_TX_PACKET_ALIGN))) &&
		(atomic_read(&txq->inflight_pkts) < (MAX_TX_BUFFERS - 1)))
		return;

	netif_tx_stop_queue(txq->core_txq);
	txq->n_tx_stop_queue++;
}

/* Tell whether a packet of @len payload bytes fits in the FIFO right now.
 *
 * The space requirement is the payload plus the TX header, rounded up to
 * EFCT_TX_PACKET_ALIGN; the free space must be strictly larger than that.
 */
static bool txq_can_transmit(struct efct_tx_queue *txq, size_t len)
{
	size_t fifo_bytes = ALIGN((len + EFCT_TX_HEADER_BYTES), EFCT_TX_PACKET_ALIGN);

	return txq_available(txq) > fifo_bytes;
}

/* Staging buffer for packet bytes that do not yet make up a full block.
 * @used: number of valid bytes currently held in @buf
 * @buf: staged data; written to the PIO buffer once completely filled, or
 *       (rounded up to 8 bytes) when the last fragment has been copied
 */
struct efct_short_copy_buffer {
	int used;
	u8 buf[64];
};

/* Build the 64-bit CTPIO transmit header from its individual fields.
 *
 * @pkt_len: value for the PACKET_LENGTH field
 * @ct_thresh: value for the CT_THRESH field
 * @ts_flag: value for the TIMESTAMP_FLAG field
 * @warm_flag: value for the WARM_FLAG field
 * @action: value for the ACTION field
 *
 * Returns the populated qword after le64_to_cpu() conversion.
 */
static uint64_t efct_tx_header(u16 pkt_len, u8 ct_thresh,
			       u8 ts_flag, u8 warm_flag, u8 action)
{
	union efct_qword ctpio_hdr;

	EFCT_POPULATE_QWORD_5(ctpio_hdr,
			      ESF_HZ_CTPIO_HDR_PACKET_LENGTH, pkt_len,
			      ESF_HZ_CTPIO_HDR_CT_THRESH, ct_thresh,
			      ESF_HZ_CTPIO_HDR_TIMESTAMP_FLAG, ts_flag,
			      ESF_HZ_CTPIO_HDR_WARM_FLAG, warm_flag,
			      ESF_HZ_CTPIO_HDR_ACTION, action);

	return le64_to_cpu(ctpio_hdr.u64[0]);
}

/* Build the transmit header for an ordinary data packet: the warm flag and
 * the action field are both zero.
 */
static uint64_t efct_tx_pkt_header(u16 pkt_len, u8 ct_thresh, bool ts_flag)
{
	const u8 warm_flag = 0;
	const u8 action = 0;

	return efct_tx_header(pkt_len, ct_thresh, ts_flag, warm_flag, action);
}

/* Write one 64-bit value at the current PIO buffer position and advance the
 * position by one, wrapping back to 0 once it reaches aperture_qword.
 */
static void txq_piobuf_w64(struct efct_tx_queue *txq, u64 val)
{
	u64 __iomem *slot = txq->piobuf + txq->piobuf_offset;

	writeq(val, slot);

	txq->piobuf_offset++;
	if (likely(txq->piobuf_offset != txq->aperture_qword))
		return;

	/* End of the aperture reached: start again from the beginning */
	txq->piobuf_offset = 0;
}

#define ER_HZ_PORT0_REG_HOST_NOOP_WRITE  0x12004
static void txq_noop_write(struct efct_ev_queue *eventq)
{
	_efct_writed(cpu_to_le32(1), (eventq->efct->membase + ER_HZ_PORT0_REG_HOST_NOOP_WRITE +
			(eventq->index * eventq->efct->efct_dev->params.evq_stride)));
}

/* Zero-pad the packet just written so that header plus data end on an
 * EFCT_TX_PACKET_ALIGN boundary, then issue the no-op register write.
 *
 * @skb_len: payload length of the packet, excluding the TX header
 *
 * The data is assumed to have been written in whole 8-byte units already,
 * so the padding is emitted as 64-bit zero writes. No padding is written
 * when the packet already ends on the boundary.
 */
static void txq_piobuf_pad64(struct efct_tx_queue *txq, int skb_len)
{
	size_t pad_bytes;
	size_t pad_qwords;

	pad_bytes = EFCT_TX_PACKET_ALIGN - ((ALIGN(skb_len + EFCT_TX_HEADER_BYTES,
					     EFCT_PKT_DATA_WRITE_ALIGN)) % EFCT_TX_PACKET_ALIGN);
	/* A full alignment unit of "padding" means none is needed */
	pad_qwords = (pad_bytes % EFCT_TX_PACKET_ALIGN) / 8;

	while (pad_qwords) {
		txq_piobuf_w64(txq, 0);
		pad_qwords--;
	}
#ifdef __x86_64__
	__asm__ __volatile__ ("sfence");
#endif
	txq_noop_write(&txq->efct->evq[txq->evq_index]);
}

/* Copy @len bytes from @src into the PIO buffer using 64-bit writes.
 *
 * @len is expected to be a multiple of EFCT_PKT_DATA_WRITE_ALIGN; a one-time
 * warning is raised otherwise and any trailing partial unit is not written.
 */
static void txq_piobuf_wblock(struct efct_tx_queue *txq, void *src, size_t len)
{
	u64 *qword = src;
	u64 *end = qword + len / EFCT_PKT_DATA_WRITE_ALIGN;

	WARN_ON_ONCE((len % EFCT_PKT_DATA_WRITE_ALIGN) != 0);

	while (qword != end) {
		txq_piobuf_w64(txq, *qword);
		qword++;
	}
}

/* Copy data from @src to the PIO buffer in whole blocks of
 * sizeof(copy_buf->buf) bytes; any remaining tail that does not fill a
 * block is stashed in @copy_buf instead.
 *
 * @copy_buf is left untouched when @len is an exact multiple of the block
 * size.
 */
static void txq_copyto_piobuf(struct efct_tx_queue *txq, void *src, size_t len,
			      struct efct_short_copy_buffer *copy_buf)
{
	size_t tail_len = len % sizeof(copy_buf->buf);
	size_t block_len = len - tail_len;

	txq_piobuf_wblock(txq, src, block_len);

	if (!tail_len)
		return;

	memcpy(copy_buf->buf, (u8 *)src + block_len, tail_len);
	copy_buf->used = tail_len;
}

/* Copy data to the PIO buffer, draining @copy_buf first.
 *
 * If @copy_buf already holds bytes, it is topped up from @src; should it
 * still not be full, nothing is written and the function returns. Once full
 * it is written to the PIO buffer and emptied. Whatever is left of @src is
 * then copied in whole blocks, with the unaligned tail stashed in @copy_buf.
 */
static void txq_copyto_piobuf_cb(struct efct_tx_queue *txq, void *src, size_t len,
				 struct efct_short_copy_buffer *copy_buf)
{
	int take;

	if (!copy_buf->used) {
		/* Nothing staged: copy straight from the source */
		txq_copyto_piobuf(txq, src, len, copy_buf);
		return;
	}

	/* Top up the partially filled staging buffer */
	take = min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);
	memcpy(copy_buf->buf + copy_buf->used, src, take);
	copy_buf->used += take;

	if (copy_buf->used < sizeof(copy_buf->buf))
		return;

	txq_piobuf_wblock(txq, copy_buf->buf, sizeof(copy_buf->buf));
	copy_buf->used = 0;

	txq_copyto_piobuf(txq, (u8 *)src + take, len - take, copy_buf);
}

static void txq_copy_skb_frags(struct efct_tx_queue *txq, struct sk_buff *skb)
{
	struct efct_short_copy_buffer copy_buf;
	int i;

	copy_buf.used = 0;

	/* Copy skb header */
	txq_copyto_piobuf(txq, skb->data, skb_headlen(skb), &copy_buf);

	/* Copy fragments */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		struct page *p;
		u32 p_off;
		u8 *vaddr;
		/* We need to adjust page and offset in case offset points to somewhere
		 * in-between page + n. Case of skb->len > PAGE_SIZE is not handled
		 * since MTU < PAGE_SIZE
		 */
		p = skb_frag_page(f) + (skb_frag_off(f) >> PAGE_SHIFT);
		p_off = (skb_frag_off(f)) & (PAGE_SIZE - 1);
		vaddr = kmap_local_page(p);
		txq_copyto_piobuf_cb(txq, vaddr + p_off, skb_frag_size(f), &copy_buf);
		kunmap_local(vaddr);
	}

	if (copy_buf.used)
		txq_piobuf_wblock(txq, copy_buf.buf, ALIGN(copy_buf.used, 8));
}

int efct_enqueue_skb(struct efct_tx_queue *txq, struct sk_buff *skb, struct net_device *net_dev)
{
#ifdef CONFIG_XILINX_PTP
	struct efct_ptp_data *ptp;
#endif
	bool ts = false;
	u64 pkt_header;
	int skb_len;

	skb_len = skb->len;
	if (!txq_can_transmit(txq, skb_len)) {
		netif_err(txq->efct, probe, txq->efct->net_dev,
			  "Exceeding txq FIFO. skb len : %u In use FIFO Bytes : %u\n",
				skb_len, atomic_read(&txq->inuse_fifo_bytes));
		netif_tx_stop_queue(txq->core_txq);
		dev_kfree_skb_any(skb);
		return -EBUSY;
	}

	netdev_tx_sent_queue(txq->core_txq, skb_len);
	txq->tx_buffer[txq->added_sequence].skb = skb;
	++txq->added_sequence;

	atomic_add(ALIGN((skb_len + EFCT_TX_HEADER_BYTES), EFCT_TX_PACKET_ALIGN),
		   &txq->inuse_fifo_bytes);
	atomic_inc(&txq->inflight_pkts);

	txq_may_stop(txq);

#ifdef CONFIG_XILINX_PTP
	ptp = txq->efct->ptp_data;
	if (ptp->txtstamp && efct_xmit_with_hwtstamp(skb)) {
		ts = true;
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
	}
#endif
	pkt_header = efct_tx_pkt_header(skb_len < EFCT_MIN_FRAME_ALIGN ?
			EFCT_MIN_FRAME_ALIGN : skb_len, txq->ct_thresh, ts);

#ifdef CONFIG_XILINX_EFCT_TRACING
	trace_xilinx_efct_transmit(skb, net_dev, txq, pkt_header);
#endif
	skb_tx_timestamp(skb);

	/* Write Header */
	txq_piobuf_w64(txq, pkt_header);
	/* Write packet data */
	if (skb_shinfo(skb)->nr_frags) {
		txq_copy_skb_frags(txq, skb);
		txq_piobuf_pad64(txq, skb_len);
	} else {
		/* Pad the write to 8 bytes align.
		 * We can do this because we know the skb_shared_info struct is
		 * after the source, and the destination buffer is big enough.
		 */
		BUILD_BUG_ON(EFCT_PKT_DATA_WRITE_ALIGN >
			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
		txq_piobuf_wblock(txq, skb->data, ALIGN(skb_len, EFCT_PKT_DATA_WRITE_ALIGN));
		txq_piobuf_pad64(txq, skb_len);
	}

	return 0;
}

/* Process TX completions up to and including sequence number @seq.
 *
 * @txq: transmit queue the completion belongs to
 * @seq: sequence number of the last completed packet
 * @ts_status: when set (and PTP support is built in), @partial_ts is
 *             attached to each completed skb
 * @partial_ts: partial timestamp taken from the completion event
 * @purge: free the skbs as dropped (dev_kfree_skb_any) rather than as
 *         consumed (dev_consume_skb_any)
 *
 * For every completed packet the FIFO and in-flight accounting is released
 * and the tx_buffer[] slot is cleared. Packet and byte totals are then added
 * to the queue counters, and a stopped core queue is given a chance to
 * restart.
 */
void _efct_ev_tx(struct efct_tx_queue *txq, u8 seq, bool __always_unused ts_status, u64 partial_ts,
		 bool purge)
{
	u8 cur_seq = txq->completed_sequence;
	u32 pkts = 0, bytes = 0;
	u8 remaining;

	/* Distance from the last completed sequence to @seq, allowing for wrap */
	remaining = (cur_seq > seq) ? MAX_TX_BUFFERS - cur_seq + seq : seq - cur_seq;

	for (; remaining; --remaining) {
		struct sk_buff *skb;

		/* cur_seq is a u8, so incrementing it wraps by itself */
		skb = txq->tx_buffer[++cur_seq].skb;
		if (unlikely(!skb))  {
			netif_err(txq->efct, tx_err, txq->efct->net_dev, "Error: skb should not be null\n");
			continue;
		}
#ifdef CONFIG_XILINX_PTP
		if (ts_status)
			efct_include_ts_in_skb(txq, partial_ts, skb);
#endif
		pkts++;
		bytes += skb->len;

		/* Give back what efct_enqueue_skb() claimed for this packet */
		atomic_sub(ALIGN((skb->len + EFCT_TX_HEADER_BYTES), EFCT_TX_PACKET_ALIGN),
			   &txq->inuse_fifo_bytes);
		atomic_dec(&txq->inflight_pkts);

		if (unlikely(purge))
			dev_kfree_skb_any(skb);
		else
			dev_consume_skb_any(skb);
		txq->tx_buffer[cur_seq].skb = NULL;
	}

	/* Statistic reported through ethtool */
	txq->tx_packets += pkts;
	/* Totals used for BQL */
	txq->pkts += pkts;
	txq->bytes += bytes;

	txq->completed_sequence = seq;

	if (unlikely(netif_tx_queue_stopped(txq->core_txq))) {
		txq_may_restart(txq);
		//TODO update relevant stats
	}
}

/* Handle a TX completion event: extract the sequence number, timestamp
 * status and partial timestamp from the event qword and hand them to
 * _efct_ev_tx().
 */
void efct_ev_tx(struct efct_tx_queue *txq, union efct_qword *p_event, bool purge)
{
	u8 seq = EFCT_QWORD_FIELD(*p_event, ESF_HZ_EV_TXCMPL_SEQUENCE);
	bool ts_status = EFCT_QWORD_FIELD(*p_event, ESF_HZ_EV_TXCMPL_TIMESTAMP_STATUS);
	u64 partial_ts = EFCT_QWORD_FIELD(*p_event, ESF_HZ_EV_TXCMPL_PARTIAL_TSTAMP);

	_efct_ev_tx(txq, seq, ts_status, partial_ts, purge);
}

