blob: c3d167fa944c7960d54d65de10e8c94b76395d0a [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */
#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"
static unsigned long mlx5e_rep_ipv6_interval(void)
{
if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
return ~0UL;
}
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
mlx5_fc_queue_stats_work(priv->mdev,
&neigh_update->neigh_stats_work,
neigh_update->min_interval);
}
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
return refcount_inc_not_zero(&nhe->refcnt);
}
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
if (refcount_dec_and_test(&nhe->refcnt)) {
mlx5e_rep_neigh_entry_remove(nhe);
kfree_rcu(nhe, rcu);
}
}
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_neigh_hash_entry *next = NULL;
rcu_read_lock();
for (next = nhe ?
list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
&nhe->neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list) :
list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list);
next;
next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
&next->neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list))
if (mlx5e_rep_neigh_entry_hold(next))
break;
rcu_read_unlock();
if (nhe)
mlx5e_rep_neigh_entry_release(nhe);
return next;
}
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
neigh_update.neigh_stats_work.work);
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;
rtnl_lock();
if (!list_empty(&rpriv->neigh_update.neigh_list))
mlx5e_rep_queue_neigh_stats_work(priv);
while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
mlx5e_tc_update_neigh_used_value(nhe);
rtnl_unlock();
}
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
struct mlx5e_neigh_hash_entry *nhe =
container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
struct neighbour *n = nhe->n;
struct mlx5e_encap_entry *e;
unsigned char ha[ETH_ALEN];
struct mlx5e_priv *priv;
bool neigh_connected;
u8 nud_state, dead;
rtnl_lock();
/* If these parameters are changed after we release the lock,
* we'll receive another event letting us know about it.
* We use this lock to avoid inconsistency between the neigh validity
* and it's hw address.
*/
read_lock_bh(&n->lock);
memcpy(ha, n->ha, ETH_ALEN);
nud_state = n->nud_state;
dead = n->dead;
read_unlock_bh(&n->lock);
neigh_connected = (nud_state & NUD_VALID) && !dead;
trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
list_for_each_entry(e, &nhe->encap_list, encap_list) {
if (!mlx5e_encap_take(e))
continue;
priv = netdev_priv(e->out_dev);
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e);
}
mlx5e_rep_neigh_entry_release(nhe);
rtnl_unlock();
neigh_release(n);
}
static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry *nhe,
struct neighbour *n)
{
/* Take a reference to ensure the neighbour and mlx5 encap
* entry won't be destructed until we drop the reference in
* delayed work.
*/
neigh_hold(n);
/* This assignment is valid as long as the the neigh reference
* is taken
*/
nhe->n = n;
if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
mlx5e_rep_neigh_entry_release(nhe);
neigh_release(n);
}
}
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
neigh_update.netevent_nb);
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;
struct mlx5e_neigh m_neigh = {};
struct neigh_parms *p;
struct neighbour *n;
bool found = false;
switch (event) {
case NETEVENT_NEIGH_UPDATE:
n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
if (n->tbl != &arp_tbl)
#endif
return NOTIFY_DONE;
m_neigh.dev = n->dev;
m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
rcu_read_lock();
nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
rcu_read_unlock();
if (!nhe)
return NOTIFY_DONE;
mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
break;
case NETEVENT_DELAY_PROBE_TIME_UPDATE:
p = ptr;
/* We check the device is present since we don't care about
* changes in the default table, we only care about changes
* done per device delay prob time parameter.
*/
#if IS_ENABLED(CONFIG_IPV6)
if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
if (!p->dev || p->tbl != &arp_tbl)
#endif
return NOTIFY_DONE;
rcu_read_lock();
list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
neigh_list) {
if (p->dev == nhe->m_neigh.dev) {
found = true;
break;
}
}
rcu_read_unlock();
if (!found)
return NOTIFY_DONE;
neigh_update->min_interval = min_t(unsigned long,
NEIGH_VAR(p, DELAY_PROBE_TIME),
neigh_update->min_interval);
mlx5_fc_update_sampling_interval(priv->mdev,
neigh_update->min_interval);
break;
}
return NOTIFY_DONE;
}
static const struct rhashtable_params mlx5e_neigh_ht_params = {
.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
.key_len = sizeof(struct mlx5e_neigh),
.automatic_shrinking = true,
};
int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
int err;
err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
if (err)
return err;
INIT_LIST_HEAD(&neigh_update->neigh_list);
mutex_init(&neigh_update->encap_lock);
INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
mlx5e_rep_neigh_stats_work);
mlx5e_rep_neigh_update_init_interval(rpriv);
rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
if (err)
goto out_err;
return 0;
out_err:
rhashtable_destroy(&neigh_update->neigh_ht);
return err;
}
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
unregister_netevent_notifier(&neigh_update->netevent_nb);
flush_workqueue(priv->wq); /* flush neigh update works */
cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
mutex_destroy(&neigh_update->encap_lock);
rhashtable_destroy(&neigh_update->neigh_ht);
}
static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
int err;
err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
&nhe->rhash_node,
mlx5e_neigh_ht_params);
if (err)
return err;
list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
return err;
}
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
mutex_lock(&rpriv->neigh_update.encap_lock);
list_del_rcu(&nhe->neigh_list);
rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
&nhe->rhash_node,
mlx5e_neigh_ht_params);
mutex_unlock(&rpriv->neigh_update.encap_lock);
}
/* This function must only be called under the representor's encap_lock or
* inside rcu read lock section.
*/
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct mlx5e_neigh_hash_entry *nhe;
nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
mlx5e_neigh_ht_params);
return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e,
struct mlx5e_neigh_hash_entry **nhe)
{
int err;
*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
if (!*nhe)
return -ENOMEM;
(*nhe)->priv = priv;
memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1);
err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
if (err)
goto out_free;
return 0;
out_free:
kfree(*nhe);
return err;
}