/*
   Unix SMB/CIFS implementation.

   Copyright (C) Stefan Metzmacher 2009

     ** NOTE! The following LGPL license applies to the tsocket
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#include "replace.h"
#include "system/filesys.h"
#include "system/network.h"
#include "tsocket.h"
#include "tsocket_internal.h"
#include "lib/util/iov_buf.h"
#include "lib/util/blocking.h"
#include "lib/util/util_net.h"
#include "lib/util/samba_util.h"
#include "lib/async_req/async_sock.h"

static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	*retry = false;

	if (ret >= 0) {
		return 0;
	}

	if (ret != -1) {
		return EIO;
	}

	if (sys_errno == 0) {
		return EIO;
	}

	if (sys_errno == EINTR) {
		*retry = true;
		return sys_errno;
	}

	if (sys_errno == EINPROGRESS) {
		*retry = true;
		return sys_errno;
	}

	if (sys_errno == EAGAIN) {
		*retry = true;
		return sys_errno;
	}

	/* ENOMEM is retryable on Solaris/illumos, and possibly other systems. */
	if (sys_errno == ENOMEM) {
		*retry = true;
		return sys_errno;
	}

#ifdef EWOULDBLOCK
	if (sys_errno == EWOULDBLOCK) {
		*retry = true;
		return sys_errno;
	}
#endif

	return sys_errno;
}

static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
	int i;
	int sys_errno = 0;
	int fds[3];
	int num_fds = 0;

	int result;
	bool ok;

	if (fd == -1) {
		return -1;
	}

	/* first make a fd >= 3 */
	if (high_fd) {
		while (fd < 3) {
			fds[num_fds++] = fd;
			fd = dup(fd);
			if (fd == -1) {
				sys_errno = errno;
				break;
			}
		}
		for (i=0; i<num_fds; i++) {
			close(fds[i]);
		}
		if (fd == -1) {
			errno = sys_errno;
			return fd;
		}
	}

	result = set_blocking(fd, false);
	if (result == -1) {
		goto fail;
	}

	ok = smb_set_close_on_exec(fd);
	if (!ok) {
		goto fail;
	}

	return fd;

 fail:
	if (fd != -1) {
		sys_errno = errno;
		close(fd);
		errno = sys_errno;
	}
	return -1;
}

#ifdef HAVE_LINUX_RTNETLINK_H
/**
 * Get the amount of pending bytes from a netlink socket
 *
 * For some reason netlink sockets don't support querying the amount of pending
 * data via ioctl with FIONREAD, which is what we use in tsocket_bsd_pending()
 * below.
 *
 * We know we are on Linux as we're using netlink, which means we have a working
 * MSG_TRUNC flag to recvmsg() as well, so we use that together with MSG_PEEK.
 **/
static ssize_t tsocket_bsd_netlink_pending(int fd)
{
	struct iovec iov;
	struct msghdr msg;
	char buf[1];

	iov = (struct iovec) {
		.iov_base = buf,
		.iov_len = sizeof(buf)
	};

	msg = (struct msghdr) {
		.msg_iov = &iov,
		.msg_iovlen = 1
	};

	return recvmsg(fd, &msg, MSG_PEEK | MSG_TRUNC);
}
#else
static ssize_t tsocket_bsd_netlink_pending(int fd)
{
	errno = ENOSYS;
	return -1;
}
#endif

static ssize_t tsocket_bsd_pending(int fd)
{
	int ret;
	int value = 0;

	ret = ioctl(fd, FIONREAD, &value);
	if (ret == -1) {
		return ret;
	}

	if (ret != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (value != 0) {
		return value;
	}

	return samba_socket_poll_or_sock_error(fd);
}

static const struct tsocket_address_ops tsocket_address_bsd_ops;

int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
				       const struct sockaddr *sa,
				       size_t sa_socklen,
				       struct tsocket_address **_addr,
				       const char *location)
{
	struct tsocket_address *addr;
	struct samba_sockaddr *bsda = NULL;

	if (sa_socklen < sizeof(sa->sa_family)) {
		errno = EINVAL;
		return -1;
	}

	switch (sa->sa_family) {
	case AF_UNIX:
		if (sa_socklen > sizeof(struct sockaddr_un)) {
			sa_socklen = sizeof(struct sockaddr_un);
		}
		break;
	case AF_INET:
		if (sa_socklen < sizeof(struct sockaddr_in)) {
			errno = EINVAL;
			return -1;
		}
		sa_socklen = sizeof(struct sockaddr_in);
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		if (sa_socklen < sizeof(struct sockaddr_in6)) {
			errno = EINVAL;
			return -1;
		}
		sa_socklen = sizeof(struct sockaddr_in6);
		break;
#endif
	default:
		errno = EAFNOSUPPORT;
		return -1;
	}

	if (sa_socklen > sizeof(struct sockaddr_storage)) {
		errno = EINVAL;
		return -1;
	}

	addr = tsocket_address_create(mem_ctx,
				      &tsocket_address_bsd_ops,
				      &bsda,
				      struct samba_sockaddr,
				      location);
	if (!addr) {
		errno = ENOMEM;
		return -1;
	}

	ZERO_STRUCTP(bsda);

	memcpy(&bsda->u.ss, sa, sa_socklen);

	bsda->sa_socklen = sa_socklen;
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
	bsda->u.sa.sa_len = bsda->sa_socklen;
#endif

	*_addr = addr;
	return 0;
}

int _tsocket_address_bsd_from_samba_sockaddr(TALLOC_CTX *mem_ctx,
					 const struct samba_sockaddr *xs_addr,
					 struct tsocket_address **t_addr,
					 const char *location)
{
	return _tsocket_address_bsd_from_sockaddr(mem_ctx,
						  &xs_addr->u.sa,
						  xs_addr->sa_socklen,
						  t_addr,
						  location);
}

ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
				     struct sockaddr *sa,
				     size_t sa_socklen)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);

	if (!bsda) {
		errno = EINVAL;
		return -1;
	}

	if (sa_socklen < bsda->sa_socklen) {
		errno = EINVAL;
		return -1;
	}

	if (sa_socklen > bsda->sa_socklen) {
		memset(sa, 0, sa_socklen);
		sa_socklen = bsda->sa_socklen;
	}

	memcpy(sa, &bsda->u.ss, sa_socklen);
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
	sa->sa_len = sa_socklen;
#endif
	return sa_socklen;
}

bool tsocket_address_is_inet(const struct tsocket_address *addr, const char *fam)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);

	if (!bsda) {
		return false;
	}

	switch (bsda->u.sa.sa_family) {
	case AF_INET:
		if (strcasecmp(fam, "ip") == 0) {
			return true;
		}

		if (strcasecmp(fam, "ipv4") == 0) {
			return true;
		}

		return false;
#ifdef HAVE_IPV6
	case AF_INET6:
		if (strcasecmp(fam, "ip") == 0) {
			return true;
		}

		if (strcasecmp(fam, "ipv6") == 0) {
			return true;
		}

		return false;
#endif
	}

	return false;
}

int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
				       const char *fam,
				       const char *addr,
				       uint16_t port,
				       struct tsocket_address **_addr,
				       const char *location)
{
	struct addrinfo hints;
	struct addrinfo *result = NULL;
	char port_str[6];
	int ret;

	ZERO_STRUCT(hints);
	/*
	 * we use SOCKET_STREAM here to get just one result
	 * back from getaddrinfo().
	 */
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;

	if (strcasecmp(fam, "ip") == 0) {
		hints.ai_family = AF_UNSPEC;
		if (!addr) {
#ifdef HAVE_IPV6
			addr = "::";
#else
			addr = "0.0.0.0";
#endif
		}
	} else if (strcasecmp(fam, "ipv4") == 0) {
		hints.ai_family = AF_INET;
		if (!addr) {
			addr = "0.0.0.0";
		}
#ifdef HAVE_IPV6
	} else if (strcasecmp(fam, "ipv6") == 0) {
		hints.ai_family = AF_INET6;
		if (!addr) {
			addr = "::";
		}
#endif
	} else {
		errno = EAFNOSUPPORT;
		return -1;
	}

	snprintf(port_str, sizeof(port_str), "%u", port);

	ret = getaddrinfo(addr, port_str, &hints, &result);
	if (ret != 0) {
		switch (ret) {
		case EAI_FAIL:
		case EAI_NONAME:
#ifdef EAI_ADDRFAMILY
		case EAI_ADDRFAMILY:
#endif
			errno = EINVAL;
			break;
		}
		ret = -1;
		goto done;
	}

	if (result->ai_socktype != SOCK_STREAM) {
		errno = EINVAL;
		ret = -1;
		goto done;
	}

	ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
						  result->ai_addr,
						  result->ai_addrlen,
						  _addr,
						  location);

done:
	if (result) {
		freeaddrinfo(result);
	}
	return ret;
}

int _tsocket_address_inet_from_hostport_strings(TALLOC_CTX *mem_ctx,
						const char *fam,
						const char *host_port_addr,
						uint16_t default_port,
						struct tsocket_address **_addr,
						const char *location)
{
	char *pl_sq = NULL;
	char *pr_sq = NULL;
	char *pl_period = NULL;
	char *port_sep = NULL;
	char *cport = NULL;
	char *buf = NULL;
	uint64_t port = 0;
	int ret;
	char *s_addr = NULL;
	uint16_t s_port = default_port;
	bool conv_ret;
	bool is_ipv6_by_squares = false;

	if (host_port_addr == NULL) {
		/* got straight to next function if host_port_addr is NULL */
		goto get_addr;
	}
	buf = talloc_strdup(mem_ctx, host_port_addr);
	if (buf == NULL) {
		errno = ENOMEM;
		return -1;
	}
	pl_period = strchr_m(buf, '.');
	port_sep = strrchr_m(buf, ':');
	pl_sq = strchr_m(buf, '[');
	pr_sq = strrchr_m(buf, ']');
	/* See if its IPv4 or IPv6 */
	/* Only parse IPv6 with squares with/without port, and IPv4 with port */
	/* Everything else, let tsocket_address_inet_from string() */
	/* find parsing errors */
#ifdef HAVE_IPV6
	is_ipv6_by_squares = (pl_sq != NULL && pr_sq != NULL && pr_sq > pl_sq);
#endif
	if (is_ipv6_by_squares) {
		/* IPv6 possibly with port - squares detected */
		port_sep = pr_sq + 1;
		if (*port_sep == '\0') {
			s_addr = pl_sq + 1;
			*pr_sq = 0;
			s_port = default_port;
			goto get_addr;
		}
		if (*port_sep != ':') {
			errno = EINVAL;
			return -1;
		}
		cport = port_sep + 1;
		conv_ret = conv_str_u64(cport, &port);
		if (!conv_ret) {
			errno = EINVAL;
			return -1;
		}
		if (port > 65535) {
			errno = EINVAL;
			return -1;
		}
		s_port = (uint16_t)port;
		*port_sep = 0;
		*pr_sq = 0;
		s_addr = pl_sq + 1;
		*pl_sq = 0;
		goto get_addr;
	} else if (pl_period != NULL && port_sep != NULL) {
		/* IPv4 with port - more than one period in string */
		cport = port_sep + 1;
		conv_ret = conv_str_u64(cport, &port);
		if (!conv_ret) {
			errno = EINVAL;
			return -1;
		}
		if (port > 65535) {
			errno = EINVAL;
			return -1;
		}
		s_port = (uint16_t)port;
		*port_sep = 0;
		s_addr = buf;
		goto get_addr;
	} else {
		/* Everything else, let tsocket_address_inet_from string() */
		/* find parsing errors */
		s_addr = buf;
		s_port = default_port;
		goto get_addr;
	}
get_addr:
	ret = _tsocket_address_inet_from_strings(
	    mem_ctx, fam, s_addr, s_port, _addr, location);

	return ret;
}

char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
				       TALLOC_CTX *mem_ctx)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);
	char addr_str[INET6_ADDRSTRLEN+1];
	const char *str;

	if (!bsda) {
		errno = EINVAL;
		return NULL;
	}

	switch (bsda->u.sa.sa_family) {
	case AF_INET:
		str = inet_ntop(bsda->u.in.sin_family,
				&bsda->u.in.sin_addr,
				addr_str, sizeof(addr_str));
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		str = inet_ntop(bsda->u.in6.sin6_family,
				&bsda->u.in6.sin6_addr,
				addr_str, sizeof(addr_str));
		break;
#endif
	default:
		errno = EINVAL;
		return NULL;
	}

	if (!str) {
		return NULL;
	}

	return talloc_strdup(mem_ctx, str);
}

uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);
	uint16_t port = 0;

	if (!bsda) {
		errno = EINVAL;
		return 0;
	}

	switch (bsda->u.sa.sa_family) {
	case AF_INET:
		port = ntohs(bsda->u.in.sin_port);
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		port = ntohs(bsda->u.in6.sin6_port);
		break;
#endif
	default:
		errno = EINVAL;
		return 0;
	}

	return port;
}

int tsocket_address_inet_set_port(struct tsocket_address *addr,
				  uint16_t port)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);

	if (!bsda) {
		errno = EINVAL;
		return -1;
	}

	switch (bsda->u.sa.sa_family) {
	case AF_INET:
		bsda->u.in.sin_port = htons(port);
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		bsda->u.in6.sin6_port = htons(port);
		break;
#endif
	default:
		errno = EINVAL;
		return -1;
	}

	return 0;
}

bool tsocket_address_is_unix(const struct tsocket_address *addr)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);

	if (!bsda) {
		return false;
	}

	switch (bsda->u.sa.sa_family) {
	case AF_UNIX:
		return true;
	}

	return false;
}

int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
				    const char *path,
				    struct tsocket_address **_addr,
				    const char *location)
{
	struct sockaddr_un un;
	void *p = &un;
	int ret;

	if (!path) {
		path = "";
	}

	if (strlen(path) > sizeof(un.sun_path)-1) {
		errno = ENAMETOOLONG;
		return -1;
	}

	ZERO_STRUCT(un);
	un.sun_family = AF_UNIX;
	strncpy(un.sun_path, path, sizeof(un.sun_path)-1);

	ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
						 (struct sockaddr *)p,
						 sizeof(un),
						 _addr,
						 location);

	return ret;
}

char *tsocket_address_unix_path(const struct tsocket_address *addr,
				TALLOC_CTX *mem_ctx)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);
	const char *str;

	if (!bsda) {
		errno = EINVAL;
		return NULL;
	}

	switch (bsda->u.sa.sa_family) {
	case AF_UNIX:
		str = bsda->u.un.sun_path;
		break;
	default:
		errno = EINVAL;
		return NULL;
	}

	return talloc_strdup(mem_ctx, str);
}

static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
					TALLOC_CTX *mem_ctx)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);
	char *str;
	char *addr_str;
	const char *prefix = NULL;
	uint16_t port;

	switch (bsda->u.sa.sa_family) {
	case AF_UNIX:
		return talloc_asprintf(mem_ctx, "unix:%s",
				       bsda->u.un.sun_path);
	case AF_INET:
		prefix = "ipv4";
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		prefix = "ipv6";
		break;
#endif
	default:
		errno = EINVAL;
		return NULL;
	}

	addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
	if (!addr_str) {
		return NULL;
	}

	port = tsocket_address_inet_port(addr);

	str = talloc_asprintf(mem_ctx, "%s:%s:%u",
			      prefix, addr_str, port);
	talloc_free(addr_str);

	return str;
}

static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
							 TALLOC_CTX *mem_ctx,
							 const char *location)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
					   struct samba_sockaddr);
	struct tsocket_address *copy;
	int ret;

	ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
						 &bsda->u.sa,
						 bsda->sa_socklen,
						 &copy,
						 location);
	if (ret != 0) {
		return NULL;
	}

	return copy;
}

static const struct tsocket_address_ops tsocket_address_bsd_ops = {
	.name		= "bsd",
	.string		= tsocket_address_bsd_string,
	.copy		= tsocket_address_bsd_copy,
};

struct tdgram_bsd {
	int fd;

	void *event_ptr;
	struct tevent_fd *fde;
	bool optimize_recvfrom;
	bool netlink;

	void *readable_private;
	void (*readable_handler)(void *private_data);
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};

bool tdgram_bsd_optimize_recvfrom(struct tdgram_context *dgram,
				  bool on)
{
	struct tdgram_bsd *bsds =
		talloc_get_type(_tdgram_context_data(dgram),
		struct tdgram_bsd);
	bool old;

	if (bsds == NULL) {
		/* not a bsd socket */
		return false;
	}

	old = bsds->optimize_recvfrom;
	bsds->optimize_recvfrom = on;

	return old;
}

static void tdgram_bsd_fde_handler(struct tevent_context *ev,
				   struct tevent_fd *fde,
				   uint16_t flags,
				   void *private_data)
{
	struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
				  struct tdgram_bsd);

	if (flags & TEVENT_FD_WRITE) {
		bsds->writeable_handler(bsds->writeable_private);
		return;
	}
	if (flags & TEVENT_FD_READ) {
		if (!bsds->readable_handler) {
			TEVENT_FD_NOT_READABLE(bsds->fde);
			return;
		}
		bsds->readable_handler(bsds->readable_private);
		return;
	}
}

static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
					   struct tevent_context *ev,
					   void (*handler)(void *private_data),
					   void *private_data)
{
	if (ev == NULL) {
		if (handler) {
			errno = EINVAL;
			return -1;
		}
		if (!bsds->readable_handler) {
			return 0;
		}
		bsds->readable_handler = NULL;
		bsds->readable_private = NULL;

		return 0;
	}

	/* read and write must use the same tevent_context */
	if (bsds->event_ptr != ev) {
		if (bsds->readable_handler || bsds->writeable_handler) {
			errno = EINVAL;
			return -1;
		}
		bsds->event_ptr = NULL;
		TALLOC_FREE(bsds->fde);
	}

	if (tevent_fd_get_flags(bsds->fde) == 0) {
		TALLOC_FREE(bsds->fde);

		bsds->fde = tevent_add_fd(ev, bsds,
					  bsds->fd, TEVENT_FD_READ,
					  tdgram_bsd_fde_handler,
					  bsds);
		if (!bsds->fde) {
			errno = ENOMEM;
			return -1;
		}

		/* cache the event context we're running on */
		bsds->event_ptr = ev;
	} else if (!bsds->readable_handler) {
		TEVENT_FD_READABLE(bsds->fde);
	}

	bsds->readable_handler = handler;
	bsds->readable_private = private_data;

	return 0;
}

static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
					    struct tevent_context *ev,
					    void (*handler)(void *private_data),
					    void *private_data)
{
	if (ev == NULL) {
		if (handler) {
			errno = EINVAL;
			return -1;
		}
		if (!bsds->writeable_handler) {
			return 0;
		}
		bsds->writeable_handler = NULL;
		bsds->writeable_private = NULL;
		TEVENT_FD_NOT_WRITEABLE(bsds->fde);

		return 0;
	}

	/* read and write must use the same tevent_context */
	if (bsds->event_ptr != ev) {
		if (bsds->readable_handler || bsds->writeable_handler) {
			errno = EINVAL;
			return -1;
		}
		bsds->event_ptr = NULL;
		TALLOC_FREE(bsds->fde);
	}

	if (tevent_fd_get_flags(bsds->fde) == 0) {
		TALLOC_FREE(bsds->fde);

		bsds->fde = tevent_add_fd(ev, bsds,
					  bsds->fd, TEVENT_FD_WRITE,
					  tdgram_bsd_fde_handler,
					  bsds);
		if (!bsds->fde) {
			errno = ENOMEM;
			return -1;
		}

		/* cache the event context we're running on */
		bsds->event_ptr = ev;
	} else if (!bsds->writeable_handler) {
		TEVENT_FD_WRITEABLE(bsds->fde);
	}

	bsds->writeable_handler = handler;
	bsds->writeable_private = private_data;

	return 0;
}

struct tdgram_bsd_recvfrom_state {
	struct tdgram_context *dgram;
	bool first_try;
	uint8_t *buf;
	size_t len;
	struct tsocket_address *src;
};

static void tdgram_bsd_recvfrom_cleanup(struct tevent_req *req,
					enum tevent_req_state req_state)
{
	struct tdgram_bsd_recvfrom_state *state =
		tevent_req_data(req,
		struct tdgram_bsd_recvfrom_state);

	if (state->dgram != NULL) {
		struct tdgram_bsd *bsds =
			tdgram_context_data(state->dgram,
			struct tdgram_bsd);

		tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
		state->dgram = NULL;
	}
}

static void tdgram_bsd_recvfrom_handler(void *private_data);

static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					struct tdgram_context *dgram)
{
	struct tevent_req *req;
	struct tdgram_bsd_recvfrom_state *state;
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	int ret;

	req = tevent_req_create(mem_ctx, &state,
				struct tdgram_bsd_recvfrom_state);
	if (!req) {
		return NULL;
	}

	state->dgram	= dgram;
	state->first_try= true;
	state->buf	= NULL;
	state->len	= 0;
	state->src	= NULL;

	tevent_req_set_cleanup_fn(req, tdgram_bsd_recvfrom_cleanup);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		goto post;
	}


	/*
	 * this is a fast path, not waiting for the
	 * socket to become explicit readable gains
	 * about 10%-20% performance in benchmark tests.
	 */
	if (bsds->optimize_recvfrom) {
		/*
		 * We only do the optimization on
		 * recvfrom if the caller asked for it.
		 *
		 * This is needed because in most cases
		 * we prefer to flush send buffers before
		 * receiving incoming requests.
		 */
		tdgram_bsd_recvfrom_handler(req);
		if (!tevent_req_is_in_progress(req)) {
			goto post;
		}
	}

	ret = tdgram_bsd_set_readable_handler(bsds, ev,
					      tdgram_bsd_recvfrom_handler,
					      req);
	if (ret == -1) {
		tevent_req_error(req, errno);
		goto post;
	}

	return req;

 post:
	tevent_req_post(req, ev);
	return req;
}

static void tdgram_bsd_recvfrom_handler(void *private_data)
{
	struct tevent_req *req = talloc_get_type_abort(private_data,
				 struct tevent_req);
	struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
					struct tdgram_bsd_recvfrom_state);
	struct tdgram_context *dgram = state->dgram;
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	struct samba_sockaddr *bsda = NULL;
	ssize_t ret;
	int err;
	bool retry;

	if (bsds->netlink) {
		ret = tsocket_bsd_netlink_pending(bsds->fd);
	} else {
		ret = tsocket_bsd_pending(bsds->fd);
	}

	if (state->first_try && ret == 0) {
		state->first_try = false;
		/* retry later */
		return;
	}
	state->first_try = false;

	err = tsocket_bsd_error_from_errno(ret, errno, &retry);
	if (retry) {
		/* retry later */
		return;
	}
	if (tevent_req_error(req, err)) {
		return;
	}

	/* note that 'ret' can be 0 here */
	state->buf = talloc_array(state, uint8_t, ret);
	if (tevent_req_nomem(state->buf, req)) {
		return;
	}
	state->len = ret;

	state->src = tsocket_address_create(state,
					    &tsocket_address_bsd_ops,
					    &bsda,
					    struct samba_sockaddr,
					    __location__ "bsd_recvfrom");
	if (tevent_req_nomem(state->src, req)) {
		return;
	}

	ZERO_STRUCTP(bsda);
	bsda->sa_socklen = sizeof(bsda->u.ss);
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
	bsda->u.sa.sa_len = bsda->sa_socklen;
#endif

	ret = recvfrom(bsds->fd, state->buf, state->len, 0,
		       &bsda->u.sa, &bsda->sa_socklen);
	err = tsocket_bsd_error_from_errno(ret, errno, &retry);
	if (retry) {
		/* retry later */
		return;
	}
	if (tevent_req_error(req, err)) {
		return;
	}

	/*
	 * Some systems (FreeBSD, see bug #7115) return too much
	 * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
	 * the return value includes some IP/UDP header bytes,
	 * while recvfrom() just returns the payload.
	 */
	state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
	if (tevent_req_nomem(state->buf, req)) {
		return;
	}
	state->len = ret;

	tevent_req_done(req);
}

static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
					int *perrno,
					TALLOC_CTX *mem_ctx,
					uint8_t **buf,
					struct tsocket_address **src)
{
	struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
					struct tdgram_bsd_recvfrom_state);
	ssize_t ret;

	ret = tsocket_simple_int_recv(req, perrno);
	if (ret == 0) {
		*buf = talloc_move(mem_ctx, &state->buf);
		ret = state->len;
		if (src) {
			*src = talloc_move(mem_ctx, &state->src);
		}
	}

	tevent_req_received(req);
	return ret;
}

struct tdgram_bsd_sendto_state {
	struct tdgram_context *dgram;

	const uint8_t *buf;
	size_t len;
	const struct tsocket_address *dst;

	ssize_t ret;
};

static void tdgram_bsd_sendto_cleanup(struct tevent_req *req,
				      enum tevent_req_state req_state)
{
	struct tdgram_bsd_sendto_state *state =
		tevent_req_data(req,
		struct tdgram_bsd_sendto_state);

	if (state->dgram != NULL) {
		struct tdgram_bsd *bsds =
			tdgram_context_data(state->dgram,
			struct tdgram_bsd);

		tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
		state->dgram = NULL;
	}
}

static void tdgram_bsd_sendto_handler(void *private_data);

static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
						 struct tevent_context *ev,
						 struct tdgram_context *dgram,
						 const uint8_t *buf,
						 size_t len,
						 const struct tsocket_address *dst)
{
	struct tevent_req *req;
	struct tdgram_bsd_sendto_state *state;
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	int ret;

	req = tevent_req_create(mem_ctx, &state,
				struct tdgram_bsd_sendto_state);
	if (!req) {
		return NULL;
	}

	state->dgram	= dgram;
	state->buf	= buf;
	state->len	= len;
	state->dst	= dst;
	state->ret	= -1;

	tevent_req_set_cleanup_fn(req, tdgram_bsd_sendto_cleanup);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		goto post;
	}

	/*
	 * this is a fast path, not waiting for the
	 * socket to become explicit writeable gains
	 * about 10%-20% performance in benchmark tests.
	 */
	tdgram_bsd_sendto_handler(req);
	if (!tevent_req_is_in_progress(req)) {
		goto post;
	}

	ret = tdgram_bsd_set_writeable_handler(bsds, ev,
					       tdgram_bsd_sendto_handler,
					       req);
	if (ret == -1) {
		tevent_req_error(req, errno);
		goto post;
	}

	return req;

 post:
	tevent_req_post(req, ev);
	return req;
}

static void tdgram_bsd_sendto_handler(void *private_data)
{
	struct tevent_req *req = talloc_get_type_abort(private_data,
				 struct tevent_req);
	struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
					struct tdgram_bsd_sendto_state);
	struct tdgram_context *dgram = state->dgram;
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	struct sockaddr *sa = NULL;
	socklen_t sa_socklen = 0;
	ssize_t ret;
	int err;
	bool retry;

	if (state->dst) {
		struct samba_sockaddr *bsda =
			talloc_get_type(state->dst->private_data,
			struct samba_sockaddr);

		sa = &bsda->u.sa;
		sa_socklen = bsda->sa_socklen;
	}

	ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
	err = tsocket_bsd_error_from_errno(ret, errno, &retry);
	if (retry) {
		/* retry later */
		return;
	}

	if (err == EMSGSIZE) {
		/* round up in 1K increments */
		int bufsize = ((state->len + 1023) & (~1023));

		ret = setsockopt(bsds->fd, SOL_SOCKET, SO_SNDBUF, &bufsize,
				 sizeof(bufsize));
		if (ret == 0) {
			/*
			 * We do the retry here, rather then via the
			 * handler, as we only want to retry once for
			 * this condition, so if there is a mismatch
			 * between what setsockopt() accepts and what can
			 * actually be sent, we do not end up in a
			 * loop.
			 */

			ret = sendto(bsds->fd, state->buf, state->len,
				     0, sa, sa_socklen);
			err = tsocket_bsd_error_from_errno(ret, errno, &retry);
			if (retry) { /* retry later */
				return;
			}
		}
	}

	if (tevent_req_error(req, err)) {
		return;
	}

	state->ret = ret;

	tevent_req_done(req);
}

static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
{
	struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
					struct tdgram_bsd_sendto_state);
	ssize_t ret;

	ret = tsocket_simple_int_recv(req, perrno);
	if (ret == 0) {
		ret = state->ret;
	}

	tevent_req_received(req);
	return ret;
}

struct tdgram_bsd_disconnect_state {
	uint8_t __dummy;
};

static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
						     struct tevent_context *ev,
						     struct tdgram_context *dgram)
{
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	struct tevent_req *req;
	struct tdgram_bsd_disconnect_state *state;
	int ret;
	int err;
	bool dummy;

	req = tevent_req_create(mem_ctx, &state,
				struct tdgram_bsd_disconnect_state);
	if (req == NULL) {
		return NULL;
	}

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		goto post;
	}

	TALLOC_FREE(bsds->fde);
	ret = close(bsds->fd);
	bsds->fd = -1;
	err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
	if (tevent_req_error(req, err)) {
		goto post;
	}

	tevent_req_done(req);
post:
	tevent_req_post(req, ev);
	return req;
}

static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int ret;

	ret = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return ret;
}

static const struct tdgram_context_ops tdgram_bsd_ops = {
	.name			= "bsd",

	.recvfrom_send		= tdgram_bsd_recvfrom_send,
	.recvfrom_recv		= tdgram_bsd_recvfrom_recv,

	.sendto_send		= tdgram_bsd_sendto_send,
	.sendto_recv		= tdgram_bsd_sendto_recv,

	.disconnect_send	= tdgram_bsd_disconnect_send,
	.disconnect_recv	= tdgram_bsd_disconnect_recv,
};

static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
{
	TALLOC_FREE(bsds->fde);
	if (bsds->fd != -1) {
		close(bsds->fd);
		bsds->fd = -1;
	}
	return 0;
}

static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
				   const struct tsocket_address *remote,
				   bool broadcast,
				   TALLOC_CTX *mem_ctx,
				   struct tdgram_context **_dgram,
				   const char *location)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
		struct samba_sockaddr);
	struct samba_sockaddr *rbsda = NULL;
	struct tdgram_context *dgram;
	struct tdgram_bsd *bsds;
	int fd;
	int ret;
	bool do_bind = false;
	bool do_reuseaddr = false;
	bool do_ipv6only = false;
	bool is_inet = false;
	int sa_fam = lbsda->u.sa.sa_family;

	if (remote) {
		rbsda = talloc_get_type_abort(remote->private_data,
			struct samba_sockaddr);
	}

	switch (lbsda->u.sa.sa_family) {
	case AF_UNIX:
		if (broadcast) {
			errno = EINVAL;
			return -1;
		}
		if (lbsda->u.un.sun_path[0] != 0) {
			do_reuseaddr = true;
			do_bind = true;
		}
		break;
	case AF_INET:
		if (lbsda->u.in.sin_port != 0) {
			do_reuseaddr = true;
			do_bind = true;
		}
		if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
			do_bind = true;
		}
		is_inet = true;
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		if (lbsda->u.in6.sin6_port != 0) {
			do_reuseaddr = true;
			do_bind = true;
		}
		if (memcmp(&in6addr_any,
			   &lbsda->u.in6.sin6_addr,
			   sizeof(in6addr_any)) != 0) {
			do_bind = true;
		}
		is_inet = true;
		do_ipv6only = true;
		break;
#endif
	default:
		errno = EINVAL;
		return -1;
	}

	if (!do_bind && is_inet && rbsda) {
		sa_fam = rbsda->u.sa.sa_family;
		switch (sa_fam) {
		case AF_INET:
			do_ipv6only = false;
			break;
#ifdef HAVE_IPV6
		case AF_INET6:
			do_ipv6only = true;
			break;
#endif
		}
	}

	fd = socket(sa_fam, SOCK_DGRAM, 0);
	if (fd < 0) {
		return -1;
	}

	fd = tsocket_bsd_common_prepare_fd(fd, true);
	if (fd < 0) {
		return -1;
	}

	dgram = tdgram_context_create(mem_ctx,
				      &tdgram_bsd_ops,
				      &bsds,
				      struct tdgram_bsd,
				      location);
	if (!dgram) {
		int saved_errno = errno;
		close(fd);
		errno = saved_errno;
		return -1;
	}
	ZERO_STRUCTP(bsds);
	bsds->fd = fd;
	talloc_set_destructor(bsds, tdgram_bsd_destructor);

#ifdef HAVE_IPV6
	if (do_ipv6only) {
		int val = 1;

		ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
				 (const void *)&val, sizeof(val));
		if (ret == -1) {
			int saved_errno = errno;
			talloc_free(dgram);
			errno = saved_errno;
			return -1;
		}
	}
#endif

	if (broadcast) {
		int val = 1;

		ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
				 (const void *)&val, sizeof(val));
		if (ret == -1) {
			int saved_errno = errno;
			talloc_free(dgram);
			errno = saved_errno;
			return -1;
		}
	}

	if (do_reuseaddr) {
		int val = 1;

		ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
				 (const void *)&val, sizeof(val));
		if (ret == -1) {
			int saved_errno = errno;
			talloc_free(dgram);
			errno = saved_errno;
			return -1;
		}
	}

	if (do_bind) {
		ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
		if (ret == -1) {
			int saved_errno = errno;
			talloc_free(dgram);
			errno = saved_errno;
			return -1;
		}
	}

	if (rbsda) {
		if (rbsda->u.sa.sa_family != sa_fam) {
			talloc_free(dgram);
			errno = EINVAL;
			return -1;
		}

		ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
		if (ret == -1) {
			int saved_errno = errno;
			talloc_free(dgram);
			errno = saved_errno;
			return -1;
		}
	}

	*_dgram = dgram;
	return 0;
}

int _tdgram_bsd_existing_socket(TALLOC_CTX *mem_ctx,
				int fd,
				struct tdgram_context **_dgram,
				const char *location)
{
	struct tdgram_context *dgram;
	struct tdgram_bsd *bsds;
#ifdef HAVE_LINUX_RTNETLINK_H
	int result;
	struct sockaddr sa;
	socklen_t sa_len = sizeof(struct sockaddr);
#endif

	dgram = tdgram_context_create(mem_ctx,
				      &tdgram_bsd_ops,
				      &bsds,
				      struct tdgram_bsd,
				      location);
	if (!dgram) {
		return -1;
	}
	ZERO_STRUCTP(bsds);
	bsds->fd = fd;
	talloc_set_destructor(bsds, tdgram_bsd_destructor);

	*_dgram = dgram;

#ifdef HAVE_LINUX_RTNETLINK_H
	/*
	 * Try to determine the protocol family and remember if it's
	 * AF_NETLINK. We don't care if this fails.
	 */
	result = getsockname(fd, &sa, &sa_len);
	if (result == 0 && sa.sa_family == AF_NETLINK) {
		bsds->netlink = true;
	}
#endif

	return 0;
}

int _tdgram_inet_udp_socket(const struct tsocket_address *local,
			    const struct tsocket_address *remote,
			    TALLOC_CTX *mem_ctx,
			    struct tdgram_context **dgram,
			    const char *location)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
		struct samba_sockaddr);
	int ret;

	switch (lbsda->u.sa.sa_family) {
	case AF_INET:
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		break;
#endif
	default:
		errno = EINVAL;
		return -1;
	}

	ret = tdgram_bsd_dgram_socket(local, remote, false,
				      mem_ctx, dgram, location);

	return ret;
}

int _tdgram_inet_udp_broadcast_socket(const struct tsocket_address *local,
				      TALLOC_CTX *mem_ctx,
				      struct tdgram_context **dgram,
				      const char *location)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
		struct samba_sockaddr);
	int ret;

	switch (lbsda->u.sa.sa_family) {
	case AF_INET:
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		/* only ipv4 */
		errno = EINVAL;
		return -1;
#endif
	default:
		errno = EINVAL;
		return -1;
	}

	ret = tdgram_bsd_dgram_socket(local, NULL, true,
				      mem_ctx, dgram, location);

	return ret;
}

int _tdgram_unix_socket(const struct tsocket_address *local,
			const struct tsocket_address *remote,
			TALLOC_CTX *mem_ctx,
			struct tdgram_context **dgram,
			const char *location)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
		struct samba_sockaddr);
	int ret;

	switch (lbsda->u.sa.sa_family) {
	case AF_UNIX:
		break;
	default:
		errno = EINVAL;
		return -1;
	}

	ret = tdgram_bsd_dgram_socket(local, remote, false,
				      mem_ctx, dgram, location);

	return ret;
}

struct tstream_bsd {
	int fd;
	int error;

	void *event_ptr;
	struct tevent_fd *fde;
	bool optimize_readv;
	bool fail_readv_first_error;

	void *error_private;
	void (*error_handler)(void *private_data);
	void *readable_private;
	void (*readable_handler)(void *private_data);
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};

bool tstream_bsd_optimize_readv(struct tstream_context *stream,
				bool on)
{
	struct tstream_bsd *bsds =
		talloc_get_type(_tstream_context_data(stream),
		struct tstream_bsd);
	bool old;

	if (bsds == NULL) {
		/* not a bsd socket */
		return false;
	}

	old = bsds->optimize_readv;
	bsds->optimize_readv = on;

	return old;
}

bool tstream_bsd_fail_readv_first_error(struct tstream_context *stream,
					bool on)
{
	struct tstream_bsd *bsds =
		talloc_get_type(_tstream_context_data(stream),
		struct tstream_bsd);
	bool old;

	if (bsds == NULL) {
		/* not a bsd socket */
		return false;
	}

	old = bsds->fail_readv_first_error;
	bsds->fail_readv_first_error = on;

	return old;
}

static void tstream_bsd_fde_handler(struct tevent_context *ev,
				    struct tevent_fd *fde,
				    uint16_t flags,
				    void *private_data)
{
	struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
				   struct tstream_bsd);

	if (flags & TEVENT_FD_ERROR) {
		/*
		 * We lazily keep TEVENT_FD_READ alive
		 * in tstream_bsd_set_readable_handler()
		 *
		 * So we have to check TEVENT_FD_READ
		 * as well as bsds->readable_handler
		 *
		 * We only drain remaining data from the
		 * the recv queue if available and desired.
		 */
		if ((flags & TEVENT_FD_READ) &&
		    !bsds->fail_readv_first_error &&
		    (bsds->readable_handler != NULL))
		{
			/*
			 * If there's still data to read
			 * we allow it to be read until
			 * we reach EOF (=> EPIPE).
			 */
			bsds->readable_handler(bsds->readable_private);
			return;
		}

		/*
		 * If there's no data left to read,
		 * we get the error.
		 *
		 * It means we no longer call any readv or
		 * writev, as bsds->error is checked first.
		 */
		if (bsds->error == 0) {
			int ret = samba_socket_poll_or_sock_error(bsds->fd);

			if (ret == -1) {
				bsds->error = errno;
			}
			/* fallback to EPIPE */
			if (bsds->error == 0) {
				bsds->error = EPIPE;
			}
		}

		/*
		 * Let write to fail early.
		 *
		 * Note we only need to check TEVENT_FD_WRITE
		 * as tstream_bsd_set_writeable_handler()
		 * clear it together with the handler.
		 */
		if (flags & TEVENT_FD_WRITE) {
			bsds->writeable_handler(bsds->writeable_private);
			return;
		}

		/* We prefer the readable handler to fire first. */
		if (bsds->readable_handler != NULL) {
			bsds->readable_handler(bsds->readable_private);
			return;
		}

		/* As last resort we notify the writeable handler */
		if (bsds->writeable_handler != NULL) {
			bsds->writeable_handler(bsds->writeable_private);
			return;
		}

		/*
		 * Failing reads and writes will also terminate
		 * pending monitor requests at the main tstream layer,
		 * so we only call the error handler if it is the only
		 * pending request
		 */
		if (bsds->error_handler != NULL) {
			bsds->error_handler(bsds->error_private);
			return;
		}

		/*
		 * We may hit this because we don't clear TEVENT_FD_ERROR
		 * in tstream_bsd_set_readable_handler() nor
		 * tstream_bsd_set_writeable_handler().
		 *
		 * As we already captured the error, we can remove
		 * the fde completely.
		 */
		TALLOC_FREE(bsds->fde);
		return;
	}
	if (flags & TEVENT_FD_WRITE) {
		bsds->writeable_handler(bsds->writeable_private);
		return;
	}
	if (flags & TEVENT_FD_READ) {
		if (!bsds->readable_handler) {
			/*
			 * tstream_bsd_set_readable_handler
			 * doesn't clear TEVENT_FD_READ.
			 *
			 * In order to avoid cpu-spinning
			 * we need to clear it here.
			 */
			TEVENT_FD_NOT_READABLE(bsds->fde);

			/*
			 * Here we're lazy and keep TEVENT_FD_ERROR
			 * alive. If it's triggered the next time
			 * we'll handle it gracefully above
			 * and end up with TALLOC_FREE(bsds->fde);
			 * in order to spin on TEVENT_FD_ERROR.
			 */
			return;
		}
		bsds->readable_handler(bsds->readable_private);
		return;
	}
}

static int tstream_bsd_set_error_handler(struct tstream_bsd *bsds,
					    struct tevent_context *ev,
					    void (*handler)(void *private_data),
					    void *private_data)
{
	if (ev == NULL) {
		if (handler) {
			errno = EINVAL;
			return -1;
		}
		if (!bsds->error_handler) {
			return 0;
		}
		bsds->error_handler = NULL;
		bsds->error_private = NULL;

		/*
		 * Here we are lazy as it's very likely that the next
		 * tevent_monitor_send() will come in shortly,
		 * so we keep TEVENT_FD_ERROR alive.
		 */
		return 0;
	}

	/* monitor, read and write must use the same tevent_context */
	if (bsds->event_ptr != ev) {
		if (bsds->error_handler ||
		    bsds->readable_handler ||
		    bsds->writeable_handler)
		{
			errno = EINVAL;
			return -1;
		}
		bsds->event_ptr = NULL;
		TALLOC_FREE(bsds->fde);
	}

	if (tevent_fd_get_flags(bsds->fde) == 0) {
		TALLOC_FREE(bsds->fde);

		bsds->fde = tevent_add_fd(ev, bsds,
					  bsds->fd,
					  TEVENT_FD_ERROR,
					  tstream_bsd_fde_handler,
					  bsds);
		if (!bsds->fde) {
			errno = ENOMEM;
			return -1;
		}

		/* cache the event context we're running on */
		bsds->event_ptr = ev;
	} else if (!bsds->error_handler) {
		/*
		 * TEVENT_FD_ERROR is likely already set, so
		 * TEVENT_FD_WANTERROR() is most likely a no-op.
		 */
		TEVENT_FD_WANTERROR(bsds->fde);
	}

	bsds->error_handler = handler;
	bsds->error_private = private_data;

	return 0;
}

static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
					    struct tevent_context *ev,
					    void (*handler)(void *private_data),
					    void *private_data)
{
	if (ev == NULL) {
		if (handler) {
			errno = EINVAL;
			return -1;
		}
		if (!bsds->readable_handler) {
			return 0;
		}
		bsds->readable_handler = NULL;
		bsds->readable_private = NULL;

		/*
		 * Here we are lazy as it's very likely that the next
		 * tevent_readv_send() will come in shortly,
		 * so we keep TEVENT_FD_READ alive.
		 */
		return 0;
	}

	/* monitor, read and write must use the same tevent_context */
	if (bsds->event_ptr != ev) {
		if (bsds->error_handler ||
		    bsds->readable_handler ||
		    bsds->writeable_handler)
		{
			errno = EINVAL;
			return -1;
		}
		bsds->event_ptr = NULL;
		TALLOC_FREE(bsds->fde);
	}

	if (tevent_fd_get_flags(bsds->fde) == 0) {
		TALLOC_FREE(bsds->fde);

		bsds->fde = tevent_add_fd(ev, bsds,
					  bsds->fd,
					  TEVENT_FD_ERROR | TEVENT_FD_READ,
					  tstream_bsd_fde_handler,
					  bsds);
		if (!bsds->fde) {
			errno = ENOMEM;
			return -1;
		}

		/* cache the event context we're running on */
		bsds->event_ptr = ev;
	} else if (!bsds->readable_handler) {
		TEVENT_FD_READABLE(bsds->fde);
		/*
		 * TEVENT_FD_ERROR is likely already set, so
		 * TEVENT_FD_WANTERROR() is most likely a no-op.
		 */
		TEVENT_FD_WANTERROR(bsds->fde);
	}

	bsds->readable_handler = handler;
	bsds->readable_private = private_data;

	return 0;
}

static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
					     struct tevent_context *ev,
					     void (*handler)(void *private_data),
					     void *private_data)
{
	if (ev == NULL) {
		if (handler) {
			errno = EINVAL;
			return -1;
		}
		if (!bsds->writeable_handler) {
			return 0;
		}
		bsds->writeable_handler = NULL;
		bsds->writeable_private = NULL;

		/*
		 * The writeable handler is only
		 * set if we got EAGAIN or a short
		 * writev on the first try, so
		 * this isn't the hot path.
		 *
		 * Here we are lazy and leave TEVENT_FD_ERROR
		 * alive as it's shared with the readable
		 * handler. So we only clear TEVENT_FD_WRITE.
		 */
		TEVENT_FD_NOT_WRITEABLE(bsds->fde);
		return 0;
	}

	/* monitor, read and write must use the same tevent_context */
	if (bsds->event_ptr != ev) {
		if (bsds->error_handler ||
		    bsds->readable_handler ||
		    bsds->writeable_handler)
		{
			errno = EINVAL;
			return -1;
		}
		bsds->event_ptr = NULL;
		TALLOC_FREE(bsds->fde);
	}

	if (tevent_fd_get_flags(bsds->fde) == 0) {
		TALLOC_FREE(bsds->fde);

		bsds->fde = tevent_add_fd(ev, bsds,
					  bsds->fd,
					  TEVENT_FD_ERROR | TEVENT_FD_WRITE,
					  tstream_bsd_fde_handler,
					  bsds);
		if (!bsds->fde) {
			errno = ENOMEM;
			return -1;
		}

		/* cache the event context we're running on */
		bsds->event_ptr = ev;
	} else if (!bsds->writeable_handler) {
		TEVENT_FD_WRITEABLE(bsds->fde);
		/*
		 * TEVENT_FD_ERROR is likely already set, so
		 * TEVENT_FD_WANTERROR() is most likely a no-op.
		 */
		TEVENT_FD_WANTERROR(bsds->fde);
	}

	bsds->writeable_handler = handler;
	bsds->writeable_private = private_data;

	return 0;
}

static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
{
	struct tstream_bsd *bsds = tstream_context_data(stream,
				   struct tstream_bsd);
	ssize_t ret;

	if (bsds->fd == -1) {
		errno = ENOTCONN;
		return -1;
	}

	if (bsds->error != 0) {
		errno = bsds->error;
		return -1;
	}

	ret = tsocket_bsd_pending(bsds->fd);
	if (ret == -1) {
		/*
		 * remember the error and don't
		 * allow further requests
		 */
		bsds->error = errno;
	}

	return ret;
}

struct tstream_bsd_readv_state {
	struct tstream_context *stream;

	struct iovec *vector;
	size_t count;

	int ret;
};

static void tstream_bsd_readv_cleanup(struct tevent_req *req,
				      enum tevent_req_state req_state)
{
	struct tstream_bsd_readv_state *state =
		tevent_req_data(req,
		struct tstream_bsd_readv_state);

	if (state->stream != NULL) {
		struct tstream_bsd *bsds =
			tstream_context_data(state->stream,
			struct tstream_bsd);

		tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
		state->stream = NULL;
	}
}

static void tstream_bsd_readv_handler(void *private_data);

static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					struct tstream_context *stream,
					struct iovec *vector,
					size_t count)
{
	struct tevent_req *req;
	struct tstream_bsd_readv_state *state;
	struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
	int ret;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_readv_state);
	if (!req) {
		return NULL;
	}

	state->stream	= stream;
	/* we make a copy of the vector so that we can modify it */
	state->vector	= talloc_array(state, struct iovec, count);
	if (tevent_req_nomem(state->vector, req)) {
		goto post;
	}
	memcpy(state->vector, vector, sizeof(struct iovec)*count);
	state->count	= count;
	state->ret	= 0;

	tevent_req_set_cleanup_fn(req, tstream_bsd_readv_cleanup);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		goto post;
	}

	/*
	 * this is a fast path, not waiting for the
	 * socket to become explicit readable gains
	 * about 10%-20% performance in benchmark tests.
	 */
	if (bsds->optimize_readv) {
		/*
		 * We only do the optimization on
		 * readv if the caller asked for it.
		 *
		 * This is needed because in most cases
		 * we prefer to flush send buffers before
		 * receiving incoming requests.
		 */
		tstream_bsd_readv_handler(req);
		if (!tevent_req_is_in_progress(req)) {
			goto post;
		}
	}

	ret = tstream_bsd_set_readable_handler(bsds, ev,
					      tstream_bsd_readv_handler,
					      req);
	if (ret == -1) {
		tevent_req_error(req, errno);
		goto post;
	}

	return req;

 post:
	tevent_req_post(req, ev);
	return req;
}

static void tstream_bsd_readv_handler(void *private_data)
{
	struct tevent_req *req = talloc_get_type_abort(private_data,
				 struct tevent_req);
	struct tstream_bsd_readv_state *state = tevent_req_data(req,
					struct tstream_bsd_readv_state);
	struct tstream_context *stream = state->stream;
	struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
	int ret;
	int err;
	int _count;
	bool ok, retry;

	if (bsds->error != 0) {
		tevent_req_error(req, bsds->error);
		return;
	}

	ret = readv(bsds->fd, state->vector, state->count);
	if (ret == 0) {
		/* propagate end of file */
		bsds->error = EPIPE;
		tevent_req_error(req, EPIPE);
		return;
	}
	err = tsocket_bsd_error_from_errno(ret, errno, &retry);
	if (retry) {
		/* retry later */
		return;
	}
	if (err != 0) {
		/*
		 * remember the error and don't
		 * allow further requests
		 */
		bsds->error = err;
	}
	if (tevent_req_error(req, err)) {
		return;
	}

	state->ret += ret;

	_count = state->count; /* tstream has size_t count, readv has int */
	ok = iov_advance(&state->vector, &_count, ret);
	state->count = _count;

	if (!ok) {
		tevent_req_error(req, EINVAL);
		return;
	}

	if (state->count > 0) {
		/* we have more to read */
		return;
	}

	tevent_req_done(req);
}

static int tstream_bsd_readv_recv(struct tevent_req *req,
				  int *perrno)
{
	struct tstream_bsd_readv_state *state = tevent_req_data(req,
					struct tstream_bsd_readv_state);
	int ret;

	ret = tsocket_simple_int_recv(req, perrno);
	if (ret == 0) {
		ret = state->ret;
	}

	tevent_req_received(req);
	return ret;
}

struct tstream_bsd_writev_state {
	struct tstream_context *stream;

	struct iovec *vector;
	size_t count;

	int ret;
};

static void tstream_bsd_writev_cleanup(struct tevent_req *req,
				       enum tevent_req_state req_state)
{
	struct tstream_bsd_writev_state *state =
		tevent_req_data(req,
		struct tstream_bsd_writev_state);

	if (state->stream != NULL) {
		struct tstream_bsd *bsds =
			tstream_context_data(state->stream,
			struct tstream_bsd);

		tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
		state->stream = NULL;
	}
}

static void tstream_bsd_writev_handler(void *private_data);

static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
						 struct tevent_context *ev,
						 struct tstream_context *stream,
						 const struct iovec *vector,
						 size_t count)
{
	struct tevent_req *req;
	struct tstream_bsd_writev_state *state;
	struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
	int ret;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_writev_state);
	if (!req) {
		return NULL;
	}

	state->stream	= stream;
	/* we make a copy of the vector so that we can modify it */
	state->vector	= talloc_array(state, struct iovec, count);
	if (tevent_req_nomem(state->vector, req)) {
		goto post;
	}
	memcpy(state->vector, vector, sizeof(struct iovec)*count);
	state->count	= count;
	state->ret	= 0;

	tevent_req_set_cleanup_fn(req, tstream_bsd_writev_cleanup);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		goto post;
	}

	/*
	 * this is a fast path, not waiting for the
	 * socket to become explicit writeable gains
	 * about 10%-20% performance in benchmark tests.
	 */
	tstream_bsd_writev_handler(req);
	if (!tevent_req_is_in_progress(req)) {
		goto post;
	}

	ret = tstream_bsd_set_writeable_handler(bsds, ev,
					       tstream_bsd_writev_handler,
					       req);
	if (ret == -1) {
		tevent_req_error(req, errno);
		goto post;
	}

	return req;

 post:
	tevent_req_post(req, ev);
	return req;
}

static void tstream_bsd_writev_handler(void *private_data)
{
	struct tevent_req *req = talloc_get_type_abort(private_data,
				 struct tevent_req);
	struct tstream_bsd_writev_state *state = tevent_req_data(req,
					struct tstream_bsd_writev_state);
	struct tstream_context *stream = state->stream;
	struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
	ssize_t ret;
	int err;
	int _count;
	bool ok, retry;

	if (bsds->error != 0) {
		tevent_req_error(req, bsds->error);
		return;
	}

	ret = writev(bsds->fd, state->vector, state->count);
	if (ret == 0) {
		/* propagate end of file */
		bsds->error = EPIPE;
		tevent_req_error(req, EPIPE);
		return;
	}
	err = tsocket_bsd_error_from_errno(ret, errno, &retry);
	if (retry) {
		/*
		 * retry later...
		 */
		return;
	}
	if (err != 0) {
		/*
		 * remember the error and don't
		 * allow further requests
		 */
		bsds->error = err;
	}
	if (tevent_req_error(req, err)) {
		return;
	}

	state->ret += ret;

	_count = state->count; /* tstream has size_t count, writev has int */
	ok = iov_advance(&state->vector, &_count, ret);
	state->count = _count;

	if (!ok) {
		tevent_req_error(req, EINVAL);
		return;
	}

	if (state->count > 0) {
		/*
		 * we have more to write
		 */
		return;
	}

	tevent_req_done(req);
}

static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
{
	struct tstream_bsd_writev_state *state = tevent_req_data(req,
					struct tstream_bsd_writev_state);
	int ret;

	ret = tsocket_simple_int_recv(req, perrno);
	if (ret == 0) {
		ret = state->ret;
	}

	tevent_req_received(req);
	return ret;
}

struct tstream_bsd_disconnect_state {
	void *__dummy;
};

static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
						     struct tevent_context *ev,
						     struct tstream_context *stream)
{
	struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
	struct tevent_req *req;
	struct tstream_bsd_disconnect_state *state;
	int ret;
	int err;
	bool dummy;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_disconnect_state);
	if (req == NULL) {
		return NULL;
	}

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		goto post;
	}

	TALLOC_FREE(bsds->fde);
	ret = close(bsds->fd);
	bsds->fd = -1;
	err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
	if (tevent_req_error(req, err)) {
		goto post;
	}

	tevent_req_done(req);
post:
	tevent_req_post(req, ev);
	return req;
}

static int tstream_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int ret;

	ret = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return ret;
}

struct tstream_bsd_monitor_state {
	struct tstream_context *stream;
};

static void tstream_bsd_monitor_cleanup(struct tevent_req *req,
					enum tevent_req_state req_state)
{
	struct tstream_bsd_monitor_state *state =
		tevent_req_data(req,
		struct tstream_bsd_monitor_state);

	if (state->stream != NULL) {
		struct tstream_bsd *bsds =
			tstream_context_data(state->stream,
			struct tstream_bsd);

		tstream_bsd_set_error_handler(bsds, NULL, NULL, NULL);
		state->stream = NULL;
	}
}

static void tstream_bsd_monitor_handler(void *private_data);

static struct tevent_req *tstream_bsd_monitor_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					struct tstream_context *stream)
{
	struct tevent_req *req = NULL;
	struct tstream_bsd_monitor_state *state = NULL;
	struct tstream_bsd *bsds =
		tstream_context_data(stream, struct tstream_bsd);
	int ret;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_monitor_state);
	if (req == NULL) {
		return NULL;
	}
	state->stream = stream;

	tevent_req_set_cleanup_fn(req, tstream_bsd_monitor_cleanup);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		return tevent_req_post(req, ev);
	}

	ret = tstream_bsd_set_error_handler(bsds, ev,
					    tstream_bsd_monitor_handler,
					    req);
	if (ret == -1) {
		tevent_req_error(req, errno);
		return tevent_req_post(req, ev);
	}

	return req;
}

static void tstream_bsd_monitor_handler(void *private_data)
{
	struct tevent_req *req = talloc_get_type_abort(private_data,
				 struct tevent_req);
	struct tstream_bsd_monitor_state *state = tevent_req_data(req,
					struct tstream_bsd_monitor_state);
	struct tstream_context *stream = state->stream;
	struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);

	/*
	 * tstream_bsd_fde_handler already
	 * made sure that bsds->error is not 0.
	 */
	tevent_req_error(req, bsds->error);
}

static int tstream_bsd_monitor_recv(struct tevent_req *req,
				    int *perrno)
{
	int ret;

	ret = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return ret;
}

static const struct tstream_context_ops tstream_bsd_ops = {
	.name			= "bsd",

	.pending_bytes		= tstream_bsd_pending_bytes,

	.readv_send		= tstream_bsd_readv_send,
	.readv_recv		= tstream_bsd_readv_recv,

	.writev_send		= tstream_bsd_writev_send,
	.writev_recv		= tstream_bsd_writev_recv,

	.disconnect_send	= tstream_bsd_disconnect_send,
	.disconnect_recv	= tstream_bsd_disconnect_recv,

	.monitor_send		= tstream_bsd_monitor_send,
	.monitor_recv		= tstream_bsd_monitor_recv,
};

static int tstream_bsd_destructor(struct tstream_bsd *bsds)
{
	TALLOC_FREE(bsds->fde);
	if (bsds->fd != -1) {
		close(bsds->fd);
		bsds->fd = -1;
	}
	return 0;
}

int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
				 int fd,
				 struct tstream_context **_stream,
				 const char *location)
{
	struct tstream_context *stream;
	struct tstream_bsd *bsds;

	stream = tstream_context_create(mem_ctx,
					&tstream_bsd_ops,
					&bsds,
					struct tstream_bsd,
					location);
	if (!stream) {
		return -1;
	}
	ZERO_STRUCTP(bsds);
	bsds->fd = fd;
	talloc_set_destructor(bsds, tstream_bsd_destructor);

	*_stream = stream;
	return 0;
}

struct tstream_bsd_connect_state {
	int fd;
	struct tevent_fd *fde;
	struct tsocket_address *local;
};

static void tstream_bsd_connect_cleanup(struct tevent_req *req,
					enum tevent_req_state req_state)
{
	struct tstream_bsd_connect_state *state =
		tevent_req_data(req,
		struct tstream_bsd_connect_state);

	if (req_state == TEVENT_REQ_DONE) {
		return;
	}

	TALLOC_FREE(state->fde);
	if (state->fd != -1) {
		close(state->fd);
		state->fd = -1;
	}
}

static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
					    struct tevent_fd *fde,
					    uint16_t flags,
					    void *private_data);

static struct tevent_req *tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					int sys_errno,
					const struct tsocket_address *local,
					const struct tsocket_address *remote)
{
	struct tevent_req *req;
	struct tstream_bsd_connect_state *state;
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
		struct samba_sockaddr);
	struct samba_sockaddr *lrbsda = NULL;
	struct samba_sockaddr *rbsda =
		talloc_get_type_abort(remote->private_data,
		struct samba_sockaddr);
	int ret;
	bool do_bind = false;
	bool do_reuseaddr = false;
	bool do_ipv6only = false;
	bool is_inet = false;
	int sa_fam = lbsda->u.sa.sa_family;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_connect_state);
	if (!req) {
		return NULL;
	}
	state->fd = -1;
	state->fde = NULL;

	tevent_req_set_cleanup_fn(req, tstream_bsd_connect_cleanup);

	/* give the wrappers a chance to report an error */
	if (sys_errno != 0) {
		tevent_req_error(req, sys_errno);
		goto post;
	}

	switch (lbsda->u.sa.sa_family) {
	case AF_UNIX:
		if (lbsda->u.un.sun_path[0] != 0) {
			do_reuseaddr = true;
			do_bind = true;
		}
		break;
	case AF_INET:
		if (lbsda->u.in.sin_port != 0) {
			do_reuseaddr = true;
			do_bind = true;
		}
		if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
			do_bind = true;
		}
		is_inet = true;
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		if (lbsda->u.in6.sin6_port != 0) {
			do_reuseaddr = true;
			do_bind = true;
		}
		if (memcmp(&in6addr_any,
			   &lbsda->u.in6.sin6_addr,
			   sizeof(in6addr_any)) != 0) {
			do_bind = true;
		}
		is_inet = true;
		do_ipv6only = true;
		break;
#endif
	default:
		tevent_req_error(req, EINVAL);
		goto post;
	}

	if (!do_bind && is_inet) {
		sa_fam = rbsda->u.sa.sa_family;
		switch (sa_fam) {
		case AF_INET:
			do_ipv6only = false;
			break;
#ifdef HAVE_IPV6
		case AF_INET6:
			do_ipv6only = true;
			break;
#endif
		}
	}

	if (is_inet) {
		state->local = tsocket_address_create(state,
						      &tsocket_address_bsd_ops,
						      &lrbsda,
						      struct samba_sockaddr,
						      __location__ "bsd_connect");
		if (tevent_req_nomem(state->local, req)) {
			goto post;
		}

		ZERO_STRUCTP(lrbsda);
		lrbsda->sa_socklen = sizeof(lrbsda->u.ss);
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
		lrbsda->u.sa.sa_len = lrbsda->sa_socklen;
#endif
	}

	state->fd = socket(sa_fam, SOCK_STREAM, 0);
	if (state->fd == -1) {
		tevent_req_error(req, errno);
		goto post;
	}

	state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
	if (state->fd == -1) {
		tevent_req_error(req, errno);
		goto post;
	}

#ifdef HAVE_IPV6
	if (do_ipv6only) {
		int val = 1;

		ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
				 (const void *)&val, sizeof(val));
		if (ret == -1) {
			tevent_req_error(req, errno);
			goto post;
		}
	}
#endif

	if (do_reuseaddr) {
		int val = 1;

		ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
				 (const void *)&val, sizeof(val));
		if (ret == -1) {
			tevent_req_error(req, errno);
			goto post;
		}
	}

	if (do_bind) {
		ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
		if (ret == -1) {
			tevent_req_error(req, errno);
			goto post;
		}
	}

	if (rbsda->u.sa.sa_family != sa_fam) {
		tevent_req_error(req, EINVAL);
		goto post;
	}

	ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
	if (ret == -1) {
		if (errno == EINPROGRESS) {
			goto async;
		}
		tevent_req_error(req, errno);
		goto post;
	}

	if (!state->local) {
		tevent_req_done(req);
		goto post;
	}

	if (lrbsda != NULL) {
		ret = getsockname(state->fd,
				  &lrbsda->u.sa,
				  &lrbsda->sa_socklen);
		if (ret == -1) {
			tevent_req_error(req, errno);
			goto post;
		}
	}

	tevent_req_done(req);
	goto post;

 async:

	/*
	 * Note for historic reasons TEVENT_FD_WRITE is not enough
	 * to get notified for POLLERR or EPOLLHUP even if they
	 * come together with POLLOUT. That means we need to
	 * use TEVENT_FD_READ in addition until we have
	 * TEVENT_FD_ERROR.
	 */
	state->fde = tevent_add_fd(ev, state,
				   state->fd,
				   TEVENT_FD_ERROR | TEVENT_FD_WRITE,
				   tstream_bsd_connect_fde_handler,
				   req);
	if (tevent_req_nomem(state->fde, req)) {
		goto post;
	}

	return req;

 post:
	tevent_req_post(req, ev);
	return req;
}

static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
					    struct tevent_fd *fde,
					    uint16_t flags,
					    void *private_data)
{
	struct tevent_req *req = talloc_get_type_abort(private_data,
				 struct tevent_req);
	struct tstream_bsd_connect_state *state = tevent_req_data(req,
					struct tstream_bsd_connect_state);
	struct samba_sockaddr *lrbsda = NULL;
	int ret;
	int err;
	bool retry;

	ret = samba_socket_sock_error(state->fd);
	err = tsocket_bsd_error_from_errno(ret, errno, &retry);
	if (retry) {
		/* retry later */
		return;
	}
	if (tevent_req_error(req, err)) {
		return;
	}

	if (!state->local) {
		tevent_req_done(req);
		return;
	}

	lrbsda = talloc_get_type_abort(state->local->private_data,
				       struct samba_sockaddr);

	ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
	if (ret == -1) {
		tevent_req_error(req, errno);
		return;
	}

	tevent_req_done(req);
}

static int tstream_bsd_connect_recv(struct tevent_req *req,
				    int *perrno,
				    TALLOC_CTX *mem_ctx,
				    struct tstream_context **stream,
				    struct tsocket_address **local,
				    const char *location)
{
	struct tstream_bsd_connect_state *state = tevent_req_data(req,
					struct tstream_bsd_connect_state);
	int ret;

	ret = tsocket_simple_int_recv(req, perrno);
	if (ret == 0) {
		ret = _tstream_bsd_existing_socket(mem_ctx,
						   state->fd,
						   stream,
						   location);
		if (ret == -1) {
			*perrno = errno;
			goto done;
		}
		TALLOC_FREE(state->fde);
		state->fd = -1;

		if (local) {
			*local = talloc_move(mem_ctx, &state->local);
		}
	}

done:
	tevent_req_received(req);
	return ret;
}

struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					const struct tsocket_address *local,
					const struct tsocket_address *remote)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
		struct samba_sockaddr);
	struct tevent_req *req;
	int sys_errno = 0;

	switch (lbsda->u.sa.sa_family) {
	case AF_INET:
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		break;
#endif
	default:
		sys_errno = EINVAL;
		break;
	}

	req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);

	return req;
}

int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
				   int *perrno,
				   TALLOC_CTX *mem_ctx,
				   struct tstream_context **stream,
				   struct tsocket_address **local,
				   const char *location)
{
	return tstream_bsd_connect_recv(req, perrno,
					mem_ctx, stream, local,
					location);
}

struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					const struct tsocket_address *local,
					const struct tsocket_address *remote)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
		struct samba_sockaddr);
	struct tevent_req *req;
	int sys_errno = 0;

	switch (lbsda->u.sa.sa_family) {
	case AF_UNIX:
		break;
	default:
		sys_errno = EINVAL;
		break;
	}

	req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);

	return req;
}

int _tstream_unix_connect_recv(struct tevent_req *req,
				      int *perrno,
				      TALLOC_CTX *mem_ctx,
				      struct tstream_context **stream,
				      const char *location)
{
	return tstream_bsd_connect_recv(req, perrno,
					mem_ctx, stream, NULL,
					location);
}

int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
			     struct tstream_context **_stream1,
			     TALLOC_CTX *mem_ctx2,
			     struct tstream_context **_stream2,
			     const char *location)
{
	int ret;
	int fds[2];
	int fd1;
	int fd2;
	struct tstream_context *stream1 = NULL;
	struct tstream_context *stream2 = NULL;

	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
	if (ret == -1) {
		return -1;
	}
	fd1 = fds[0];
	fd2 = fds[1];

	fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
	if (fd1 == -1) {
		int sys_errno = errno;
		close(fd2);
		errno = sys_errno;
		return -1;
	}

	fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
	if (fd2 == -1) {
		int sys_errno = errno;
		close(fd1);
		errno = sys_errno;
		return -1;
	}

	ret = _tstream_bsd_existing_socket(mem_ctx1,
					   fd1,
					   &stream1,
					   location);
	if (ret == -1) {
		int sys_errno = errno;
		close(fd1);
		close(fd2);
		errno = sys_errno;
		return -1;
	}

	ret = _tstream_bsd_existing_socket(mem_ctx2,
					   fd2,
					   &stream2,
					   location);
	if (ret == -1) {
		int sys_errno = errno;
		talloc_free(stream1);
		close(fd2);
		errno = sys_errno;
		return -1;
	}

	*_stream1 = stream1;
	*_stream2 = stream2;
	return 0;
}

