/* Copyright (C) 2005,2006      David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.
*/
#ifndef _SOS_BLKDEV_H_
#define _SOS_BLKDEV_H_

/**
 * @file blkdev.h
 *
 * Interface between the VFS and the "block" devices (real devices and
 * their partitions). The following functions provide the mechanisms
 * to bind the "block device" nodes (@see mknod) to their device
 * driver.
 *
 * The "blkdev" layer is to be perceived as a FS-agnostic layer both
 * below and on top of the FS that binds the special "block device"
 * nodes to a set of system-wide block_read/block_write functions and
 * which supports the cache of blocks for the device.
 *
 * The differences between the char device and the block device layers are:
 * - A character device is byte-stream-oriented: one can easily fetch
 *   the data character by character. Theoretically, this stream has
 *   no limitation in size: it has a beginning but not necessarily an
 *   end. One cannot necessarily rewind the stream, that is seek
 *   anywhere in it (as for a stream of audio data coming from a
 *   soundcard for example). But for some character devices, it is
 *   possible, though (it is possible to seek anywhere in /dev/zero
 *   for example). For some other devices, it is possible to seek only
 *   in some authorized regions (as for /dev/kmem for example: only
 *   the mapped kernel regions are available).
 * - A block device is block-oriented: one can easily fetch fixed-size
 *   blocks of data anywhere inside the device, possibly in a random
 *   order. The capacity of the device is limited: there is a defined
 *   number of contiguous blocks reachable in the device, one cannot
 *   seek beyond its limits.
 * - A block device is well suited to a cache layer: it is possible to
 *   cache the most used blocks inside memory to speed up the accesses
 *   to data. It doesn't make sense to do so on stream-oriented devices
 *   like character devices.
 * - A character device is limited to the interaction between
 *   user-mode programs and the devices themselves. The kernel only acts
 *   as an intermediary and does not take care of the operations
 *   carried out.
 * - A block device is used as the underlying data support for a
 *   filesystem. As a result, it must be managed by the kernel itself
 *   since the FS code resides inside the kernel. It must offer an API
 *   compatible with kernel-space interaction.
 *
 * A partition has the same properties as a disk: both are block
 * devices. The main difference is that a disk really "owns" its
 * operations (ie the read/write/ioctl operations are defined by it)
 * and its block cache, whereas a partition shares them with its
 * parent disk/partition. The partitions may be nested: a disk can
 * have partitions, which can have sub-partitions, which can have
 * sub-sub-partitions, etc. As a consequence, a parent disk/partition
 * cannot be unregistered as long as it owns any child partitions. The
 * only constraint is that the partitions must fit completely
 * inside their parent partition/disk. However, no check is made
 * regarding partitions overlapping one another...
 *
 * This implementation is based on two subsystems:
 * - the block cache to accelerate accesses to the hardware by caching
 *   the most frequently used blocks in main memory
 * - the page cache to guarantee consistency between read/write
 *   accesses and MMU access of mapped pages (mmap API). This is a
 *   simple dictionary of the device's mapped pages: offset -> mapped
 *   page. The read/write vs MMU consistency is achieved in a simple
 *   manner: before each read/write access, we first try to use the
 *   page cache, and fall back to the block cache if the data is not
 *   mapped by anybody.
 */

/* Forward declaration */
struct sos_blockdev_instance;

#include <sos/fs.h>

/**
 * The fundamental callbacks for a real block device (ie not a
 * partition).
 *
 * Every callback takes an opaque blockdev_instance_custom_data pointer
 * as its first argument and returns a sos_ret_t status code.
 *
 * NOTE(review): the custom data pointer is presumably the one given
 * to sos_blockdev_register_disk() -- confirm against the
 * implementation.
 */
struct sos_blockdev_operations {

  /**
   * Read callback.
   *
   * @param blockdev_instance_custom_data opaque driver data
   * @param dest_buf kernel address of the destination buffer
   * @param block_offset location of the block on the device
   *   (NOTE(review): presumably a block index rather than a byte
   *   offset -- confirm)
   *
   * @note MANDATORY
   */
  sos_ret_t (*read_block)(void * blockdev_instance_custom_data,
			  sos_vaddr_t dest_buf /* Kernel address */,
			  sos_luoffset_t block_offset);


  /**
   * Write callback.
   *
   * @param blockdev_instance_custom_data opaque driver data
   * @param src_buf kernel address of the source buffer
   * @param block_offset location of the block on the device
   *   (NOTE(review): presumably a block index rather than a byte
   *   offset -- confirm)
   *
   * @note Optional (may be NULL)
   */
  sos_ret_t (*write_block)(void * blockdev_instance_custom_data,
			   sos_vaddr_t src_buf /* Kernel address */,
			   sos_luoffset_t block_offset);


  /**
   * Device-specific control callback.
   *
   * @param blockdev_instance_custom_data opaque driver data
   * @param req_id identifier of the request
   * @param req_arg argument of the request, usually a user-space
   *   address (sos_uaddr_t)
   *
   * @note Optional (may be NULL)
   * @note Also called when an ioctl is made to a partition
   */
  sos_ret_t (*ioctl)(void * blockdev_instance_custom_data,
		     int req_id,
		     sos_ui32_t req_arg /* Usually: sos_uaddr_t */);
};


/*
 * Functions restricted to block device driver code
 */


/**
 * Set up the block device subsystem.
 *
 * NOTE(review): presumably has to be called once, before any other
 * function of this header -- confirm against the kernel
 * initialization sequence.
 *
 * @return a sos_ret_t status code
 */
sos_ret_t sos_blockdev_subsystem_setup(void);


/**
 * Register a real block device (a disk, as opposed to a partition).
 *
 * Contrary to Character devices, block devices are registered
 * individually: any single registration corresponds to a driver for a
 * single device instance
 *
 * @param device_class together with device_instance, identifies the
 *   device being registered (the same pair is taken by
 *   sos_blockdev_ref_instance() and sos_blockdev_unregister_device())
 * @param device_instance see device_class
 * @param block_size size of one block of the device
 *   (NOTE(review): presumably in bytes -- confirm)
 * @param number_of_blocks capacity of the device, in blocks
 * @param cache_size_in_blocks size of the block cache of this disk,
 *   in blocks
 * @param blockdev_ops callbacks of the driver (read_block is
 *   mandatory, the others may be NULL)
 * @param blockdev_instance_custom_data opaque driver data
 *   (NOTE(review): presumably handed back to the blockdev_ops
 *   callbacks -- confirm)
 *
 * @return a sos_ret_t status code
 */
sos_ret_t
sos_blockdev_register_disk (sos_ui32_t     device_class,
			    sos_ui32_t     device_instance,
			    sos_size_t     block_size,
			    sos_lcount_t   number_of_blocks,
			    sos_count_t    cache_size_in_blocks,
			    struct sos_blockdev_operations * blockdev_ops,
			    void * blockdev_instance_custom_data);


/**
 * Register a partition of an already registered disk or partition.
 * A partition shares the operations and the block cache of its
 * parent; it must fit completely inside the parent, but no check is
 * made regarding partitions overlapping one another.
 *
 * @param device_class together with device_instance, identifies the
 *   partition being registered
 * @param device_instance see device_class
 * @param parent_bd the disk or partition containing this partition
 *   (NOTE(review): presumably obtained through
 *   sos_blockdev_ref_instance() -- confirm)
 * @param index_of_first_block is the index of the first block
 * relative to parent_bd, NOT relative to the top-most disk block
 * device
 * @param number_of_blocks size of the partition, in blocks
 * @param blockdev_instance_custom_data opaque data attached to the
 *   partition (NOTE(review): exact use not visible from this header
 *   -- confirm)
 *
 * @return a sos_ret_t status code
 */
sos_ret_t
sos_blockdev_register_partition(sos_ui32_t device_class,
				sos_ui32_t device_instance,
				struct sos_blockdev_instance * parent_bd,
				sos_luoffset_t index_of_first_block,
				sos_lcount_t number_of_blocks,
				void * blockdev_instance_custom_data);


/**
 * Unregister the block device (disk or partition) identified by
 * device_class/device_instance.
 *
 * A parent disk/partition cannot be unregistered as long as it owns
 * any child partitions, and unregistration returns BUSY while the
 * instance is referenced through sos_blockdev_ref_instance().
 *
 * @return a sos_ret_t status code
 */
sos_ret_t sos_blockdev_unregister_device (sos_ui32_t device_class,
					  sos_ui32_t device_instance);


/**
 * Flush all caches of all devices to disk
 *
 * @return a sos_ret_t status code
 */
sos_ret_t sos_blockdev_sync_all_devices(void);


/**
 * Look up the block device identified by device_class/device_instance
 * and take a reference on it.
 *
 * Increments the instance's reference counter: this will make any
 * blockdev_unregister return BUSY. As a consequence, the following
 * operations on blockdev instances should be as fast as possible and
 * call sos_blockdev_release_instance() as early as possible
 *
 * @return the referenced instance (NOTE(review): presumably NULL when
 *   no such device is registered -- confirm)
 */
struct sos_blockdev_instance *
sos_blockdev_ref_instance(sos_ui32_t device_class,
			  sos_ui32_t device_instance);


/**
 * Counterpart of sos_blockdev_ref_instance(): give back a reference
 * previously taken on a block device instance. To be called as early
 * as possible, since a referenced instance cannot be unregistered.
 *
 * @param blockdev the instance returned by sos_blockdev_ref_instance()
 *
 * @return a sos_ret_t status code
 */
sos_ret_t
sos_blockdev_release_instance(struct sos_blockdev_instance * blockdev);


/**
 * Read data from disk and use it directly in the kernel. Mostly used
 * by the partition drivers to identify the partitions of a disk and
 * register them
 *
 * @param blockdev the device to read from
 * @param offset where to start reading in the device
 *   (NOTE(review): presumably a byte offset -- confirm)
 * @param dest_buf address of the destination buffer
 *   (NOTE(review): presumably a kernel address, as this is the
 *   kernel-side API -- confirm)
 * @param len in/out parameter (NOTE(review): presumably the requested
 *   length on input and the length actually read on output --
 *   confirm)
 *
 * @return a sos_ret_t status code
 */
sos_ret_t sos_blockdev_kernel_read(struct sos_blockdev_instance * blockdev,
				   sos_luoffset_t offset,
				   sos_vaddr_t dest_buf,
				   sos_size_t * /* in/out */len);


/**
 * Write data to disk directly from the kernel. Mostly used by the
 * partition drivers to modify the partitions of a disk and register
 * them.
 *
 * @param blockdev the device to write to
 * @param offset where to start writing in the device
 *   (NOTE(review): presumably a byte offset -- confirm)
 * @param src_buf address of the source buffer
 *   (NOTE(review): presumably a kernel address, as this is the
 *   kernel-side API -- confirm)
 * @param len in/out parameter (NOTE(review): presumably the requested
 *   length on input and the length actually written on output --
 *   confirm)
 *
 * @return a sos_ret_t status code
 *
 * @note The operation is normally NOT needed (fdisk is a userspace
 * program) and is NOT synchronous
 */
sos_ret_t sos_blockdev_kernel_write(struct sos_blockdev_instance * blockdev,
				    sos_luoffset_t offset,
				    sos_vaddr_t src_buf,
				    sos_size_t * /* in/out */len);


/**
 * Flush the modified blocks back to hardware (both block and page
 * cache are flushed)
 *
 * @param blockdev the device whose caches are to be flushed
 *
 * @return a sos_ret_t status code
 */
sos_ret_t sos_blockdev_sync(struct sos_blockdev_instance * blockdev);


/*
 * Callbacks and functions restricted to fs.c internals
 */

/**
 * Update the FS node ops_blockdev callbacks after an FS
 * allocate_new_node or fetch_node_from_disk, in order to point to
 * the block layer API functions
 *
 * @param this the FS node to update
 *
 * @return a sos_ret_t status code
 *
 * @note Restricted to fs.c internals
 */
sos_ret_t sos_blockdev_helper_ref_new_fsnode(struct sos_fs_node * this);
/**
 * Release helper for an FS node bound to a block device.
 *
 * NOTE(review): presumably undoes what
 * sos_blockdev_helper_ref_new_fsnode() set up on this node -- confirm
 * against the implementation.
 *
 * @param this the FS node being released
 *
 * @return a sos_ret_t status code
 *
 * @note Restricted to fs.c internals
 */
sos_ret_t sos_blockdev_helper_release_fsnode(struct sos_fs_node * this);
/**
 * Sync helper for an FS node bound to a block device.
 *
 * NOTE(review): presumably flushes the caches of the block device
 * designated by this node, as sos_blockdev_sync() does for an
 * instance -- confirm against the implementation.
 *
 * @param this the FS node to synchronize
 *
 * @return a sos_ret_t status code
 *
 * @note Restricted to fs.c internals
 */
sos_ret_t sos_blockdev_helper_sync_fsnode(struct sos_fs_node * this);

#endif